Func-y scripts #87

Merged
merged 17 commits on Feb 4, 2025
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
@@ -103,3 +103,4 @@ repos:
    hooks:
      - id: snakefmt
        files: Snakefile*|\.smk
        exclude: channel_merge.smk
35 changes: 32 additions & 3 deletions pyproject.toml
@@ -84,14 +84,43 @@ docs = [

[project.scripts]
dataprod = "legenddataflow.execenv:dataprod"
create_chankeylist = "legenddataflow.scripts.create_chankeylist:create_chankeylist"
Member

I prefer using dashes in command names (see the similar comment about command-line options).
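For illustration, dash-style names in [project.scripts] would look like this (a hypothetical sketch, not part of this PR; the target callables stay the same):

[project.scripts]
# hypothetical dash-style command names
create-chankeylist = "legenddataflow.scripts.create_chankeylist:create_chankeylist"
build-tier-dsp = "legenddataflow.scripts.tier.dsp:build_tier_dsp"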

Collaborator (author)

Sure, maybe add it as an issue and we can get to it sometime.

Member

I can also do it.

merge_channels = "legenddataflow.scripts.merge_channels:merge_channels"
build_filedb = "legenddataflow.scripts.build_filedb:build_filedb"
build_tier_dsp = "legenddataflow.scripts.tier.dsp:build_tier_dsp"
build_tier_evt = "legenddataflow.scripts.tier.evt:build_tier_evt"
build_tier_hit = "legenddataflow.scripts.tier.hit:build_tier_hit"
build_tier_raw_blind = "legenddataflow.scripts.tier.raw_blind:build_tier_raw_blind"
build_tier_raw_fcio = "legenddataflow.scripts.tier.raw_fcio:build_tier_raw_fcio"
build_tier_raw_orca = "legenddataflow.scripts.tier.raw_orca:build_tier_raw_orca"
build_tier_skm = "legenddataflow.scripts.tier.skm:build_tier_skm"
build_tier_tcm = "legenddataflow.scripts.tier.tcm:build_tier_tcm"
par_geds_dsp_dplms = "legenddataflow.scripts.par.geds.dsp.dplms:par_geds_dsp_dplms"
par_geds_dsp_eopt = "legenddataflow.scripts.par.geds.dsp.eopt:par_geds_dsp_eopt"
par_geds_dsp_evtsel = "legenddataflow.scripts.par.geds.dsp.evtsel:par_geds_dsp_evtsel"
par_geds_dsp_nopt = "legenddataflow.scripts.par.geds.dsp.nopt:par_geds_dsp_nopt"
par_geds_dsp_svm_build = "legenddataflow.scripts.par.geds.dsp.svm_build:par_geds_dsp_svm_build"
par_geds_dsp_svm = "legenddataflow.scripts.par.geds.dsp.svm:par_geds_dsp_svm"
par_geds_dsp_tau = "legenddataflow.scripts.par.geds.dsp.tau:par_geds_dsp_tau"
par_geds_hit_aoe = "legenddataflow.scripts.par.geds.hit.aoe:par_geds_hit_aoe"
par_geds_hit_ecal = "legenddataflow.scripts.par.geds.hit.ecal:par_geds_hit_ecal"
par_geds_hit_lq = "legenddataflow.scripts.par.geds.hit.lq:par_geds_hit_lq"
par_geds_hit_qc = "legenddataflow.scripts.par.geds.hit.qc:par_geds_hit_qc"
par_geds_pht_aoe = "legenddataflow.scripts.par.geds.pht.aoe:par_geds_pht_aoe"
par_geds_pht_ecal_part = "legenddataflow.scripts.par.geds.pht.ecal_part:par_geds_pht_ecal_part"
par_geds_pht_fast = "legenddataflow.scripts.par.geds.pht.fast:par_geds_pht_fast"
par_geds_pht_qc_phy = "legenddataflow.scripts.par.geds.pht.qc_phy:par_geds_pht_qc_phy"
par_geds_pht_qc = "legenddataflow.scripts.par.geds.pht.qc:par_geds_pht_qc"
par_geds_psp_average = "legenddataflow.scripts.par.geds.psp.average:par_geds_psp_average"
par_geds_raw_blindcal = "legenddataflow.scripts.par.geds.raw.blindcal:par_geds_raw_blindcal"
par_geds_raw_blindcheck = "legenddataflow.scripts.par.geds.raw.blindcheck:par_geds_raw_blindcheck"
par_geds_tcm_pulser = "legenddataflow.scripts.par.geds.raw.tcm.pulser:par_geds_raw_pulser"
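Each entry above maps a console command to a zero-argument callable that parses its own command line once installed; a minimal sketch of that pattern (module, function, and flags are all hypothetical, not taken from this PR):

# sketch of a console-script callable; every name and flag here is illustrative
import argparse

def build_tier_example():
    parser = argparse.ArgumentParser(description="build an example tier")
    parser.add_argument("--log", help="path to the log file")
    parser.add_argument("--configs", help="path to the config directory")
    args = parser.parse_args()
    # ... the actual tier-building logic would run here using args ...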

[tool.uv.workspace]
exclude = ["generated", "inputs", "software", "workflow"]

[tool.uv]
dev-dependencies = [
"legend-dataflow[test]",
]
default-groups = []

[tool.pytest.ini_options]
minversion = "6.0"
40 changes: 0 additions & 40 deletions tests/dummy_cycle/config.json

This file was deleted.

58 changes: 58 additions & 0 deletions tests/dummy_cycle/config.yaml
@@ -0,0 +1,58 @@
paths:
  sandbox_path: ""
  tier_daq: $_/input_data/tier/daq
  tier_raw_blind: ""

  workflow: $_/workflow

  metadata: $_/inputs
  config: $_/inputs/dataprod/config
  par_overwrite: $_/inputs/dataprod/overrides
  chan_map: $_/inputs/hardware/configuration
  detector_status: $_/inputs/datasets
  detector_db: $_/inputs/hardware/detectors

  tier: $_/generated/tier
  tier_raw: /data2/public/prodenv/prod-blind/ref-raw/generated/tier/raw
  tier_tcm: $_/generated/tier/tcm
  tier_dsp: $_/generated/tier/dsp
  tier_hit: $_/generated/tier/hit
  tier_ann: $_/generated/tier/ann
  tier_evt: $_/generated/tier/evt
  tier_psp: $_/generated/tier/psp
  tier_pht: $_/generated/tier/pht
  tier_pan: $_/generated/tier/pan
  tier_pet: $_/generated/tier/pet
  tier_skm: $_/generated/tier/skm

  par: $_/generated/par
  par_raw: $_/generated/par/raw
  par_tcm: $_/generated/par/tcm
  par_dsp: $_/generated/par/dsp
  par_hit: $_/generated/par/hit
  par_evt: $_/generated/par/evt
  par_psp: $_/generated/par/psp
  par_pht: $_/generated/par/pht
  par_pet: $_/generated/par/pet

  plt: $_/generated/plt
  log: $_/generated/log

  tmp_plt: $_/generated/tmp/plt
  tmp_log: $_/generated/tmp/log
  tmp_filelists: $_/generated/tmp/filelists
  tmp_par: $_/generated/tmp/par

  src: $_/software/python/src
  install: $_/.snakemake/legend-dataflow/venv

table_format:
  raw: ch{ch:07d}/raw
  dsp: ch{ch:07d}/dsp
  psp: ch{ch:07d}/dsp
  hit: ch{ch:07d}/hit
  pht: ch{ch:07d}/hit
  evt: "{grp}/evt"
  pet: "{grp}/evt"
  skm: "{grp}/skm"
  tcm: hardware_tcm_1
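As an aside, the table_format values are ordinary Python format strings (note the {ch:07d} field); a quick illustrative expansion with a made-up channel number:

# illustrative only: expanding a table_format pattern
>>> "ch{ch:07d}/raw".format(ch=1104)
'ch0001104/raw'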
3 changes: 0 additions & 3 deletions tests/dummy_cycle/generated/par/dsp/validity.jsonl

This file was deleted.


24 changes: 14 additions & 10 deletions tests/test_util.py
@@ -1,6 +1,7 @@
import json
from datetime import datetime
from pathlib import Path

import yaml
from legenddataflow import (
    FileKey,
    ParsKeyResolve,
@@ -11,15 +12,17 @@

testprod = Path(__file__).parent / "dummy_cycle"

with (testprod / "config.json").open() as r:
setup = json.load(r)
with (testprod / "config.yaml").open() as r:
setup = yaml.safe_load(r)
subst_vars(setup, var_values={"_": str(testprod)})
setup = setup["setups"]["test"]


def test_util():
    assert utils.tier_path(setup) == str(testprod / "generated/tier")
    assert utils.unix_time("20230101T123456Z") == 1672572896.0
    time = datetime.now()
    assert int(utils.unix_time(time.strftime("%Y%m%dT%H%M%SZ"))) == int(
        time.timestamp()
    )


def test_filekey():
@@ -42,7 +45,7 @@ def test_filekey():
    assert (
        FileKey.get_filekey_from_pattern(
            key.get_path_from_filekey(patterns.get_pattern_tier(setup, "dsp"))[0],
            utils.get_pattern_tier(setup, "dsp"),
            patterns.get_pattern_tier(setup, "dsp"),
        ).name
        == key.name
    )
@@ -71,9 +74,10 @@ def test_create_pars_keylist():
"cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.yaml",
"lar/p00/r000/l200-p00-r000-lar-20230102T123456Z-par_dsp.yaml",
}

    keylist = sorted(
        ParsKeyResolve.get_keys("-*-*-*-cal", patterns.get_pattern_tier_daq(setup)),
        ParsKeyResolve.get_keys(
            "-*-*-*-cal", patterns.get_pattern_tier_daq(setup, extension="*")
        ),
        key=FileKey.get_unix_timestamp,
    )
    assert keylist == [
@@ -98,6 +102,6 @@
pkeylist, {"cal": ["par_dsp"], "lar": ["par_dsp"]}
)[1].apply
) == {
"cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json",
"lar/p00/r000/l200-p00-r000-lar-20230110T123456Z-par_dsp.json",
"cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.yaml",
"lar/p00/r000/l200-p00-r000-lar-20230110T123456Z-par_dsp.yaml",
}
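Stepping back from the hunk: the $_ placeholders in the new config.yaml are expanded by the subst_vars call near the top of this test module; schematically (an illustration inferred from that call, not the library's documented API):

# illustrative: the effect of subst_vars on the loaded config
setup = {"paths": {"workflow": "$_/workflow"}}
subst_vars(setup, var_values={"_": "/path/to/dummy_cycle"})
assert setup["paths"]["workflow"] == "/path/to/dummy_cycle/workflow"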
27 changes: 9 additions & 18 deletions workflow/Snakefile
@@ -18,24 +18,25 @@ from datetime import datetime
from collections import OrderedDict
import logging

from dbetto import AttrsDict
from legendmeta import LegendMetadata
from legenddataflow import CalGrouping
from legenddataflow import utils

utils.subst_vars_in_snakemake_config(workflow, config)
config = AttrsDict(config)

check_in_cycle = True
configs = utils.config_path(config)
chan_maps = utils.chan_map_path(config)
meta = utils.metadata_path(config)
det_status = utils.det_status_path(config)
swenv = utils.runcmd(config)
basedir = workflow.basedir

# wait for new pylegendmeta release
# if not Path(meta).exists():
# meta = LegendMetadata()
# meta.checkout(config["configs"]["l200"]["legend_metadata_version"])
time = datetime.now().strftime("%Y%m%dT%H%M%SZ")

if not Path(meta).exists():
    LegendMetadata(meta).checkout(config.legend_metadata_version)

part = CalGrouping(config, Path(det_status) / "cal_groupings.yaml")
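The AttrsDict wrapper from dbetto is what makes the attribute-style config.legend_metadata_version lookup above possible; a minimal sketch of the behavior (illustrative values, assuming dbetto's attribute-access dict semantics):

from dbetto import AttrsDict

# AttrsDict exposes dict keys as attributes (made-up version string)
cfg = AttrsDict({"legend_metadata_version": "v1.0.0"})
assert cfg.legend_metadata_version == cfg["legend_metadata_version"]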

@@ -57,9 +58,11 @@ include: "rules/dsp_pars_geds.smk"
include: "rules/dsp.smk"
include: "rules/psp_pars_geds.smk"
include: "rules/psp.smk"
include: "rules/hit_pars_geds.smk"
include: "rules/hit.smk"
include: "rules/pht_pars_geds.smk"
include: "rules/pht_pars_geds_fast.smk"
include: "rules/pht.smk"
include: "rules/pht_fast.smk"
include: "rules/ann.smk"
include: "rules/evt.smk"
include: "rules/skm.smk"
@@ -146,18 +149,6 @@ onsuccess:
    if os.path.exists(utils.filelist_path(config)):
        os.rmdir(utils.filelist_path(config))

    # remove logs
    files = glob.glob(os.path.join(utils.tmp_log_path(config), "*", "*.log"))
    for file in files:
        if os.path.isfile(file):
            os.remove(file)
    dirs = glob.glob(os.path.join(utils.tmp_log_path(config), "*"))
    for d in dirs:
        if os.path.isdir(d):
            os.rmdir(d)
    if os.path.exists(utils.tmp_log_path(config)):
        os.rmdir(utils.tmp_log_path(config))


rule gen_filelist:
"""Generate file list.
7 changes: 6 additions & 1 deletion workflow/Snakefile-build-raw
@@ -10,16 +10,21 @@ import os, sys
from pathlib import Path
from legenddataflow import patterns as patt
from legenddataflow import utils, execenv, ParsKeyResolve
from datetime import datetime
from dbetto import AttrsDict

utils.subst_vars_in_snakemake_config(workflow, config)
config = AttrsDict(config)

check_in_cycle = True
swenv = execenv.execenv_prefix(config)
meta_path = utils.metadata_path(config)
det_status = utils.det_status_path(config)

time = datetime.now().strftime("%Y%m%dT%H%M%SZ")

if not Path(meta_path).exists():
    LegendMetadata(meta_path).checkout(config["legend_metadata_version"])
    LegendMetadata(meta_path).checkout(config.legend_metadata_version)


wildcard_constraints:
23 changes: 11 additions & 12 deletions workflow/rules/ann.smk
@@ -9,28 +9,28 @@ from legenddataflow.patterns import (
    get_pattern_log,
    get_pattern_pars,
)
from legenddataflow.execenv import execenv_smk_py_script


rule build_ann:
    input:
        dsp_file=get_pattern_tier(setup, "dsp", check_in_cycle=False),
        dsp_file=get_pattern_tier(config, "dsp", check_in_cycle=False),
        pars_file=lambda wildcards: get_input_par_file(wildcards, "ann", "cuts"),
    params:
        timestamp="{timestamp}",
        datatype="{datatype}",
    output:
        tier_file=get_pattern_tier(setup, "ann", check_in_cycle=check_in_cycle),
        db_file=get_pattern_pars_tmp(setup, "ann_db"),
        tier_file=get_pattern_tier(config, "ann", check_in_cycle=check_in_cycle),
        db_file=get_pattern_pars_tmp(config, "ann_db"),
    log:
        get_pattern_log(setup, "tier_ann"),
        get_pattern_log(config, "tier_ann", time),
    group:
        "tier-ann"
    resources:
        runtime=300,
        mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15,
    shell:
        "{swenv} python3 -B "
        f"{workflow.source_path('../scripts/build_dsp.py')} "
        f'{execenv_smk_py_script(config, "build_tier_dsp")}'
        "--log {log} "
        "--configs {configs} "
        "--metadata {meta} "
@@ -45,24 +45,23 @@

rule build_pan:
    input:
        dsp_file=get_pattern_tier(setup, "psp", check_in_cycle=False),
        dsp_file=get_pattern_tier(config, "psp", check_in_cycle=False),
        pars_file=lambda wildcards: get_input_par_file(wildcards, "ann", "cuts"),
    params:
        timestamp="{timestamp}",
        datatype="{datatype}",
    output:
        tier_file=get_pattern_tier(setup, "pan", check_in_cycle=check_in_cycle),
        db_file=get_pattern_pars_tmp(setup, "pan_db"),
        tier_file=get_pattern_tier(config, "pan", check_in_cycle=check_in_cycle),
        db_file=get_pattern_pars_tmp(config, "pan_db"),
    log:
        get_pattern_log(setup, "tier_pan"),
        get_pattern_log(config, "tier_pan", time),
    group:
        "tier-ann"
    resources:
        runtime=300,
        mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15,
    shell:
        "{swenv} python3 -B "
        f"{workflow.source_path('../scripts/build_dsp.py')} "
        f'{execenv_smk_py_script(config, "build_tier_dsp")}'
        "--log {log} "
        "--configs {configs} "
        "--metadata {meta} "