diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0611a74..8f713be 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -103,3 +103,4 @@ repos: hooks: - id: snakefmt files: Snakefile*|\.smk + exclude: channel_merge.smk diff --git a/pyproject.toml b/pyproject.toml index 807e71b..86f7d5b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,14 +84,43 @@ docs = [ [project.scripts] dataprod = "legenddataflow.execenv:dataprod" +create_chankeylist = "legenddataflow.scripts.create_chankeylist:create_chankeylist" +merge_channels = "legenddataflow.scripts.merge_channels:merge_channels" +build_filedb = "legenddataflow.scripts.build_filedb:build_filedb" +build_tier_dsp = "legenddataflow.scripts.tier.dsp:build_tier_dsp" +build_tier_evt = "legenddataflow.scripts.tier.evt:build_tier_evt" +build_tier_hit = "legenddataflow.scripts.tier.hit:build_tier_hit" +build_tier_raw_blind = "legenddataflow.scripts.tier.raw_blind:build_tier_raw_blind" +build_tier_raw_fcio = "legenddataflow.scripts.tier.raw_fcio:build_tier_raw_fcio" +build_tier_raw_orca = "legenddataflow.scripts.tier.raw_orca:build_tier_raw_orca" +build_tier_skm = "legenddataflow.scripts.tier.skm:build_tier_skm" +build_tier_tcm = "legenddataflow.scripts.tier.tcm:build_tier_tcm" +par_geds_dsp_dplms = "legenddataflow.scripts.par.geds.dsp.dplms:par_geds_dsp_dplms" +par_geds_dsp_eopt = "legenddataflow.scripts.par.geds.dsp.eopt:par_geds_dsp_eopt" +par_geds_dsp_evtsel = "legenddataflow.scripts.par.geds.dsp.evtsel:par_geds_dsp_evtsel" +par_geds_dsp_nopt = "legenddataflow.scripts.par.geds.dsp.nopt:par_geds_dsp_nopt" +par_geds_dsp_svm_build = "legenddataflow.scripts.par.geds.dsp.svm_build:par_geds_dsp_svm_build" +par_geds_dsp_svm = "legenddataflow.scripts.par.geds.dsp.svm:par_geds_dsp_svm" +par_geds_dsp_tau = "legenddataflow.scripts.par.geds.dsp.tau:par_geds_dsp_tau" +par_geds_hit_aoe = "legenddataflow.scripts.par.geds.hit.aoe:par_geds_hit_aoe" +par_geds_hit_ecal = "legenddataflow.scripts.par.geds.hit.ecal:par_geds_hit_ecal" +par_geds_hit_lq = "legenddataflow.scripts.par.geds.hit.lq:par_geds_hit_lq" +par_geds_hit_qc = "legenddataflow.scripts.par.geds.hit.qc:par_geds_hit_qc" +par_geds_pht_aoe = "legenddataflow.scripts.par.geds.pht.aoe:par_geds_pht_aoe" +par_geds_pht_ecal_part = "legenddataflow.scripts.par.geds.pht.ecal_part:par_geds_pht_ecal_part" +par_geds_pht_fast = "legenddataflow.scripts.par.geds.pht.fast:par_geds_pht_fast" +par_geds_pht_qc_phy = "legenddataflow.scripts.par.geds.pht.qc_phy:par_geds_pht_qc_phy" +par_geds_pht_qc = "legenddataflow.scripts.par.geds.pht.qc:par_geds_pht_qc" +par_geds_psp_average = "legenddataflow.scripts.par.geds.psp.average:par_geds_psp_average" +par_geds_raw_blindcal = "legenddataflow.scripts.par.geds.raw.blindcal:par_geds_raw_blindcal" +par_geds_raw_blindcheck = "legenddataflow.scripts.par.geds.raw.blindcheck:par_geds_raw_blindcheck" +par_geds_tcm_pulser = "legenddataflow.scripts.par.geds.raw.tcm.pulser:par_geds_raw_pulser" [tool.uv.workspace] exclude = ["generated", "inputs", "software", "workflow"] [tool.uv] -dev-dependencies = [ - "legend-dataflow[test]", -] +default-groups = [] [tool.pytest.ini_options] minversion = "6.0" diff --git a/tests/dummy_cycle/config.json b/tests/dummy_cycle/config.json deleted file mode 100644 index e9a358d..0000000 --- a/tests/dummy_cycle/config.json +++ /dev/null @@ -1,40 +0,0 @@ -{ - "setups": { - "test": { - "paths": { - "sandbox_path": "", - "tier_daq": "$_/input_data/tier/daq", - - "dataflow": "$_/dataflow", - - "metadata": "$_/inputs", - "config": 
"$_/inputs/dataprod/config", - "par_overwrite": "$_/inputs/dataprod/overrides", - "chan_map": "$_/inputs/hardware/configuration", - "detector_db": "$_/inputs/hardware/detectors", - - "tier": "$_/generated/tier", - "tier_raw": "$_/generated/tier/raw", - "tier_tcm": "$_/generated/tier/tcm", - "tier_dsp": "$_/generated/tier/dsp", - "tier_hit": "$_/generated/tier/hit", - "tier_evt": "$_/generated/tier/evt", - - "par": "$_/generated/par", - "par_raw": "$_/generated/par/raw", - "par_tcm": "$_/generated/par/tcm", - "par_dsp": "$_/generated/par/dsp", - "par_hit": "$_/generated/par/hit", - "par_evt": "$_/generated/par/evt", - - "plt": "$_/generated/plt", - "log": "$_/generated/log", - - "tmp_plt": "$_/generated/tmp/plt", - "tmp_log": "$_/generated/tmp/log", - "tmp_filelists": "$_/generated/tmp/filelists", - "tmp_par": "$_/generated/tmp/par" - } - } - } -} diff --git a/tests/dummy_cycle/config.yaml b/tests/dummy_cycle/config.yaml new file mode 100644 index 0000000..97de306 --- /dev/null +++ b/tests/dummy_cycle/config.yaml @@ -0,0 +1,58 @@ +paths: + sandbox_path: "" + tier_daq: $_/input_data/tier/daq + tier_raw_blind: "" + + workflow: $_/workflow + + metadata: $_/inputs + config: $_/inputs/dataprod/config + par_overwrite: $_/inputs/dataprod/overrides + chan_map: $_/inputs/hardware/configuration + detector_status: $_/inputs/datasets + detector_db: $_/inputs/hardware/detectors + + tier: $_/generated/tier + tier_raw: /data2/public/prodenv/prod-blind/ref-raw/generated/tier/raw + tier_tcm: $_/generated/tier/tcm + tier_dsp: $_/generated/tier/dsp + tier_hit: $_/generated/tier/hit + tier_ann: $_/generated/tier/ann + tier_evt: $_/generated/tier/evt + tier_psp: $_/generated/tier/psp + tier_pht: $_/generated/tier/pht + tier_pan: $_/generated/tier/pan + tier_pet: $_/generated/tier/pet + tier_skm: $_/generated/tier/skm + + par: $_/generated/par + par_raw: $_/generated/par/raw + par_tcm: $_/generated/par/tcm + par_dsp: $_/generated/par/dsp + par_hit: $_/generated/par/hit + par_evt: $_/generated/par/evt + par_psp: $_/generated/par/psp + par_pht: $_/generated/par/pht + par_pet: $_/generated/par/pet + + plt: $_/generated/plt + log: $_/generated/log + + tmp_plt: $_/generated/tmp/plt + tmp_log: $_/generated/tmp/log + tmp_filelists: $_/generated/tmp/filelists + tmp_par: $_/generated/tmp/par + + src: $_/software/python/src + install: $_/.snakemake/legend-dataflow/venv + +table_format: + raw: ch{ch:07d}/raw + dsp: ch{ch:07d}/dsp + psp: ch{ch:07d}/dsp + hit: ch{ch:07d}/hit + pht: ch{ch:07d}/hit + evt: "{grp}/evt" + pet: "{grp}/evt" + skm: "{grp}/skm" + tcm: hardware_tcm_1 diff --git a/tests/dummy_cycle/generated/par/dsp/validity.jsonl b/tests/dummy_cycle/generated/par/dsp/validity.jsonl deleted file mode 100644 index c730b86..0000000 --- a/tests/dummy_cycle/generated/par/dsp/validity.jsonl +++ /dev/null @@ -1,3 +0,0 @@ -{"valid_from": "20230101T123456Z", "category": "all", "apply": ["cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json"]} -{"valid_from": "20230110T123456Z", "category": "all", "apply": ["lar/p00/r000/l200-p00-r000-lar-20230110T123456Z-par_dsp.json", "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json"]} -{"valid_from": "20230202T004321Z", "category": "all", "apply": ["cal/p00/r001/l200-p00-r001-cal-20230202T004321Z-par_dsp.json","lar/p00/r000/l200-p00-r000-lar-20230110T123456Z-par_dsp.json"]} diff --git a/tests/dummy_cycle/inputs/dataprod/overrides/dsp/validity.jsonl b/tests/dummy_cycle/inputs/dataprod/overrides/dsp/validity.jsonl deleted file mode 100644 index 4a13449..0000000 --- 
a/tests/dummy_cycle/inputs/dataprod/overrides/dsp/validity.jsonl +++ /dev/null @@ -1 +0,0 @@ -{"valid_from": "20230101T123456Z", "category": "all", "apply": ["cal/p00/r000/l200-p00-r000-cal-T%-par_dsp_energy-overwrite.json"]} diff --git a/tests/test_util.py b/tests/test_util.py index 38d8910..9d3c424 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,6 +1,7 @@ -import json +from datetime import datetime from pathlib import Path +import yaml from legenddataflow import ( FileKey, ParsKeyResolve, @@ -11,15 +12,17 @@ testprod = Path(__file__).parent / "dummy_cycle" -with (testprod / "config.json").open() as r: - setup = json.load(r) +with (testprod / "config.yaml").open() as r: + setup = yaml.safe_load(r) subst_vars(setup, var_values={"_": str(testprod)}) -setup = setup["setups"]["test"] def test_util(): assert utils.tier_path(setup) == str(testprod / "generated/tier") - assert utils.unix_time("20230101T123456Z") == 1672572896.0 + time = datetime.now() + assert int(utils.unix_time(time.strftime("%Y%m%dT%H%M%SZ"))) == int( + time.timestamp() + ) def test_filekey(): @@ -42,7 +45,7 @@ def test_filekey(): assert ( FileKey.get_filekey_from_pattern( key.get_path_from_filekey(patterns.get_pattern_tier(setup, "dsp"))[0], - utils.get_pattern_tier(setup, "dsp"), + patterns.get_pattern_tier(setup, "dsp"), ).name == key.name ) @@ -71,9 +74,10 @@ def test_create_pars_keylist(): "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.yaml", "lar/p00/r000/l200-p00-r000-lar-20230102T123456Z-par_dsp.yaml", } - keylist = sorted( - ParsKeyResolve.get_keys("-*-*-*-cal", patterns.get_pattern_tier_daq(setup)), + ParsKeyResolve.get_keys( + "-*-*-*-cal", patterns.get_pattern_tier_daq(setup, extension="*") + ), key=FileKey.get_unix_timestamp, ) assert keylist == [ @@ -98,6 +102,6 @@ def test_create_pars_keylist(): pkeylist, {"cal": ["par_dsp"], "lar": ["par_dsp"]} )[1].apply ) == { - "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json", - "lar/p00/r000/l200-p00-r000-lar-20230110T123456Z-par_dsp.json", + "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.yaml", + "lar/p00/r000/l200-p00-r000-lar-20230110T123456Z-par_dsp.yaml", } diff --git a/workflow/Snakefile b/workflow/Snakefile index 9fa6950..db7e3c3 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -18,24 +18,25 @@ from datetime import datetime from collections import OrderedDict import logging +from dbetto import AttrsDict from legendmeta import LegendMetadata from legenddataflow import CalGrouping from legenddataflow import utils utils.subst_vars_in_snakemake_config(workflow, config) +config = AttrsDict(config) check_in_cycle = True configs = utils.config_path(config) chan_maps = utils.chan_map_path(config) meta = utils.metadata_path(config) det_status = utils.det_status_path(config) -swenv = utils.runcmd(config) basedir = workflow.basedir -# wait for new pylegendmeta release -# if not Path(meta).exists(): -# meta = LegendMetadata() -# meta.checkout(config["configs"]["l200"]["legend_metadata_version"]) +time = datetime.now().strftime("%Y%m%dT%H%M%SZ") + +if not Path(meta).exists(): + LegendMetadata(meta).checkout(config.legend_metadata_version) part = CalGrouping(config, Path(det_status) / "cal_groupings.yaml") @@ -57,9 +58,11 @@ include: "rules/dsp_pars_geds.smk" include: "rules/dsp.smk" include: "rules/psp_pars_geds.smk" include: "rules/psp.smk" +include: "rules/hit_pars_geds.smk" include: "rules/hit.smk" +include: "rules/pht_pars_geds.smk" +include: "rules/pht_pars_geds_fast.smk" include: "rules/pht.smk" -include: 
"rules/pht_fast.smk" include: "rules/ann.smk" include: "rules/evt.smk" include: "rules/skm.smk" @@ -146,18 +149,6 @@ onsuccess: if os.path.exists(utils.filelist_path(config)): os.rmdir(utils.filelist_path(config)) - # remove logs - files = glob.glob(os.path.join(utils.tmp_log_path(config), "*", "*.log")) - for file in files: - if os.path.isfile(file): - os.remove(file) - dirs = glob.glob(os.path.join(utils.tmp_log_path(config), "*")) - for d in dirs: - if os.path.isdir(d): - os.rmdir(d) - if os.path.exists(utils.tmp_log_path(config)): - os.rmdir(utils.tmp_log_path(config)) - rule gen_filelist: """Generate file list. diff --git a/workflow/Snakefile-build-raw b/workflow/Snakefile-build-raw index fafd20c..6346978 100644 --- a/workflow/Snakefile-build-raw +++ b/workflow/Snakefile-build-raw @@ -10,16 +10,21 @@ import os, sys from pathlib import Path from legenddataflow import patterns as patt from legenddataflow import utils, execenv, ParsKeyResolve +from datetime import datetime +from dbetto import AttrsDict utils.subst_vars_in_snakemake_config(workflow, config) +config = AttrsDict(config) check_in_cycle = True swenv = execenv.execenv_prefix(config) meta_path = utils.metadata_path(config) det_status = utils.det_status_path(config) +time = datetime.now().strftime("%Y%m%dT%H%M%SZ") + if not Path(meta_path).exists(): - LegendMetadata(meta_path).checkout(config["legend_metadata_version"]) + LegendMetadata(meta_path).checkout(config.legend_metadata_version) wildcard_constraints: diff --git a/workflow/rules/ann.smk b/workflow/rules/ann.smk index 2565514..8e7429f 100644 --- a/workflow/rules/ann.smk +++ b/workflow/rules/ann.smk @@ -9,28 +9,28 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars, ) +from legenddataflow.execenv import execenv_smk_py_script rule build_ann: input: - dsp_file=get_pattern_tier(setup, "dsp", check_in_cycle=False), + dsp_file=get_pattern_tier(config, "dsp", check_in_cycle=False), pars_file=lambda wildcards: get_input_par_file(wildcards, "ann", "cuts"), params: timestamp="{timestamp}", datatype="{datatype}", output: - tier_file=get_pattern_tier(setup, "ann", check_in_cycle=check_in_cycle), - db_file=get_pattern_pars_tmp(setup, "ann_db"), + tier_file=get_pattern_tier(config, "ann", check_in_cycle=check_in_cycle), + db_file=get_pattern_pars_tmp(config, "ann_db"), log: - get_pattern_log(setup, "tier_ann"), + get_pattern_log(config, "tier_ann", time), group: "tier-ann" resources: runtime=300, mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15, shell: - "{swenv} python3 -B " - f"{workflow.source_path('../scripts/build_dsp.py')} " + f'{execenv_smk_py_script(config, "build_tier_dsp")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -45,24 +45,23 @@ rule build_ann: rule build_pan: input: - dsp_file=get_pattern_tier(setup, "psp", check_in_cycle=False), + dsp_file=get_pattern_tier(config, "psp", check_in_cycle=False), pars_file=lambda wildcards: get_input_par_file(wildcards, "ann", "cuts"), params: timestamp="{timestamp}", datatype="{datatype}", output: - tier_file=get_pattern_tier(setup, "pan", check_in_cycle=check_in_cycle), - db_file=get_pattern_pars_tmp(setup, "pan_db"), + tier_file=get_pattern_tier(config, "pan", check_in_cycle=check_in_cycle), + db_file=get_pattern_pars_tmp(config, "pan_db"), log: - get_pattern_log(setup, "tier_pan"), + get_pattern_log(config, "tier_pan", time), group: "tier-ann" resources: runtime=300, mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15, shell: - "{swenv} python3 -B " - 
f"{workflow.source_path('../scripts/build_dsp.py')} " + f'{execenv_smk_py_script(config, "build_tier_dsp")}' "--log {log} " "--configs {configs} " "--metadata {meta} " diff --git a/workflow/rules/blinding_calibration.smk b/workflow/rules/blinding_calibration.smk index b8076d7..1a69313 100644 --- a/workflow/rules/blinding_calibration.smk +++ b/workflow/rules/blinding_calibration.smk @@ -12,6 +12,7 @@ from legenddataflow.patterns import ( get_pattern_log_channel, ) from pathlib import Path +from legenddataflow.execenv import execenv_smk_py_script rule build_blinding_calibration: @@ -20,7 +21,7 @@ rule build_blinding_calibration: if so creates a file whose existence will be checked by the raw blinding before proceeding with blinding the phy data """ input: - files=Path(filelist_path(setup)) + files=Path(filelist_path(config)) / "all-{experiment}-{period}-{run}-cal-raw.filelist", params: timestamp="{timestamp}", @@ -28,17 +29,16 @@ rule build_blinding_calibration: channel="{channel}", meta=meta, output: - par_file=temp(get_pattern_pars_tmp_channel(setup, "raw_blindcal")), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "raw_blindcal")), + par_file=temp(get_pattern_pars_tmp_channel(config, "raw_blindcal")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "raw_blindcal")), log: - get_pattern_log_channel(setup, "pars_hit_blind_cal"), + get_pattern_log_channel(config, "pars_hit_blind_cal", time), group: "par-raw-blinding" resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/blinding_calibration.py " + f'{execenv_smk_py_script(config, "par_geds_raw_blindcal")}' "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -53,7 +53,7 @@ rule build_blinding_calibration: rule build_plts_blinding: input: lambda wildcards: get_plt_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, @@ -62,12 +62,11 @@ rule build_plts_blinding: name="blindcal", ), output: - get_pattern_plts(setup, "raw", name="blindcal"), + get_pattern_plts(config, "raw", name="blindcal"), group: "merge-blindcal" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input} " "--output {output} " @@ -75,7 +74,7 @@ rule build_plts_blinding: rule build_pars_blinding: input: infiles=lambda wildcards: get_par_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, @@ -83,13 +82,12 @@ rule build_pars_blinding: chan_maps, name="blindcal", ), - plts=get_pattern_plts(setup, "raw", name="blindcal"), + plts=get_pattern_plts(config, "raw", name="blindcal"), output: - get_pattern_pars(setup, "raw", name="blindcal", check_in_cycle=check_in_cycle), + get_pattern_pars(config, "raw", name="blindcal", check_in_cycle=check_in_cycle), group: "merge-blindcal" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input.infiles} " "--output {output} " diff --git a/workflow/rules/blinding_check.smk b/workflow/rules/blinding_check.smk index b142c19..bd9b796 100644 --- a/workflow/rules/blinding_check.smk +++ b/workflow/rules/blinding_check.smk @@ -12,6 +12,7 @@ from legenddataflow.patterns import ( get_pattern_plts, get_pattern_pars, ) +from legenddataflow.execenv import execenv_smk_py_script from pathlib import Path @@ -21,7 +22,7 @@ rule 
build_blinding_check: if so creates a file whose existence will be checked by the raw blinding before proceeding with blinding the phy data """ input: - files=Path(filelist_path(setup)) + files=Path(filelist_path(config)) / "all-{experiment}-{period}-{run}-cal-raw.filelist", par_file=get_blinding_curve_file, params: @@ -29,17 +30,16 @@ rule build_blinding_check: datatype="cal", channel="{channel}", output: - check_file=temp(get_pattern_pars_tmp_channel(setup, "raw")), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "raw")), + check_file=temp(get_pattern_pars_tmp_channel(config, "raw")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "raw")), log: - get_pattern_log_channel(setup, "pars_hit_blind_check"), + get_pattern_log_channel(config, "pars_hit_blind_check", time), group: "par-hit" resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/check_blinding.py " + f'{execenv_smk_py_script(config, "par_geds_raw_blindcheck")}' "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -55,7 +55,7 @@ rule build_blinding_check: rule build_plts_raw: input: lambda wildcards: get_plt_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, @@ -63,12 +63,11 @@ rule build_plts_raw: chan_maps, ), output: - get_pattern_plts(setup, "raw"), + get_pattern_plts(config, "raw"), group: "merge-raw" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input} " "--output {output} " @@ -76,7 +75,7 @@ rule build_plts_raw: rule build_pars_raw: input: infiles=lambda wildcards: get_par_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, @@ -84,13 +83,12 @@ rule build_pars_raw: chan_maps, ), plts=get_pattern_plts( - setup, + config, "raw", ), output: - get_pattern_pars(setup, "raw", check_in_cycle=check_in_cycle), + get_pattern_pars(config, "raw", check_in_cycle=check_in_cycle), group: "merge-raw" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' diff --git a/workflow/rules/chanlist_gen.smk b/workflow/rules/chanlist_gen.smk index 4e46f13..b6a3ea8 100644 --- a/workflow/rules/chanlist_gen.smk +++ b/workflow/rules/chanlist_gen.smk @@ -9,16 +9,11 @@ from legenddataflow.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, ) -from legenddataflow.utils import filelist_path, runcmd +from legenddataflow import execenv_smk_py_script +from legenddataflow.utils import filelist_path -def get_par_chanlist( - setup, keypart, tier, basedir, det_status, chan_maps, name=None, extension="yaml" -): - tier_pattern = "((?P[^_]+)(\\_(?P[^_]+)(\\_(?P[^_]+)?)?)?)?" 
- keypart_rx = re.compile(tier_pattern) - d = keypart_rx.match(tier).groupdict() - +def get_chanlist(setup, keypart, workflow, config, det_status, chan_maps): key = ChannelProcKey.parse_keypart(keypart) flist_path = filelist_path(setup) @@ -28,37 +23,36 @@ def get_par_chanlist( f"all-{key.experiment}-{key.period}-{key.run}-cal-{key.timestamp}-channels.chankeylist.{random.randint(0,99999):05d}", ) - cmd = f"{runcmd(setup)} python3 -B {basedir}/scripts/create_chankeylist.py --det_status {det_status}" - cmd += f" --channelmap {chan_maps} --timestamp {key.timestamp} --datatype cal --output_file {output_file}" + cmd = execenv_smk_py_script(config, "create_chankeylist") + cmd += f" --det_status {det_status} --channelmap {chan_maps} --timestamp {key.timestamp} " + cmd += f"--datatype cal --output_file {output_file}" os.system(cmd) with open(output_file) as r: chan_list = r.read().splitlines() + os.remove(output_file) + return chan_list + + +def get_par_chanlist( + setup, keypart, tier, basedir, det_status, chan_maps, name=None, extension="yaml" +): + + chan_list = get_chanlist(setup, keypart, workflow, config, det_status, chan_maps) par_pattern = get_pattern_pars_tmp_channel(setup, tier, name, extension) filenames = ChannelProcKey.get_channel_files(keypart, par_pattern, chan_list) - os.remove(output_file) + return filenames def get_plt_chanlist(setup, keypart, tier, basedir, det_status, chan_maps, name=None): - key = ChannelProcKey.parse_keypart(keypart) - - output_file = os.path.join( - filelist_path(setup), - f"all-{key.experiment}-{key.period}-{key.run}-cal-{key.timestamp}-channels.chankeylist.{random.randint(0,99999):05d}", - ) - cmd = f"{runcmd(setup)} python3 -B {basedir}/scripts/create_chankeylist.py --det_status {det_status}" - cmd += f" --channelmap {chan_maps} --timestamp {key.timestamp} --datatype cal --output_file {output_file}" - os.system(cmd) - - with open(output_file) as r: - chan_list = r.read().splitlines() + chan_list = get_chanlist(setup, keypart, workflow, config, det_status, chan_maps) par_pattern = get_pattern_plts_tmp_channel(setup, tier, name) filenames = ChannelProcKey.get_channel_files(keypart, par_pattern, chan_list) - os.remove(output_file) + return filenames diff --git a/workflow/rules/channel_merge.smk b/workflow/rules/channel_merge.smk new file mode 100644 index 0000000..b970840 --- /dev/null +++ b/workflow/rules/channel_merge.smk @@ -0,0 +1,160 @@ +from legenddataflow.patterns import ( + get_pattern_pars_tmp_channel, + get_pattern_plts_tmp_channel, + get_pattern_plts, + get_pattern_tier, + get_pattern_pars_tmp, + get_pattern_pars, +) +from legenddataflow.utils import set_last_rule_name +import inspect +from legenddataflow.execenv import execenv_smk_py_script + +def build_merge_rules(tier, lh5_merge=False, lh5_tier=None): + if lh5_tier is None: + lh5_tier = tier + rule: + input: + lambda wildcards: get_plt_chanlist( + config, + f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", + tier, + basedir, + det_status, + chan_maps, + ), + params: + timestamp="{timestamp}", + datatype="cal", + output: + get_pattern_plts(config, tier), + group: + f"merge-{tier}" + shell: + f'{execenv_smk_py_script(config, "merge_channels")}' + "--input {input} " + "--output {output} " + "--channelmap {meta} " + + set_last_rule_name(workflow, f"build_plts_{tier}") + + rule: + input: + lambda wildcards: get_par_chanlist( + config, + f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", + tier, + 
basedir, + det_status, + chan_maps, + name="objects", + extension="pkl", + ), + params: + timestamp="{timestamp}", + datatype="cal", + output: + get_pattern_pars( + config, + tier, + name="objects", + extension="dir", + check_in_cycle=check_in_cycle, + ), + group: + f"merge-{tier}" + shell: + f'{execenv_smk_py_script(config, "merge_channels")}' + "--input {input} " + "--output {output} " + "--timestamp {params.timestamp} " + "--channelmap {meta} " + + set_last_rule_name(workflow, f"build_pars_{tier}_objects") + + if lh5_merge is True: + rule: + input: + lambda wildcards: get_par_chanlist( + config, + f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", + tier, + basedir, + det_status, + chan_maps, + ), + params: + timestamp="{timestamp}", + datatype="cal", + output: + temp( + get_pattern_pars_tmp( + config, + tier, + datatype="cal", + ) + ), + group: + f"merge-{tier}" + shell: + f'{execenv_smk_py_script(config, "merge_channels")}' + "--input {input} " + "--output {output} " + "--timestamp {params.timestamp} " + "--channelmap {meta} " + + set_last_rule_name(workflow, f"build_pars_{tier}_db") + + rule: + input: + in_files=lambda wildcards: get_par_chanlist( + config, + f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", + lh5_tier, + basedir, + det_status, + chan_maps, + extension="lh5" if lh5_merge is True else inspect.signature(get_par_chanlist).parameters['extension'].default, + ), + in_db=get_pattern_pars_tmp( + config, + tier, + datatype="cal", + ) if lh5_merge is True else [], + plts=get_pattern_plts(config, tier), + objects=get_pattern_pars( + config, + tier, + name="objects", + extension="dir", + check_in_cycle=check_in_cycle, + ), + params: + timestamp="{timestamp}", + datatype="cal", + output: + out_file=get_pattern_pars( + config, + tier, + extension="lh5" if lh5_merge is True else inspect.signature(get_pattern_pars).parameters['extension'].default, + check_in_cycle=check_in_cycle, + ), + out_db=get_pattern_pars(config, tier, check_in_cycle=check_in_cycle) if lh5_merge is True else [], + group: + f"merge-{tier}" + run: + shell_string = ( + f'{execenv_smk_py_script(config, "merge_channels")}' + "--output {output.out_file} " + "--input {input.in_files} " + "--timestamp {params.timestamp} " + "--channelmap {meta} " + ) + if lh5_merge is True: + shell_string += ( + "--in_db {input.in_db} " + "--out_db {output.out_db} " + ) + shell(shell_string) + + set_last_rule_name(workflow, f"build_pars_{tier}") diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 17571e3..5a9bff2 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -10,20 +10,20 @@ from legenddataflow import utils def ro(path): - return utils.as_ro(setup, path) + return utils.as_ro(config, path) def get_blinding_curve_file(wildcards): """func to get the blinding calibration curves from the overrides""" par_files = Catalog.get_files( - Path(patt.par_overwrite_path(setup)) / "raw" / "validity.yaml", + Path(patt.par_overwrite_path(config)) / "raw" / "validity.yaml", wildcards.timestamp, ) if isinstance(par_files, str): - return str(Path(patt.par_overwrite_path(setup)) / "raw" / par_files) + return str(Path(patt.par_overwrite_path(config)) / "raw" / par_files) else: return [ - str(Path(patt.par_overwrite_path(setup)) / "raw" / par_file) + str(Path(patt.par_overwrite_path(config)) / "raw" / par_file) for par_file in par_files ] @@ -31,13 +31,13 @@ def get_blinding_curve_file(wildcards): def 
get_blinding_check_file(wildcards): """func to get the right blinding check file""" par_files = Catalog.get_files( - Path(patt.get_pars_path(setup, "raw")) / "validity.yaml", wildcards.timestamp + Path(patt.get_pars_path(config, "raw")) / "validity.yaml", wildcards.timestamp ) if isinstance(par_files, str): - return Path(patt.get_pars_path(setup, "raw")) / par_files + return Path(patt.get_pars_path(config, "raw")) / par_files else: return [ - Path(patt.get_pars_path(setup, "raw")) / par_file for par_file in par_files + Path(patt.get_pars_path(config, "raw")) / par_file for par_file in par_files ] @@ -67,19 +67,19 @@ def set_last_rule_name(workflow, new_name): def get_input_par_file(wildcards, tier, name): - par_overwrite_file = Path(patt.par_overwrite_path(setup)) / tier / "validity.yaml" + par_overwrite_file = Path(patt.par_overwrite_path(config)) / tier / "validity.yaml" pars_files_overwrite = Catalog.get_files( par_overwrite_file, wildcards.timestamp, ) for pars_file in pars_files_overwrite: if name in str(pars_file): - return Path(patt.par_overwrite_path(setup)) / tier / pars_file + return Path(patt.par_overwrite_path(config)) / tier / pars_file raise ValueError(f"Could not find model in {pars_files_overwrite}") def get_overwrite_file(tier, wildcards=None, timestamp=None, name=None): - par_overwrite_file = Path(patt.par_overwrite_path(setup)) / tier / "validity.yaml" + par_overwrite_file = Path(patt.par_overwrite_path(config)) / tier / "validity.yaml" if timestamp is not None: pars_files_overwrite = Catalog.get_files( par_overwrite_file, @@ -97,7 +97,7 @@ def get_overwrite_file(tier, wildcards=None, timestamp=None, name=None): out_files = [] for pars_file in pars_files_overwrite: if fullname in str(pars_file): - out_files.append(Path(patt.par_overwrite_path(setup)) / tier / pars_file) + out_files.append(Path(patt.par_overwrite_path(config)) / tier / pars_file) if len(out_files) == 0: raise ValueError(f"Could not find name in {pars_files_overwrite}") else: @@ -109,8 +109,8 @@ def get_search_pattern(tier): This func gets the search pattern for the relevant tier passed. """ if tier == "daq": - return patt.get_pattern_tier_daq_unsorted(setup, extension="*") + return patt.get_pattern_tier_daq_unsorted(config, extension="*") elif tier == "raw": - return patt.get_pattern_tier_daq(setup, extension="*") + return patt.get_pattern_tier_daq(config, extension="*") else: - return patt.get_pattern_tier(setup, "raw", check_in_cycle=False) + return patt.get_pattern_tier(config, "raw", check_in_cycle=False) diff --git a/workflow/rules/dsp.smk b/workflow/rules/dsp.smk index 501ed52..f296716 100644 --- a/workflow/rules/dsp.smk +++ b/workflow/rules/dsp.smk @@ -7,7 +7,6 @@ Snakemake rules for processing dsp tier. 
from legenddataflow.pars_loading import ParsCatalog from legenddataflow.create_pars_keylist import ParsKeyResolve from pathlib import Path -from legenddataflow.create_pars_keylist import ParsKeyResolve from legenddataflow.patterns import ( get_pattern_plts, get_pattern_tier, @@ -15,166 +14,27 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars, ) +from legenddataflow.execenv import execenv_smk_py_script dsp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier(setup, "raw", check_in_cycle=False), + get_pattern_tier(config, "raw", check_in_cycle=False), {"cal": ["par_dsp"], "lar": ["par_dsp"]}, ) -dsp_par_cat_file = Path(pars_path(setup)) / "dsp" / "validity.yaml" -if dsp_par_cat_file.is_file(): - dsp_par_cat_file.unlink() -Path(dsp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) -ParsKeyResolve.write_to_yaml(dsp_par_catalog, dsp_par_cat_file) - - -rule build_plts_dsp: - input: - lambda wildcards: get_plt_chanlist( - setup, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "dsp", - basedir, - det_status, - chan_maps, - ), - params: - timestamp="{timestamp}", - datatype="cal", - output: - get_pattern_plts(setup, "dsp"), - group: - "merge-dsp" - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " - "--input {input} " - "--output {output} " - "--channelmap {meta} " - -rule build_pars_dsp_objects: - input: - lambda wildcards: get_par_chanlist( - setup, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "dsp", - basedir, - det_status, - chan_maps, - name="objects", - extension="pkl", - ), - params: - timestamp="{timestamp}", - datatype="cal", - output: - get_pattern_pars( - setup, - "dsp", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - ), - group: - "merge-dsp" - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " - "--input {input} " - "--output {output} " - "--timestamp {params.timestamp} " - "--channelmap {meta} " +include: "channel_merge.smk" -rule build_pars_dsp_db: - input: - lambda wildcards: get_par_chanlist( - setup, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "dsp", - basedir, - det_status, - chan_maps, - ), - params: - timestamp="{timestamp}", - datatype="cal", - output: - temp( - get_pattern_pars_tmp( - setup, - "dsp", - datatype="cal", - ) - ), - group: - "merge-dsp" - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " - "--input {input} " - "--output {output} " - "--timestamp {params.timestamp} " - "--channelmap {meta} " - - -rule build_pars_dsp: - input: - in_files=lambda wildcards: get_par_chanlist( - setup, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "dsp", - basedir, - det_status, - chan_maps, - name="dplms", - extension="lh5", - ), - in_db=get_pattern_pars_tmp( - setup, - "dsp", - datatype="cal", - ), - plts=get_pattern_plts(setup, "dsp"), - objects=get_pattern_pars( - setup, - "dsp", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - ), - params: - timestamp="{timestamp}", - datatype="cal", - output: - out_file=get_pattern_pars( - setup, - "dsp", - extension="lh5", - check_in_cycle=check_in_cycle, - ), - out_db=get_pattern_pars(setup, "dsp", check_in_cycle=check_in_cycle), - group: - "merge-dsp" - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " - 
"--output {output.out_file} " - "--in_db {input.in_db} " - "--out_db {output.out_db} " - "--input {input.in_files} " - "--timestamp {params.timestamp} " - "--channelmap {meta} " +build_merge_rules("dsp", lh5_merge=True) rule build_dsp: input: - raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), + raw_file=get_pattern_tier(config, "raw", check_in_cycle=False), pars_file=ancient( lambda wildcards: ParsCatalog.get_par_file( - setup, wildcards.timestamp, "dsp" + config, wildcards.timestamp, "dsp" ) ), params: @@ -182,18 +42,17 @@ rule build_dsp: datatype="{datatype}", ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, output: - tier_file=get_pattern_tier(setup, "dsp", check_in_cycle=check_in_cycle), - db_file=get_pattern_pars_tmp(setup, "dsp_db"), + tier_file=get_pattern_tier(config, "dsp", check_in_cycle=check_in_cycle), + db_file=get_pattern_pars_tmp(config, "dsp_db"), log: - get_pattern_log(setup, "tier_dsp"), + get_pattern_log(config, "tier_dsp", time), group: "tier-dsp" resources: runtime=300, mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/build_dsp.py " + f'{execenv_smk_py_script(config, "build_tier_dsp")}' "--log {log} " "--tier dsp " f"--configs {ro(configs)} " diff --git a/workflow/rules/dsp_pars_geds.smk b/workflow/rules/dsp_pars_geds.smk index f526d6b..86b8342 100644 --- a/workflow/rules/dsp_pars_geds.smk +++ b/workflow/rules/dsp_pars_geds.smk @@ -4,45 +4,39 @@ Snakemake rules for building dsp pars for HPGes, before running build_dsp() - extraction of energy filter parameters and charge trapping correction for each channel from cal data """ -from legenddataflow.create_pars_keylist import pars_key_resolve +from legenddataflow.create_pars_keylist import ParsKeyResolve from legenddataflow.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_tier_raw, + get_pattern_tier, get_pattern_log, get_pattern_pars, ) - -dsp_par_catalog = pars_key_resolve.get_par_catalog( - ["-*-*-*-cal"], - get_pattern_tier_raw(setup), - {"cal": ["par_dsp"], "lar": ["par_dsp"]}, -) +from legenddataflow.execenv import execenv_smk_py_script rule build_pars_dsp_tau_geds: input: files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" + filelist_path(config), "all-{experiment}-{period}-{run}-cal-raw.filelist" ), - pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), + pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), params: timestamp="{timestamp}", datatype="cal", channel="{channel}", output: - decay_const=temp(get_pattern_pars_tmp_channel(setup, "dsp", "decay_constant")), - plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "decay_constant")), + decay_const=temp(get_pattern_pars_tmp_channel(config, "dsp", "decay_constant")), + plots=temp(get_pattern_plts_tmp_channel(config, "dsp", "decay_constant")), log: - get_pattern_log_channel(setup, "par_dsp_decay_constant"), + get_pattern_log_channel(config, "par_dsp_decay_constant", time), group: "par-dsp" resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_tau_geds.py " + f'{execenv_smk_py_script(config, "par_geds_dsp_tau")}' "--configs {configs} " "--log {log} " "--datatype {params.datatype} " @@ -57,27 +51,26 @@ rule build_pars_dsp_tau_geds: rule build_pars_evtsel_geds: input: files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" + filelist_path(config), 
"all-{experiment}-{period}-{run}-cal-raw.filelist" ), - pulser_file=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), - database=get_pattern_pars_tmp_channel(setup, "dsp", "decay_constant"), + pulser_file=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + database=get_pattern_pars_tmp_channel(config, "dsp", "decay_constant"), raw_cal=get_blinding_curve_file, params: timestamp="{timestamp}", datatype="cal", channel="{channel}", output: - peak_file=temp(get_pattern_pars_tmp_channel(setup, "dsp", "peaks", "lh5")), + peak_file=temp(get_pattern_pars_tmp_channel(config, "dsp", "peaks", "lh5")), log: - get_pattern_log_channel(setup, "par_dsp_event_selection"), + get_pattern_log_channel(config, "par_dsp_event_selection", time), group: "par-dsp" resources: runtime=300, mem_swap=70, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_evtsel_geds.py " + f'{execenv_smk_py_script(config, "par_geds_dsp_evtsel")}' "--configs {configs} " "--log {log} " "--datatype {params.datatype} " @@ -94,28 +87,27 @@ rule build_pars_evtsel_geds: rule build_pars_dsp_nopt_geds: input: files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-fft-raw.filelist" + filelist_path(config), "all-{experiment}-{period}-{run}-fft-raw.filelist" ), - database=get_pattern_pars_tmp_channel(setup, "dsp", "decay_constant"), - inplots=get_pattern_plts_tmp_channel(setup, "dsp", "decay_constant"), + database=get_pattern_pars_tmp_channel(config, "dsp", "decay_constant"), + inplots=get_pattern_plts_tmp_channel(config, "dsp", "decay_constant"), params: timestamp="{timestamp}", datatype="cal", channel="{channel}", output: dsp_pars_nopt=temp( - get_pattern_pars_tmp_channel(setup, "dsp", "noise_optimization") + get_pattern_pars_tmp_channel(config, "dsp", "noise_optimization") ), - plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "noise_optimization")), + plots=temp(get_pattern_plts_tmp_channel(config, "dsp", "noise_optimization")), log: - get_pattern_log_channel(setup, "par_dsp_noise_optimization"), + get_pattern_log_channel(config, "par_dsp_noise_optimization", time), group: "par-dsp" resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_nopt_geds.py " + f'{execenv_smk_py_script(config, "par_geds_dsp_nopt")}' "--database {input.database} " "--configs {configs} " "--log {log} " @@ -132,30 +124,27 @@ rule build_pars_dsp_nopt_geds: rule build_pars_dsp_dplms_geds: input: fft_files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-fft-raw.filelist" + filelist_path(config), "all-{experiment}-{period}-{run}-fft-raw.filelist" ), - peak_file=get_pattern_pars_tmp_channel(setup, "dsp", "peaks", "lh5"), - database=get_pattern_pars_tmp_channel(setup, "dsp", "noise_optimization"), - inplots=get_pattern_plts_tmp_channel(setup, "dsp", "noise_optimization"), + peak_file=get_pattern_pars_tmp_channel(config, "dsp", "peaks", "lh5"), + database=get_pattern_pars_tmp_channel(config, "dsp", "noise_optimization"), + inplots=get_pattern_plts_tmp_channel(config, "dsp", "noise_optimization"), params: timestamp="{timestamp}", datatype="cal", channel="{channel}", output: - dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp", "dplms")), - lh5_path=temp( - get_pattern_pars_tmp_channel(setup, "dsp", "dplms", extension="lh5") - ), - plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "dplms")), + dsp_pars=temp(get_pattern_pars_tmp_channel(config, "dsp", "dplms")), + lh5_path=temp(get_pattern_pars_tmp_channel(config, "dsp", extension="lh5")), + 
plots=temp(get_pattern_plts_tmp_channel(config, "dsp", "dplms")), log: - get_pattern_log_channel(setup, "pars_dsp_dplms"), + get_pattern_log_channel(config, "pars_dsp_dplms", time), group: "par-dsp" resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_dplms_geds.py " + f'{execenv_smk_py_script(config, "par_geds_dsp_dplms")}' "--fft_raw_filelist {input.fft_files} " "--peak_file {input.peak_file} " "--database {input.database} " @@ -173,28 +162,27 @@ rule build_pars_dsp_dplms_geds: # This rule builds the optimal energy filter parameters for the dsp using calibration dsp files rule build_pars_dsp_eopt_geds: input: - peak_file=get_pattern_pars_tmp_channel(setup, "dsp", "peaks", "lh5"), - decay_const=get_pattern_pars_tmp_channel(setup, "dsp", "dplms"), - inplots=get_pattern_plts_tmp_channel(setup, "dsp", "dplms"), + peak_file=get_pattern_pars_tmp_channel(config, "dsp", "peaks", "lh5"), + decay_const=get_pattern_pars_tmp_channel(config, "dsp", "dplms"), + inplots=get_pattern_plts_tmp_channel(config, "dsp", "dplms"), params: timestamp="{timestamp}", datatype="cal", channel="{channel}", output: - dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp_eopt")), + dsp_pars=temp(get_pattern_pars_tmp_channel(config, "dsp_eopt")), qbb_grid=temp( - get_pattern_pars_tmp_channel(setup, "dsp", "objects", extension="pkl") + get_pattern_pars_tmp_channel(config, "dsp", "objects", extension="pkl") ), - plots=temp(get_pattern_plts_tmp_channel(setup, "dsp")), + plots=temp(get_pattern_plts_tmp_channel(config, "dsp")), log: - get_pattern_log_channel(setup, "pars_dsp_eopt"), + get_pattern_log_channel(config, "pars_dsp_eopt", time), group: "par-dsp" resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_eopt_geds.py " + f'{execenv_smk_py_script(config, "par_geds_dsp_eopt")}' "--log {log} " "--configs {configs} " "--datatype {params.datatype} " @@ -210,21 +198,22 @@ rule build_pars_dsp_eopt_geds: rule build_svm_dsp_geds: input: - hyperpars=lambda wildcards: get_svm_file(wildcards, "dsp", "svm_hyperpars"), - train_data=lambda wildcards: get_svm_file( + hyperpars=lambda wildcards: get_input_par_file( wildcards, "dsp", "svm_hyperpars" + ), + train_data=lambda wildcards: str( + get_input_par_file(wildcards, "dsp", "svm_hyperpars") ).replace("hyperpars.json", "train.lh5"), output: - dsp_pars=get_pattern_pars(setup, "dsp", "svm", "pkl"), + dsp_pars=get_pattern_pars(config, "dsp", "svm", "pkl"), log: - get_pattern_log(setup, "pars_dsp_svm").replace("{datatype}", "cal"), + str(get_pattern_log(config, "pars_dsp_svm", time)).replace("{datatype}", "cal"), group: "par-dsp-svm" resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_build_svm_geds.py " + f'{execenv_smk_py_script(config, "par_geds_dsp_svm_build")}' "--log {log} " "--train_data {input.train_data} " "--train_hyperpars {input.hyperpars} " @@ -233,19 +222,18 @@ rule build_svm_dsp_geds: rule build_pars_dsp_svm_geds: input: - dsp_pars=get_pattern_pars_tmp_channel(setup, "dsp_eopt"), - svm_file=get_pattern_pars(setup, "dsp", "svm", "pkl"), + dsp_pars=get_pattern_pars_tmp_channel(config, "dsp_eopt"), + svm_file=get_pattern_pars(config, "dsp", "svm", "pkl"), output: - dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp")), + dsp_pars=temp(get_pattern_pars_tmp_channel(config, "dsp")), log: - get_pattern_log_channel(setup, "pars_dsp_svm"), + get_pattern_log_channel(config, "pars_dsp_svm", time), group: "par-dsp" resources: runtime=300, shell: - "{swenv} python3 -B " - 
"{basedir}/../scripts/pars_dsp_svm_geds.py " + f'{execenv_smk_py_script(config, "par_geds_dsp_svm")}' "--log {log} " "--input_file {input.dsp_pars} " "--output_file {output.dsp_pars} " diff --git a/workflow/rules/evt.smk b/workflow/rules/evt.smk index d14b8cb..1bcb2a4 100644 --- a/workflow/rules/evt.smk +++ b/workflow/rules/evt.smk @@ -9,33 +9,34 @@ from legenddataflow.patterns import ( get_pattern_pars, get_pattern_log_concat, ) +from legenddataflow.execenv import execenv_smk_py_script rule build_evt: input: - dsp_file=get_pattern_tier(setup, "dsp", check_in_cycle=False), - hit_file=get_pattern_tier(setup, "hit", check_in_cycle=False), - tcm_file=get_pattern_tier(setup, "tcm", check_in_cycle=False), + dsp_file=get_pattern_tier(config, "dsp", check_in_cycle=False), + hit_file=get_pattern_tier(config, "hit", check_in_cycle=False), + tcm_file=get_pattern_tier(config, "tcm", check_in_cycle=False), ann_file=lambda wildcards: ( None if int(wildcards["period"][1:]) > 11 - else get_pattern_tier(setup, "ann", check_in_cycle=False) + else get_pattern_tier(config, "ann", check_in_cycle=False) ), par_files=lambda wildcards: ParsCatalog.get_par_file( - setup, wildcards.timestamp, "hit" + config, wildcards.timestamp, "hit" ), xtalk_matrix=lambda wildcards: get_input_par_file( tier="evt", wildcards=wildcards, name="xtc" ), output: - get_pattern_tier(setup, "evt", check_in_cycle=check_in_cycle), + get_pattern_tier(config, "evt", check_in_cycle=check_in_cycle), params: timestamp="{timestamp}", datatype="{datatype}", tier="evt", ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, log: - get_pattern_log(setup, f"tier_evt"), + get_pattern_log(config, f"tier_evt", time), group: "tier-evt" resources: @@ -43,8 +44,7 @@ rule build_evt: mem_swap=50, run: shell_string = ( - f"{swenv} python3 -B " - f"{basedir}/../scripts/build_evt.py " + f'{execenv_smk_py_script(config, "build_tier_evt")}' f"--configs {ro(configs)} " f"--metadata {ro(meta)} " "--log {log} " @@ -66,29 +66,29 @@ rule build_evt: rule build_pet: input: - dsp_file=get_pattern_tier(setup, "psp", check_in_cycle=False), - hit_file=get_pattern_tier(setup, "pht", check_in_cycle=False), - tcm_file=get_pattern_tier(setup, "tcm", check_in_cycle=False), + dsp_file=get_pattern_tier(config, "psp", check_in_cycle=False), + hit_file=get_pattern_tier(config, "pht", check_in_cycle=False), + tcm_file=get_pattern_tier(config, "tcm", check_in_cycle=False), ann_file=lambda wildcards: ( None if int(wildcards["period"][1:]) > 11 - else get_pattern_tier(setup, "pan", check_in_cycle=False) + else get_pattern_tier(config, "pan", check_in_cycle=False) ), par_files=lambda wildcards: ParsCatalog.get_par_file( - setup, wildcards.timestamp, "pht" + config, wildcards.timestamp, "pht" ), xtalk_matrix=lambda wildcards: get_input_par_file( tier="pet", wildcards=wildcards, name="xtc" ), output: - get_pattern_tier(setup, "pet", check_in_cycle=check_in_cycle), + get_pattern_tier(config, "pet", check_in_cycle=check_in_cycle), params: timestamp="{timestamp}", datatype="{datatype}", tier="pet", ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, log: - get_pattern_log(setup, f"tier_pet"), + get_pattern_log(config, f"tier_pet", time), group: "tier-evt" resources: @@ -96,8 +96,7 @@ rule build_pet: mem_swap=50, run: shell_string = ( - f"{swenv} python3 -B " - f"{basedir}/../scripts/build_evt.py " + f'{execenv_smk_py_script(config, "build_tier_evt")}' f"--configs {ro(configs)} " f"--metadata {ro(meta)} " "--log {log} " @@ -126,25 +125,27 @@ for evt_tier in ("evt", 
"pet"): lambda wildcards: sorted( get_filelist_full_wildcards( wildcards, - setup, - get_pattern_tier_raw(setup), + config, + get_pattern_tier_raw(config), tier, ignore_keys_file=os.path.join(configs, "ignore_keys.keylist"), ) ), output: - get_pattern_tier(setup, f"{evt_tier}_concat", check_in_cycle=check_in_cycle), + get_pattern_tier( + config, f"{evt_tier}_concat", check_in_cycle=check_in_cycle + ), params: timestamp="all", datatype="{datatype}", - lh5concat_exe=setup["paths"]["install"] + "/bin/lh5concat", - ro_input=lambda _, input: utils.as_ro(setup, input), + ro_input=lambda _, input: utils.as_ro(config, input), log: - get_pattern_log_concat(setup, f"tier_{evt_tier}_concat"), + get_pattern_log_concat(config, f"tier_{evt_tier}_concat", time), group: "tier-evt" shell: - "{swenv} {params.lh5concat_exe} --verbose --overwrite " + f'{execenv_smk_py_script(config, "lh5concat")}' + "--verbose --overwrite " "--output {output} " "-- {params.ro_input} &> {log}" diff --git a/workflow/rules/filelist_gen.smk b/workflow/rules/filelist_gen.smk index 32d6175..d92a5aa 100644 --- a/workflow/rules/filelist_gen.smk +++ b/workflow/rules/filelist_gen.smk @@ -105,30 +105,30 @@ def get_keys(keypart): return filekeys -def get_pattern(setup, tier): +def get_pattern(config, tier): """ Helper function to get the search pattern for the given tier, some tiers such as skm need to refer to a different pattern when looking for files as only phy files are taken to skm others are only taken to pet """ if tier == "blind": - fn_pattern = patt.get_pattern_tier(setup, "raw", check_in_cycle=False) + fn_pattern = patt.get_pattern_tier(config, "raw", check_in_cycle=False) elif tier in ("skm", "pet_concat"): - fn_pattern = patt.get_pattern_tier(setup, "pet", check_in_cycle=False) + fn_pattern = patt.get_pattern_tier(config, "pet", check_in_cycle=False) elif tier == "evt_concat": - fn_pattern = patt.get_pattern_tier(setup, "evt", check_in_cycle=False) + fn_pattern = patt.get_pattern_tier(config, "evt", check_in_cycle=False) elif tier == "daq": - fn_pattern = patt.get_pattern_tier_daq(setup, extension="{ext}") + fn_pattern = patt.get_pattern_tier_daq(config, extension="{ext}") else: - fn_pattern = patt.get_pattern_tier(setup, tier, check_in_cycle=False) + fn_pattern = patt.get_pattern_tier(config, tier, check_in_cycle=False) return fn_pattern -def concat_phy_filenames(setup, phy_filenames, tier): +def concat_phy_filenames(config, phy_filenames, tier): """ This function concatenates the files from the same run together """ - fn_pattern = patt.get_pattern(setup, tier) + fn_pattern = patt.get_pattern(config, tier) # group files by run sorted_phy_filenames = patt.run_grouper(phy_filenames) phy_filenames = [] @@ -136,7 +136,7 @@ def concat_phy_filenames(setup, phy_filenames, tier): for run in sorted_phy_filenames: key = FileKey.get_filekey_from_pattern(run[0], fn_pattern) out_key = FileKey.get_path_from_filekey( - key, patt.get_pattern_tier(setup, tier, check_in_cycle=False) + key, patt.get_pattern_tier(config, tier, check_in_cycle=False) )[0] phy_filenames.append(out_key) @@ -145,7 +145,7 @@ def concat_phy_filenames(setup, phy_filenames, tier): def build_filelist( - setup, + config, filekeys, search_pattern, tier, @@ -157,7 +157,7 @@ def build_filelist( and tier. It will ignore any keys in the ignore_keys list and only include the keys specified in the analysis_runs dict. 
""" - fn_pattern = get_pattern(setup, tier) + fn_pattern = get_pattern(config, tier) if ignore_keys is None: ignore_keys = [] @@ -177,11 +177,11 @@ def build_filelist( else: if tier == "blind" and _key.datatype in blind_datatypes: filename = FileKey.get_path_from_filekey( - _key, patt.get_pattern_tier_raw_blind(setup) + _key, patt.get_pattern_tier_raw_blind(config) ) elif tier == "skm": filename = FileKey.get_path_from_filekey( - _key, patt.get_pattern_tier(setup, "pet", check_in_cycle=False) + _key, patt.get_pattern_tier(config, "pet", check_in_cycle=False) ) elif tier == "daq": filename = FileKey.get_path_from_filekey( @@ -223,14 +223,14 @@ def build_filelist( if tier in concat_tiers: phy_filenames = concat_phy_filenames( - setup, phy_filenames, tier + config, phy_filenames, tier ) # concat phy files return phy_filenames + other_filenames def get_filelist( - wildcards, setup, search_pattern, ignore_keys_file=None, analysis_runs_file=None + wildcards, config, search_pattern, ignore_keys_file=None, analysis_runs_file=None ): file_selection = wildcards.label.split("-", 1)[0] # remove the file selection from the keypart @@ -242,7 +242,7 @@ def get_filelist( filekeys = get_keys(keypart) return build_filelist( - setup, + config, filekeys, search_pattern, wildcards.tier, @@ -253,7 +253,7 @@ def get_filelist( def get_filelist_full_wildcards( wildcards, - setup, + config, search_pattern, tier, ignore_keys_file=None, @@ -268,7 +268,7 @@ def get_filelist_full_wildcards( filekeys = get_keys(keypart) return build_filelist( - setup, + config, filekeys, search_pattern, tier, diff --git a/workflow/rules/hit.smk b/workflow/rules/hit.smk index 0af7590..5d83174 100644 --- a/workflow/rules/hit.smk +++ b/workflow/rules/hit.smk @@ -6,333 +6,51 @@ Snakemake rules for processing hit tier. 
This is done in 4 steps: - running build hit over all channels using par file """ -from legenddataflow.pars_loading import ParsCatalog from legenddataflow.create_pars_keylist import ParsKeyResolve +from legenddataflow.pars_loading import ParsCatalog from pathlib import Path from legenddataflow.patterns import ( - get_pattern_pars_tmp_channel, - get_pattern_plts_tmp_channel, - get_pattern_log_channel, - get_pattern_pars, - get_pattern_plts, get_pattern_tier, - get_pattern_pars_tmp, get_pattern_log, - get_pattern_pars, + get_pattern_pars_tmp, ) +from legenddataflow.execenv import execenv_smk_py_script hit_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier(setup, "raw", check_in_cycle=False), + get_pattern_tier(config, "raw", check_in_cycle=False), {"cal": ["par_hit"], "lar": ["par_hit"]}, ) -hit_par_cat_file = Path(pars_path(setup)) / "hit" / "validity.yaml" -if hit_par_cat_file.is_file(): - hit_par_cat_file.unlink() -Path(hit_par_cat_file).parent.mkdir(parents=True, exist_ok=True) -ParsKeyResolve.write_to_yaml(hit_par_catalog, hit_par_cat_file) - -# This rule builds the qc using the calibration dsp files and fft files -rule build_qc: - input: - files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-dsp.filelist" - ), - fft_files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-fft-dsp.filelist" - ), - pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), - overwrite_files=lambda wildcards: get_overwrite_file("hit", wildcards), - params: - timestamp="{timestamp}", - datatype="cal", - channel="{channel}", - output: - qc_file=temp(get_pattern_pars_tmp_channel(setup, "hit", "qc")), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "hit", "qc")), - log: - get_pattern_log_channel(setup, "pars_hit_qc"), - group: - "par-hit" - resources: - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_hit_qc.py " - "--log {log} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--channel {params.channel} " - "--configs {configs} " - "--metadata {meta} " - "--plot_path {output.plot_file} " - "--save_path {output.qc_file} " - "--pulser_file {input.pulser} " - "--cal_files {input.files} " - "--fft_files {input.fft_files} " - "--overwrite_files {input.overwrite_files} " - - -# This rule builds the energy calibration using the calibration dsp files -rule build_energy_calibration: - input: - files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-dsp.filelist" - ), - pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), - ctc_dict=ancient( - lambda wildcards: ParsCatalog.get_par_file( - setup, wildcards.timestamp, "dsp" - ) - ), - inplots=get_pattern_plts_tmp_channel(setup, "hit", "qc"), - in_hit_dict=get_pattern_pars_tmp_channel(setup, "hit", "qc"), - params: - timestamp="{timestamp}", - datatype="cal", - channel="{channel}", - output: - ecal_file=temp(get_pattern_pars_tmp_channel(setup, "hit", "energy_cal")), - results_file=temp( - get_pattern_pars_tmp_channel( - setup, "hit", "energy_cal_objects", extension="pkl" - ) - ), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "hit", "energy_cal")), - log: - get_pattern_log_channel(setup, "pars_hit_energy_cal"), - group: - "par-hit" - resources: - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_hit_ecal.py " - "--log {log} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--channel {params.channel} " - "--configs {configs} " - "--metadata 
{meta} " - "--plot_path {output.plot_file} " - "--results_path {output.results_file} " - "--save_path {output.ecal_file} " - "--inplot_dict {input.inplots} " - "--in_hit_dict {input.in_hit_dict} " - "--ctc_dict {input.ctc_dict} " - "--pulser_file {input.pulser} " - "--files {input.files}" +include: "channel_merge.smk" -# This rule builds the a/e calibration using the calibration dsp files -rule build_aoe_calibration: - input: - files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-dsp.filelist" - ), - pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), - ecal_file=get_pattern_pars_tmp_channel(setup, "hit", "energy_cal"), - eres_file=get_pattern_pars_tmp_channel( - setup, "hit", "energy_cal_objects", extension="pkl" - ), - inplots=get_pattern_plts_tmp_channel(setup, "hit", "energy_cal"), - params: - timestamp="{timestamp}", - datatype="cal", - channel="{channel}", - output: - hit_pars=temp(get_pattern_pars_tmp_channel(setup, "hit", "aoe_cal")), - aoe_results=temp( - get_pattern_pars_tmp_channel( - setup, "hit", "aoe_cal_objects", extension="pkl" - ) - ), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "hit", "aoe_cal")), - log: - get_pattern_log_channel(setup, "pars_hit_aoe_cal"), - group: - "par-hit" - resources: - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_hit_aoe.py " - "--log {log} " - "--configs {configs} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--inplots {input.inplots} " - "--channel {params.channel} " - "--aoe_results {output.aoe_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--pulser_file {input.pulser} " - "--ecal_file {input.ecal_file} " - "{input.files}" - - -# This rule builds the lq calibration using the calibration dsp files -rule build_lq_calibration: - input: - files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-dsp.filelist" - ), - pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), - ecal_file=get_pattern_pars_tmp_channel(setup, "hit", "aoe_cal"), - eres_file=get_pattern_pars_tmp_channel( - setup, "hit", "aoe_cal_objects", extension="pkl" - ), - inplots=get_pattern_plts_tmp_channel(setup, "hit", "aoe_cal"), - params: - timestamp="{timestamp}", - datatype="cal", - channel="{channel}", - output: - hit_pars=temp(get_pattern_pars_tmp_channel(setup, "hit")), - lq_results=temp( - get_pattern_pars_tmp_channel(setup, "hit", "objects", extension="pkl") - ), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "hit")), - log: - get_pattern_log_channel(setup, "pars_hit_lq_cal"), - group: - "par-hit" - resources: - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_hit_lq.py " - "--log {log} " - "--configs {configs} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--inplots {input.inplots} " - "--channel {params.channel} " - "--lq_results {output.lq_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--pulser_file {input.pulser} " - "--ecal_file {input.ecal_file} " - "{input.files}" - - -rule build_pars_hit_objects: - input: - lambda wildcards: get_par_chanlist( - setup, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "hit", - basedir, - det_status, - chan_maps, - name="objects", - extension="pkl", - ), - output: - get_pattern_pars( - setup, - 
"hit", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - ), - params: - ro_input=lambda _, input: ro(input), - group: - "merge-hit" - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " - "--input {params.ro_input} " - "--output {output} " - "--channelmap {meta} " - - -rule build_plts_hit: - input: - lambda wildcards: get_plt_chanlist( - setup, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "hit", - basedir, - det_status, - chan_maps, - ), - output: - get_pattern_plts(setup, "hit"), - params: - ro_input=lambda _, input: ro(input), - group: - "merge-hit" - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " - "--input {params.ro_input} " - "--output {output} " - "--channelmap {meta} " - - -rule build_pars_hit: - input: - infiles=lambda wildcards: get_par_chanlist( - setup, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "hit", - basedir, - det_status, - chan_maps, - ), - plts=get_pattern_plts(setup, "hit"), - objects=get_pattern_pars( - setup, - "hit", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - ), - params: - ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, - output: - get_pattern_pars(setup, "hit", check_in_cycle=check_in_cycle), - group: - "merge-hit" - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " - "--input {params.ro_input[infiles]} " - "--output {output} " - "--channelmap {meta} " +build_merge_rules("hit", lh5_merge=False) rule build_hit: input: - dsp_file=get_pattern_tier(setup, "dsp", check_in_cycle=False), + dsp_file=get_pattern_tier(config, "dsp", check_in_cycle=False), pars_file=lambda wildcards: ParsCatalog.get_par_file( - setup, wildcards.timestamp, "hit" + config, wildcards.timestamp, "hit" ), output: - tier_file=get_pattern_tier(setup, "hit", check_in_cycle=check_in_cycle), - db_file=get_pattern_pars_tmp(setup, "hit_db"), + tier_file=get_pattern_tier(config, "hit", check_in_cycle=check_in_cycle), + db_file=get_pattern_pars_tmp(config, "hit_db"), params: timestamp="{timestamp}", datatype="{datatype}", tier="hit", ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, log: - get_pattern_log(setup, "tier_hit"), + get_pattern_log(config, "tier_hit", time), group: "tier-hit" resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/build_hit.py " + f'{execenv_smk_py_script(config, "build_tier_hit")}' f"--configs {ro(configs)} " "--metadata {meta} " "--log {log} " diff --git a/workflow/rules/hit_pars_geds.smk b/workflow/rules/hit_pars_geds.smk new file mode 100644 index 0000000..8143f82 --- /dev/null +++ b/workflow/rules/hit_pars_geds.smk @@ -0,0 +1,205 @@ +""" +Snakemake rules for processing hit tier. 
This is done in 4 steps: +- extraction of calibration curves(s) for each channel from cal data +- extraction of psd calibration parameters for each channel from cal data +- combining of all channels into single pars files with associated plot and results files +- running build hit over all channels using par file +""" + +from pathlib import Path +from legenddataflow.patterns import ( + get_pattern_pars_tmp_channel, + get_pattern_plts_tmp_channel, + get_pattern_log_channel, + get_pattern_pars, + get_pattern_plts, + get_pattern_tier, + get_pattern_pars_tmp, + get_pattern_log, + get_pattern_pars, +) +from legenddataflow.execenv import execenv_smk_py_script + + +# This rule builds the qc using the calibration dsp files and fft files +rule build_qc: + input: + files=os.path.join( + filelist_path(config), "all-{experiment}-{period}-{run}-cal-dsp.filelist" + ), + fft_files=os.path.join( + filelist_path(config), "all-{experiment}-{period}-{run}-fft-dsp.filelist" + ), + pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + overwrite_files=lambda wildcards: get_overwrite_file("hit", wildcards), + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", + output: + qc_file=temp(get_pattern_pars_tmp_channel(config, "hit", "qc")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "hit", "qc")), + log: + get_pattern_log_channel(config, "pars_hit_qc", time), + group: + "par-hit" + resources: + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_hit_qc")}' + "--log {log} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--configs {configs} " + "--metadata {meta} " + "--plot_path {output.plot_file} " + "--save_path {output.qc_file} " + "--pulser_file {input.pulser} " + "--cal_files {input.files} " + "--fft_files {input.fft_files} " + "--overwrite_files {input.overwrite_files} " + + +# This rule builds the energy calibration using the calibration dsp files +rule build_energy_calibration: + input: + files=os.path.join( + filelist_path(config), "all-{experiment}-{period}-{run}-cal-dsp.filelist" + ), + pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + ctc_dict=ancient( + lambda wildcards: ParsCatalog.get_par_file( + config, wildcards.timestamp, "dsp" + ) + ), + inplots=get_pattern_plts_tmp_channel(config, "hit", "qc"), + in_hit_dict=get_pattern_pars_tmp_channel(config, "hit", "qc"), + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", + output: + ecal_file=temp(get_pattern_pars_tmp_channel(config, "hit", "energy_cal")), + results_file=temp( + get_pattern_pars_tmp_channel( + config, "hit", "energy_cal_objects", extension="pkl" + ) + ), + plot_file=temp(get_pattern_plts_tmp_channel(config, "hit", "energy_cal")), + log: + get_pattern_log_channel(config, "pars_hit_energy_cal", time), + group: + "par-hit" + resources: + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_hit_ecal")}' + "--log {log} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--configs {configs} " + "--metadata {meta} " + "--plot_path {output.plot_file} " + "--results_path {output.results_file} " + "--save_path {output.ecal_file} " + "--inplot_dict {input.inplots} " + "--in_hit_dict {input.in_hit_dict} " + "--ctc_dict {input.ctc_dict} " + "--pulser_file {input.pulser} " + "--files {input.files}" + + +# This rule builds the a/e calibration using the calibration dsp files +rule build_aoe_calibration: + input: + 
files=os.path.join( + filelist_path(config), "all-{experiment}-{period}-{run}-cal-dsp.filelist" + ), + pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + ecal_file=get_pattern_pars_tmp_channel(config, "hit", "energy_cal"), + eres_file=get_pattern_pars_tmp_channel( + config, "hit", "energy_cal_objects", extension="pkl" + ), + inplots=get_pattern_plts_tmp_channel(config, "hit", "energy_cal"), + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", + output: + hit_pars=temp(get_pattern_pars_tmp_channel(config, "hit", "aoe_cal")), + aoe_results=temp( + get_pattern_pars_tmp_channel( + config, "hit", "aoe_cal_objects", extension="pkl" + ) + ), + plot_file=temp(get_pattern_plts_tmp_channel(config, "hit", "aoe_cal")), + log: + get_pattern_log_channel(config, "pars_hit_aoe_cal", time), + group: + "par-hit" + resources: + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_hit_aoe")}' + "--log {log} " + "--configs {configs} " + "--metadata {meta} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--inplots {input.inplots} " + "--channel {params.channel} " + "--aoe_results {output.aoe_results} " + "--eres_file {input.eres_file} " + "--hit_pars {output.hit_pars} " + "--plot_file {output.plot_file} " + "--pulser_file {input.pulser} " + "--ecal_file {input.ecal_file} " + "{input.files}" + + +# This rule builds the lq calibration using the calibration dsp files +rule build_lq_calibration: + input: + files=os.path.join( + filelist_path(config), "all-{experiment}-{period}-{run}-cal-dsp.filelist" + ), + pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + ecal_file=get_pattern_pars_tmp_channel(config, "hit", "aoe_cal"), + eres_file=get_pattern_pars_tmp_channel( + config, "hit", "aoe_cal_objects", extension="pkl" + ), + inplots=get_pattern_plts_tmp_channel(config, "hit", "aoe_cal"), + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", + output: + hit_pars=temp(get_pattern_pars_tmp_channel(config, "hit")), + lq_results=temp( + get_pattern_pars_tmp_channel(config, "hit", "objects", extension="pkl") + ), + plot_file=temp(get_pattern_plts_tmp_channel(config, "hit")), + log: + get_pattern_log_channel(config, "pars_hit_lq_cal", time), + group: + "par-hit" + resources: + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_hit_lq")}' + "--log {log} " + "--configs {configs} " + "--metadata {meta} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--inplots {input.inplots} " + "--channel {params.channel} " + "--lq_results {output.lq_results} " + "--eres_file {input.eres_file} " + "--hit_pars {output.hit_pars} " + "--plot_file {output.plot_file} " + "--pulser_file {input.pulser} " + "--ecal_file {input.ecal_file} " + "{input.files}" diff --git a/workflow/rules/main.smk b/workflow/rules/main.smk index e0d886e..a78784d 100644 --- a/workflow/rules/main.smk +++ b/workflow/rules/main.smk @@ -38,15 +38,15 @@ rule autogen_output: - generate lists of valid keys """ input: - filelist=os.path.join(filelist_path(setup), "{label}-{tier}.filelist"), + filelist=os.path.join(filelist_path(config), "{label}-{tier}.filelist"), output: gen_output="{label}-{tier}.gen", - summary_log=log_path(setup) + "/summary-{label}-{tier}-" + timestamp + ".log", - warning_log=log_path(setup) + "/warning-{label}-{tier}-" + timestamp + ".log", + summary_log=log_path(config) + "/summary-{label}-{tier}-" + timestamp + ".log", + warning_log=log_path(config) + "/warning-{label}-{tier}-" + timestamp + 
".log", params: - valid_keys_path=os.path.join(pars_path(setup), "valid_keys"), - filedb_path=os.path.join(pars_path(setup), "filedb"), - setup=lambda wildcards: setup, + valid_keys_path=os.path.join(pars_path(config), "valid_keys"), + filedb_path=os.path.join(pars_path(config), "filedb"), + setup=lambda wildcards: config, basedir=basedir, threads: min(workflow.cores, 64) script: diff --git a/workflow/rules/pht.smk b/workflow/rules/pht.smk index 27e4f81..fa85971 100644 --- a/workflow/rules/pht.smk +++ b/workflow/rules/pht.smk @@ -6,901 +6,54 @@ Snakemake rules for processing pht (partition hit) tier data. This is done in 4 - running build hit over all channels using par file """ -from legenddataflow.pars_loading import ParsCatalog from legenddataflow.create_pars_keylist import ParsKeyResolve +from legenddataflow.pars_loading import ParsCatalog from pathlib import Path from legenddataflow.utils import filelist_path, set_last_rule_name from legenddataflow.patterns import ( - get_pattern_pars_tmp_channel, - get_pattern_plts_tmp_channel, - get_pattern_log_channel, - get_pattern_plts, get_pattern_tier, get_pattern_pars_tmp, get_pattern_log, - get_pattern_pars, ) +from legenddataflow.execenv import execenv_smk_py_script -pht_par_catalog = ds.ParsKeyResolve.get_par_catalog( +pht_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier(setup, "raw", check_in_cycle=False), + get_pattern_tier(config, "raw", check_in_cycle=False), {"cal": ["par_pht"], "lar": ["par_pht"]}, ) -pht_par_cat_file = Path(pars_path(setup)) / "pht" / "validity.yaml" -if pht_par_cat_file.is_file(): - pht_par_cat_file.unlink() -Path(pht_par_cat_file).parent.mkdir(parents=True, exist_ok=True) -ParsKeyResolve.write_to_yaml(pht_par_catalog, pht_par_cat_file) - intier = "psp" -rule pht_checkpoint: - input: - files=os.path.join( - filelist_path(setup), - "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", - ), - output: - temp(get_pattern_pars_tmp_channel(setup, "pht", "check")), - shell: - "touch {output}" - - -qc_pht_rules = {} -for key, dataset in part.datasets.items(): - for partition in dataset.keys(): - - rule: - input: - cal_files=part.get_filelists(partition, key, intier), - fft_files=part.get_filelists(partition, key, intier, datatype="fft"), - pulser_files=[ - str(file).replace("par_pht", "par_tcm") - for file in part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="pulser_ids", - ) - ], - check_files=part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="check", - ), - overwrite_files=get_overwrite_file( - "pht", - timestamp=part.get_timestamp( - pht_par_catalog, - partition, - key, - tier="pht", - ), - ), - wildcard_constraints: - channel=part.get_wildcard_constraints(partition, key), - params: - datatype="cal", - channel="{channel}" if key == "default" else key, - timestamp=part.get_timestamp( - pht_par_catalog, partition, key, tier="pht" - ), - output: - hit_pars=[ - temp(file) - for file in part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="qc", - ) - ], - plot_file=[ - temp(file) - for file in part.get_plt_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="qc", - ) - ], - log: - part.get_log_file( - pht_par_catalog, - partition, - key, - "pht", - name="par_pht_qc", - ), - group: - "par-pht" - resources: - mem_swap=len(part.get_filelists(partition, key, intier)) * 30, - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_pht_qc.py " - "--log {log} " - "--configs 
{configs} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--channel {params.channel} " - "--save_path {output.hit_pars} " - "--plot_path {output.plot_file} " - "--overwrite_files {input.overwrite_files} " - "--pulser_files {input.pulser_files} " - "--fft_files {input.fft_files} " - "--cal_files {input.cal_files}" - - set_last_rule_name(workflow, f"{key}-{partition}-build_pht_qc") - - if key in qc_pht_rules: - qc_pht_rules[key].append(list(workflow.rules)[-1]) - else: - qc_pht_rules[key] = [list(workflow.rules)[-1]] - - -# Merged energy and a/e supercalibrations to reduce number of rules as they have same inputs/outputs -# This rule builds the a/e calibration using the calibration dsp files for the whole partition -rule build_pht_qc: - input: - cal_files=os.path.join( - filelist_path(setup), - "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", - ), - fft_files=os.path.join( - filelist_path(setup), - "all-{experiment}-{period}-{run}-fft-" + f"{intier}.filelist", - ), - pulser_files=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), - check_file=get_pattern_pars_tmp_channel(setup, "pht", "check"), - overwrite_files=lambda wildcards: get_overwrite_file("pht", wildcards=wildcards), - params: - datatype="cal", - channel="{channel}", - timestamp="{timestamp}", - output: - hit_pars=temp(get_pattern_pars_tmp_channel(setup, "pht", "qc")), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "qc")), - log: - get_pattern_log_channel(setup, "par_pht_qc"), - group: - "par-pht" - resources: - mem_swap=60, - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_pht_qc.py " - "--log {log} " - "--configs {configs} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--channel {params.channel} " - "--save_path {output.hit_pars} " - "--plot_path {output.plot_file} " - "--overwrite_files {input.overwrite_files} " - "--pulser_files {input.pulser_files} " - "--fft_files {input.fft_files} " - "--cal_files {input.cal_files}" - - -fallback_qc_rule = list(workflow.rules)[-1] - -rule_order_list = [] -ordered = OrderedDict(qc_pht_rules) -ordered.move_to_end("default") -for key, items in ordered.items(): - rule_order_list += [item.name for item in items] -rule_order_list.append(fallback_qc_rule.name) -workflow._ruleorder.add(*rule_order_list) # [::-1] - - -# This rule builds the energy calibration using the calibration dsp files -rule build_per_energy_calibration: - input: - files=os.path.join( - filelist_path(setup), - "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", - ), - pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), - pht_dict=get_pattern_pars_tmp_channel(setup, "pht", "qc"), - inplots=get_pattern_plts_tmp_channel(setup, "pht", "qc"), - ctc_dict=ancient( - lambda wildcards: ParsCatalog.get_par_file( - setup, wildcards.timestamp, intier - ) - ), - params: - timestamp="{timestamp}", - datatype="cal", - channel="{channel}", - tier="pht", - output: - ecal_file=temp(get_pattern_pars_tmp_channel(setup, "pht", "energy_cal")), - results_file=temp( - get_pattern_pars_tmp_channel( - setup, "pht", "energy_cal_objects", extension="pkl" - ) - ), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "energy_cal")), - log: - get_pattern_log_channel(setup, "par_pht_energy_cal"), - group: - "par-pht" - resources: - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_hit_ecal.py " - "--log {log} " - "--datatype {params.datatype} " - 
"--timestamp {params.timestamp} " - "--channel {params.channel} " - "--configs {configs} " - "--tier {params.tier} " - "--metadata {meta} " - "--plot_path {output.plot_file} " - "--results_path {output.results_file} " - "--save_path {output.ecal_file} " - "--inplot_dict {input.inplots} " - "--in_hit_dict {input.pht_dict} " - "--ctc_dict {input.ctc_dict} " - "--pulser_file {input.pulser} " - "--files {input.files}" - - -part_pht_rules = {} -for key, dataset in part.datasets.items(): - for partition in dataset.keys(): - - rule: - input: - files=part.get_filelists(partition, key, intier), - pulser_files=[ - str(file).replace("par_pht", "par_tcm") - for file in part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="pulser_ids", - ) - ], - ecal_file=part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="energy_cal", - ), - eres_file=part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="energy_cal_objects", - extension="pkl", - ), - inplots=part.get_plt_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="energy_cal", - ), - wildcard_constraints: - channel=part.get_wildcard_constraints(partition, key), - params: - datatype="cal", - channel="{channel}" if key == "default" else key, - timestamp=part.get_timestamp( - pht_par_catalog, partition, key, tier="pht" - ), - output: - hit_pars=[ - temp(file) - for file in part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="partcal", - ) - ], - partcal_results=[ - temp(file) - for file in part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="partcal_objects", - extension="pkl", - ) - ], - plot_file=[ - temp(file) - for file in part.get_plt_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="partcal", - ) - ], - log: - part.get_log_file( - pht_par_catalog, - partition, - key, - "pht", - name="par_pht_partcal", - ), - group: - "par-pht" - resources: - mem_swap=len(part.get_filelists(partition, key, intier)) * 15, - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_pht_partcal.py " - "--log {log} " - "--configs {configs} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--inplots {input.inplots} " - "--channel {params.channel} " - "--metadata {meta} " - "--fit_results {output.partcal_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--ecal_file {input.ecal_file} " - "--pulser_files {input.pulser_files} " - "--input_files {input.files}" - - set_last_rule_name( - workflow, f"{key}-{partition}-build_pht_energy_super_calibrations" - ) - - if key in part_pht_rules: - part_pht_rules[key].append(list(workflow.rules)[-1]) - else: - part_pht_rules[key] = [list(workflow.rules)[-1]] - - -# Merged energy and a/e supercalibrations to reduce number of rules as they have same inputs/outputs -# This rule builds the a/e calibration using the calibration dsp files for the whole partition -rule build_pht_energy_super_calibrations: - input: - files=os.path.join( - filelist_path(setup), - "all-{experiment}-{period}-{run}-cal" + f"-{intier}.filelist", - ), - pulser_files=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), - ecal_file=get_pattern_pars_tmp_channel(setup, "pht", "energy_cal"), - eres_file=get_pattern_pars_tmp_channel( - setup, "pht", "energy_cal_objects", extension="pkl" - ), - inplots=get_pattern_plts_tmp_channel(setup, "pht", "energy_cal"), - params: - datatype="cal", - 
channel="{channel}", - timestamp="{timestamp}", - output: - hit_pars=temp(get_pattern_pars_tmp_channel(setup, "pht", "partcal")), - partcal_results=temp( - get_pattern_pars_tmp_channel( - setup, "pht", "partcal_objects", extension="pkl" - ) - ), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "partcal")), - log: - get_pattern_log_channel(setup, "par_pht_partcal"), - group: - "par-pht" - resources: - mem_swap=60, - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_pht_partcal.py " - "--log {log} " - "--configs {configs} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--channel {params.channel} " - "--metadata {meta} " - "--inplots {input.inplots} " - "--fit_results {output.partcal_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--ecal_file {input.ecal_file} " - "--pulser_files {input.pulser_files} " - "--input_files {input.files}" +include: "channel_merge.smk" -fallback_pht_rule = list(workflow.rules)[-1] - -rule_order_list = [] -ordered = OrderedDict(part_pht_rules) -ordered.move_to_end("default") -for key, items in ordered.items(): - rule_order_list += [item.name for item in items] -rule_order_list.append(fallback_pht_rule.name) -workflow._ruleorder.add(*rule_order_list) # [::-1] - -part_pht_rules = {} -for key, dataset in part.datasets.items(): - for partition in dataset.keys(): - - rule: - input: - files=part.get_filelists(partition, key, intier), - pulser_files=[ - str(file).replace("par_pht", "par_tcm") - for file in part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="pulser_ids", - ) - ], - ecal_file=part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="partcal", - ), - eres_file=part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="partcal_objects", - extension="pkl", - ), - inplots=part.get_plt_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="partcal", - ), - wildcard_constraints: - channel=part.get_wildcard_constraints(partition, key), - params: - datatype="cal", - channel="{channel}" if key == "default" else key, - timestamp=part.get_timestamp( - pht_par_catalog, partition, key, tier="pht" - ), - output: - hit_pars=[ - temp(file) - for file in part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="aoecal", - ) - ], - aoe_results=[ - temp(file) - for file in part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="aoecal_objects", - extension="pkl", - ) - ], - plot_file=[ - temp(file) - for file in part.get_plt_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="aoecal", - ) - ], - log: - part.get_log_file( - pht_par_catalog, - partition, - key, - "pht", - name="par_pht_aoe", - ), - group: - "par-pht" - resources: - mem_swap=len(part.get_filelists(partition, key, intier)) * 15, - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_pht_aoecal.py " - "--log {log} " - "--configs {configs} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--inplots {input.inplots} " - "--channel {params.channel} " - "--aoe_results {output.aoe_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--ecal_file {input.ecal_file} " - "--pulser_files {input.pulser_files} " - "--input_files {input.files}" - - set_last_rule_name( - workflow, 
f"{key}-{partition}-build_pht_aoe_calibrations" - ) - - if key in part_pht_rules: - part_pht_rules[key].append(list(workflow.rules)[-1]) - else: - part_pht_rules[key] = [list(workflow.rules)[-1]] - - -# Merged energy and a/e supercalibrations to reduce number of rules as they have same inputs/outputs -# This rule builds the a/e calibration using the calibration dsp files for the whole partition -rule build_pht_aoe_calibrations: - input: - files=os.path.join( - filelist_path(setup), - "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", - ), - pulser_files=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), - ecal_file=get_pattern_pars_tmp_channel(setup, "pht", "partcal"), - eres_file=get_pattern_pars_tmp_channel( - setup, "pht", "partcal_objects", extension="pkl" - ), - inplots=get_pattern_plts_tmp_channel(setup, "pht", "partcal"), - params: - datatype="cal", - channel="{channel}", - timestamp="{timestamp}", - output: - hit_pars=temp(get_pattern_pars_tmp_channel(setup, "pht", "aoecal")), - aoe_results=temp( - get_pattern_pars_tmp_channel( - setup, "pht", "aoecal_objects", extension="pkl" - ) - ), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "aoecal")), - log: - get_pattern_log_channel(setup, "par_pht_aoe_cal"), - group: - "par-pht" - resources: - mem_swap=60, - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_pht_aoecal.py " - "--log {log} " - "--configs {configs} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--inplots {input.inplots} " - "--channel {params.channel} " - "--aoe_results {output.aoe_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--ecal_file {input.ecal_file} " - "--pulser_files {input.pulser_files} " - "--input_files {input.files}" - - -fallback_pht_rule = list(workflow.rules)[-1] - -rule_order_list = [] -ordered = OrderedDict(part_pht_rules) -ordered.move_to_end("default") -for key, items in ordered.items(): - rule_order_list += [item.name for item in items] -rule_order_list.append(fallback_pht_rule.name) -workflow._ruleorder.add(*rule_order_list) # [::-1] - -part_pht_rules = {} -for key, dataset in part.datasets.items(): - for partition in dataset.keys(): - - rule: - input: - files=part.get_filelists(partition, key, intier), - pulser_files=[ - str(file).replace("par_pht", "par_tcm") - for file in part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="pulser_ids", - ) - ], - ecal_file=part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="aoecal", - ), - eres_file=part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="aoecal_objects", - extension="pkl", - ), - inplots=part.get_plt_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="aoecal", - ), - wildcard_constraints: - channel=part.get_wildcard_constraints(partition, key), - params: - datatype="cal", - channel="{channel}" if key == "default" else key, - timestamp=part.get_timestamp( - pht_par_catalog, partition, key, tier="pht" - ), - output: - hit_pars=[ - temp(file) - for file in part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - ) - ], - lq_results=[ - temp(file) - for file in part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="objects", - extension="pkl", - ) - ], - plot_file=[ - temp(file) - for file in part.get_plt_files( - pht_par_catalog, - partition, - key, - tier="pht", - ) - ], - log: - 
part.get_log_file( - pht_par_catalog, - partition, - key, - "pht", - name="par_pht_lq", - ), - group: - "par-pht" - resources: - mem_swap=len(part.get_filelists(partition, key, intier)) * 15, - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_pht_lqcal.py " - "--log {log} " - "--configs {configs} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--inplots {input.inplots} " - "--channel {params.channel} " - "--lq_results {output.lq_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--ecal_file {input.ecal_file} " - "--pulser_files {input.pulser_files} " - "--input_files {input.files}" - - set_last_rule_name(workflow, f"{key}-{partition}-build_pht_lq_calibration") - - if key in part_pht_rules: - part_pht_rules[key].append(list(workflow.rules)[-1]) - else: - part_pht_rules[key] = [list(workflow.rules)[-1]] - - -# This rule builds the lq calibration using the calibration dsp files for the whole partition -rule build_pht_lq_calibration: - input: - files=os.path.join( - filelist_path(setup), - "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", - ), - pulser_files=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), - ecal_file=get_pattern_pars_tmp_channel(setup, "pht", "aoecal"), - eres_file=get_pattern_pars_tmp_channel( - setup, "pht", "aoecal_objects", extension="pkl" - ), - inplots=get_pattern_plts_tmp_channel(setup, "pht", "aoecal"), - params: - datatype="cal", - channel="{channel}", - timestamp="{timestamp}", - output: - hit_pars=temp(get_pattern_pars_tmp_channel(setup, "pht")), - lq_results=temp( - get_pattern_pars_tmp_channel(setup, "pht", "objects", extension="pkl") - ), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht")), - log: - get_pattern_log_channel(setup, "par_pht_lq_cal"), - group: - "par-pht" - resources: - mem_swap=60, - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_pht_lqcal.py " - "--log {log} " - "--configs {configs} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--inplots {input.inplots} " - "--channel {params.channel} " - "--lq_results {output.lq_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--ecal_file {input.ecal_file} " - "--pulser_files {input.pulser_files} " - "--input_files {input.files}" - - -fallback_pht_rule = list(workflow.rules)[-1] - -rule_order_list = [] -ordered = OrderedDict(part_pht_rules) -ordered.move_to_end("default") -for key, items in ordered.items(): - rule_order_list += [item.name for item in items] -rule_order_list.append(fallback_pht_rule.name) -workflow._ruleorder.add(*rule_order_list) # [::-1] - - -rule build_pars_pht_objects: - input: - lambda wildcards: get_par_chanlist( - setup, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "pht", - basedir, - det_status, - chan_maps, - name="objects", - extension="pkl", - ), - output: - get_pattern_pars( - setup, - "pht", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - ), - group: - "merge-hit" - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " - "--input {input} " - "--output {output} " - - -rule build_plts_pht: - input: - lambda wildcards: get_plt_chanlist( - setup, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "pht", - 
basedir, - det_status, - chan_maps, - ), - output: - get_pattern_plts(setup, "pht"), - group: - "merge-hit" - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " - "--input {input} " - "--output {output} " - - -rule build_pars_pht: - input: - infiles=lambda wildcards: get_par_chanlist( - setup, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "pht", - basedir, - det_status, - chan_maps, - ), - plts=get_pattern_plts(setup, "pht"), - objects=get_pattern_pars( - setup, - "pht", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - ), - output: - get_pattern_pars(setup, "pht", check_in_cycle=check_in_cycle), - group: - "merge-hit" - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " - "--input {input.infiles} " - "--output {output} " +build_merge_rules("pht", lh5_merge=False) rule build_pht: input: - dsp_file=get_pattern_tier(setup, intier, check_in_cycle=False), + dsp_file=get_pattern_tier(config, intier, check_in_cycle=False), pars_file=lambda wildcards: ParsCatalog.get_par_file( - setup, wildcards.timestamp, "pht" + config, wildcards.timestamp, "pht" ), output: - tier_file=get_pattern_tier(setup, "pht", check_in_cycle=check_in_cycle), - db_file=get_pattern_pars_tmp(setup, "pht_db"), + tier_file=get_pattern_tier(config, "pht", check_in_cycle=check_in_cycle), + db_file=get_pattern_pars_tmp(config, "pht_db"), params: timestamp="{timestamp}", datatype="{datatype}", tier="pht", ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, log: - get_pattern_log(setup, "tier_pht"), + get_pattern_log(config, "tier_pht", time), group: "tier-pht" resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/build_hit.py " + f'{execenv_smk_py_script(config, "build_tier_hit")}' f"--configs {ro(configs)} " "--metadata {meta} " "--log {log} " diff --git a/workflow/rules/pht_pars_geds.smk b/workflow/rules/pht_pars_geds.smk new file mode 100644 index 0000000..4e5e126 --- /dev/null +++ b/workflow/rules/pht_pars_geds.smk @@ -0,0 +1,768 @@ +""" +Snakemake rules for processing pht (partition hit) tier data. 
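Two pieces of per-tier boilerplate disappear from hit.smk and pht.smk above (and from psp.smk below): the hand-written `build_pars_*_objects` / `build_plts_*` / `build_pars_*` merge rules, now generated by `build_merge_rules(...)` from the included channel_merge.smk, and the parse-time writing of `<tier>/validity.yaml`. For reference, the deleted validity bookkeeping amounted to the snippet below, condensed into one helper; `pars_path` is passed in explicitly here because its import is not shown in this hunk.

from pathlib import Path
from legenddataflow.create_pars_keylist import ParsKeyResolve


def write_par_validity(config, tier, par_catalog, pars_path):
    """Condensed form of the removed per-tier validity.yaml bookkeeping."""
    cat_file = Path(pars_path(config)) / tier / "validity.yaml"
    if cat_file.is_file():
        cat_file.unlink()  # start from a clean file on each dataflow parse
    cat_file.parent.mkdir(parents=True, exist_ok=True)
    ParsKeyResolve.write_to_yaml(par_catalog, cat_file)

For example, the removed hit.smk block corresponds to write_par_validity(config, "hit", hit_par_catalog, pars_path); where this logic now lives (channel_merge.smk or elsewhere) is not visible in this diff.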
This is done in 4 steps: +- extraction of calibration curves(s) for each run for each channel from cal data +- extraction of psd calibration parameters and partition level energy fitting for each channel over whole partition from cal data +- combining of all channels into single pars files with associated plot and results files +- running build hit over all channels using par file +""" + +from legenddataflow.pars_loading import ParsCatalog +from legenddataflow.create_pars_keylist import ParsKeyResolve +from pathlib import Path +from legenddataflow.utils import filelist_path, set_last_rule_name +from legenddataflow.patterns import ( + get_pattern_pars_tmp_channel, + get_pattern_plts_tmp_channel, + get_pattern_log_channel, + get_pattern_plts, + get_pattern_tier, + get_pattern_pars_tmp, + get_pattern_log, + get_pattern_pars, +) +from legenddataflow.execenv import execenv_smk_py_script + +pht_par_catalog = ParsKeyResolve.get_par_catalog( + ["-*-*-*-cal"], + get_pattern_tier(config, "raw", check_in_cycle=False), + {"cal": ["par_pht"], "lar": ["par_pht"]}, +) + +intier = "psp" + +qc_pht_rules = {} +for key, dataset in part.datasets.items(): + for partition in dataset.keys(): + + rule: + input: + cal_files=part.get_filelists(partition, key, intier), + fft_files=part.get_filelists(partition, key, intier, datatype="fft"), + pulser_files=[ + str(file).replace("par_pht", "par_tcm") + for file in part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="pulser_ids", + ) + ], + overwrite_files=get_overwrite_file( + "pht", + timestamp=part.get_timestamp( + pht_par_catalog, + partition, + key, + tier="pht", + ), + ), + wildcard_constraints: + channel=part.get_wildcard_constraints(partition, key), + params: + datatype="cal", + channel="{channel}" if key == "default" else key, + timestamp=part.get_timestamp( + pht_par_catalog, partition, key, tier="pht" + ), + output: + hit_pars=[ + temp(file) + for file in part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="qc", + ) + ], + plot_file=[ + temp(file) + for file in part.get_plt_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="qc", + ) + ], + log: + part.get_log_file( + pht_par_catalog, + partition, + key, + "pht", + time, + name="par_pht_qc", + ), + group: + "par-pht" + resources: + mem_swap=len(part.get_filelists(partition, key, intier)) * 30, + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_pht_qc")}' + "--log {log} " + "--configs {configs} " + "--metadata {meta} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--save_path {output.hit_pars} " + "--plot_path {output.plot_file} " + "--overwrite_files {input.overwrite_files} " + "--pulser_files {input.pulser_files} " + "--fft_files {input.fft_files} " + "--cal_files {input.cal_files}" + + set_last_rule_name(workflow, f"{key}-{partition}-build_pht_qc") + + if key in qc_pht_rules: + qc_pht_rules[key].append(list(workflow.rules)[-1]) + else: + qc_pht_rules[key] = [list(workflow.rules)[-1]] + + +# Merged energy and a/e supercalibrations to reduce number of rules as they have same inputs/outputs +# This rule builds the a/e calibration using the calibration dsp files for the whole partition +rule build_pht_qc: + input: + cal_files=os.path.join( + filelist_path(config), + "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", + ), + fft_files=os.path.join( + filelist_path(config), + "all-{experiment}-{period}-{run}-fft-" + f"{intier}.filelist", + ), + 
pulser_files=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + overwrite_files=lambda wildcards: get_overwrite_file("pht", wildcards=wildcards), + params: + datatype="cal", + channel="{channel}", + timestamp="{timestamp}", + output: + hit_pars=temp(get_pattern_pars_tmp_channel(config, "pht", "qc")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "pht", "qc")), + log: + get_pattern_log_channel(config, "par_pht_qc", time), + group: + "par-pht" + resources: + mem_swap=60, + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_pht_qc")}' + "--log {log} " + "--configs {configs} " + "--metadata {meta} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--save_path {output.hit_pars} " + "--plot_path {output.plot_file} " + "--overwrite_files {input.overwrite_files} " + "--pulser_files {input.pulser_files} " + "--fft_files {input.fft_files} " + "--cal_files {input.cal_files}" + + +fallback_qc_rule = list(workflow.rules)[-1] + +rule_order_list = [] +ordered = OrderedDict(qc_pht_rules) +ordered.move_to_end("default") +for key, items in ordered.items(): + rule_order_list += [item.name for item in items] +rule_order_list.append(fallback_qc_rule.name) +workflow._ruleorder.add(*rule_order_list) # [::-1] + + +# This rule builds the energy calibration using the calibration dsp files +rule build_per_energy_calibration: + input: + files=os.path.join( + filelist_path(config), + "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", + ), + pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + pht_dict=get_pattern_pars_tmp_channel(config, "pht", "qc"), + inplots=get_pattern_plts_tmp_channel(config, "pht", "qc"), + ctc_dict=ancient( + lambda wildcards: ParsCatalog.get_par_file( + config, wildcards.timestamp, intier + ) + ), + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", + tier="pht", + output: + ecal_file=temp(get_pattern_pars_tmp_channel(config, "pht", "energy_cal")), + results_file=temp( + get_pattern_pars_tmp_channel( + config, "pht", "energy_cal_objects", extension="pkl" + ) + ), + plot_file=temp(get_pattern_plts_tmp_channel(config, "pht", "energy_cal")), + log: + get_pattern_log_channel(config, "par_pht_energy_cal", time), + group: + "par-pht" + resources: + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_hit_ecal")}' + "--log {log} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--configs {configs} " + "--tier {params.tier} " + "--metadata {meta} " + "--plot_path {output.plot_file} " + "--results_path {output.results_file} " + "--save_path {output.ecal_file} " + "--inplot_dict {input.inplots} " + "--in_hit_dict {input.pht_dict} " + "--ctc_dict {input.ctc_dict} " + "--pulser_file {input.pulser} " + "--files {input.files}" + + +part_pht_rules = {} +for key, dataset in part.datasets.items(): + for partition in dataset.keys(): + + rule: + input: + files=part.get_filelists(partition, key, intier), + pulser_files=[ + str(file).replace("par_pht", "par_tcm") + for file in part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="pulser_ids", + ) + ], + ecal_file=part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="energy_cal", + ), + eres_file=part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="energy_cal_objects", + extension="pkl", + ), + inplots=part.get_plt_files( + pht_par_catalog, + partition, + key, + tier="pht", + 
name="energy_cal", + ), + wildcard_constraints: + channel=part.get_wildcard_constraints(partition, key), + params: + datatype="cal", + channel="{channel}" if key == "default" else key, + timestamp=part.get_timestamp( + pht_par_catalog, partition, key, tier="pht" + ), + output: + hit_pars=[ + temp(file) + for file in part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="partcal", + ) + ], + partcal_results=[ + temp(file) + for file in part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="partcal_objects", + extension="pkl", + ) + ], + plot_file=[ + temp(file) + for file in part.get_plt_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="partcal", + ) + ], + log: + part.get_log_file( + pht_par_catalog, + partition, + key, + "pht", + time, + name="par_pht_partcal", + ), + group: + "par-pht" + resources: + mem_swap=len(part.get_filelists(partition, key, intier)) * 15, + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_pht_ecal_part")}' + "--log {log} " + "--configs {configs} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--inplots {input.inplots} " + "--channel {params.channel} " + "--metadata {meta} " + "--fit_results {output.partcal_results} " + "--eres_file {input.eres_file} " + "--hit_pars {output.hit_pars} " + "--plot_file {output.plot_file} " + "--ecal_file {input.ecal_file} " + "--pulser_files {input.pulser_files} " + "--input_files {input.files}" + + set_last_rule_name( + workflow, f"{key}-{partition}-build_pht_energy_super_calibrations" + ) + + if key in part_pht_rules: + part_pht_rules[key].append(list(workflow.rules)[-1]) + else: + part_pht_rules[key] = [list(workflow.rules)[-1]] + + +# Merged energy and a/e supercalibrations to reduce number of rules as they have same inputs/outputs +# This rule builds the a/e calibration using the calibration dsp files for the whole partition +rule build_pht_energy_super_calibrations: + input: + files=os.path.join( + filelist_path(config), + "all-{experiment}-{period}-{run}-cal" + f"-{intier}.filelist", + ), + pulser_files=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + ecal_file=get_pattern_pars_tmp_channel(config, "pht", "energy_cal"), + eres_file=get_pattern_pars_tmp_channel( + config, "pht", "energy_cal_objects", extension="pkl" + ), + inplots=get_pattern_plts_tmp_channel(config, "pht", "energy_cal"), + params: + datatype="cal", + channel="{channel}", + timestamp="{timestamp}", + output: + hit_pars=temp(get_pattern_pars_tmp_channel(config, "pht", "partcal")), + partcal_results=temp( + get_pattern_pars_tmp_channel( + config, "pht", "partcal_objects", extension="pkl" + ) + ), + plot_file=temp(get_pattern_plts_tmp_channel(config, "pht", "partcal")), + log: + get_pattern_log_channel(config, "par_pht_partcal", time), + group: + "par-pht" + resources: + mem_swap=60, + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_pht_ecal_part")}' + "--log {log} " + "--configs {configs} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--metadata {meta} " + "--inplots {input.inplots} " + "--fit_results {output.partcal_results} " + "--eres_file {input.eres_file} " + "--hit_pars {output.hit_pars} " + "--plot_file {output.plot_file} " + "--ecal_file {input.ecal_file} " + "--pulser_files {input.pulser_files} " + "--input_files {input.files}" + + +fallback_pht_rule = list(workflow.rules)[-1] + +rule_order_list = [] +ordered = OrderedDict(part_pht_rules) 
+ordered.move_to_end("default") +for key, items in ordered.items(): + rule_order_list += [item.name for item in items] +rule_order_list.append(fallback_pht_rule.name) +workflow._ruleorder.add(*rule_order_list) # [::-1] + +part_pht_rules = {} +for key, dataset in part.datasets.items(): + for partition in dataset.keys(): + + rule: + input: + files=part.get_filelists(partition, key, intier), + pulser_files=[ + str(file).replace("par_pht", "par_tcm") + for file in part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="pulser_ids", + ) + ], + ecal_file=part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="partcal", + ), + eres_file=part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="partcal_objects", + extension="pkl", + ), + inplots=part.get_plt_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="partcal", + ), + wildcard_constraints: + channel=part.get_wildcard_constraints(partition, key), + params: + datatype="cal", + channel="{channel}" if key == "default" else key, + timestamp=part.get_timestamp( + pht_par_catalog, partition, key, tier="pht" + ), + output: + hit_pars=[ + temp(file) + for file in part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="aoecal", + ) + ], + aoe_results=[ + temp(file) + for file in part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="aoecal_objects", + extension="pkl", + ) + ], + plot_file=[ + temp(file) + for file in part.get_plt_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="aoecal", + ) + ], + log: + part.get_log_file( + pht_par_catalog, + partition, + key, + "pht", + time, + name="par_pht_aoe", + ), + group: + "par-pht" + resources: + mem_swap=len(part.get_filelists(partition, key, intier)) * 15, + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_pht_aoe")}' + "--log {log} " + "--configs {configs} " + "--metadata {meta} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--inplots {input.inplots} " + "--channel {params.channel} " + "--aoe_results {output.aoe_results} " + "--eres_file {input.eres_file} " + "--hit_pars {output.hit_pars} " + "--plot_file {output.plot_file} " + "--ecal_file {input.ecal_file} " + "--pulser_files {input.pulser_files} " + "--input_files {input.files}" + + set_last_rule_name( + workflow, f"{key}-{partition}-build_pht_aoe_calibrations" + ) + + if key in part_pht_rules: + part_pht_rules[key].append(list(workflow.rules)[-1]) + else: + part_pht_rules[key] = [list(workflow.rules)[-1]] + + +# Merged energy and a/e supercalibrations to reduce number of rules as they have same inputs/outputs +# This rule builds the a/e calibration using the calibration dsp files for the whole partition +rule build_pht_aoe_calibrations: + input: + files=os.path.join( + filelist_path(config), + "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", + ), + pulser_files=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + ecal_file=get_pattern_pars_tmp_channel(config, "pht", "partcal"), + eres_file=get_pattern_pars_tmp_channel( + config, "pht", "partcal_objects", extension="pkl" + ), + inplots=get_pattern_plts_tmp_channel(config, "pht", "partcal"), + params: + datatype="cal", + channel="{channel}", + timestamp="{timestamp}", + output: + hit_pars=temp(get_pattern_pars_tmp_channel(config, "pht", "aoecal")), + aoe_results=temp( + get_pattern_pars_tmp_channel( + config, "pht", "aoecal_objects", extension="pkl" + ) + ), + 
plot_file=temp(get_pattern_plts_tmp_channel(config, "pht", "aoecal")), + log: + get_pattern_log_channel(config, "par_pht_aoe_cal", time), + group: + "par-pht" + resources: + mem_swap=60, + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_pht_aoe")}' + "--log {log} " + "--configs {configs} " + "--metadata {meta} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--inplots {input.inplots} " + "--channel {params.channel} " + "--aoe_results {output.aoe_results} " + "--eres_file {input.eres_file} " + "--hit_pars {output.hit_pars} " + "--plot_file {output.plot_file} " + "--ecal_file {input.ecal_file} " + "--pulser_files {input.pulser_files} " + "--input_files {input.files}" + + +fallback_pht_rule = list(workflow.rules)[-1] + +rule_order_list = [] +ordered = OrderedDict(part_pht_rules) +ordered.move_to_end("default") +for key, items in ordered.items(): + rule_order_list += [item.name for item in items] +rule_order_list.append(fallback_pht_rule.name) +workflow._ruleorder.add(*rule_order_list) # [::-1] + +part_pht_rules = {} +for key, dataset in part.datasets.items(): + for partition in dataset.keys(): + + rule: + input: + files=part.get_filelists(partition, key, intier), + pulser_files=[ + str(file).replace("par_pht", "par_tcm") + for file in part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="pulser_ids", + ) + ], + ecal_file=part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="aoecal", + ), + eres_file=part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="aoecal_objects", + extension="pkl", + ), + inplots=part.get_plt_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="aoecal", + ), + wildcard_constraints: + channel=part.get_wildcard_constraints(partition, key), + params: + datatype="cal", + channel="{channel}" if key == "default" else key, + timestamp=part.get_timestamp( + pht_par_catalog, partition, key, tier="pht" + ), + output: + hit_pars=[ + temp(file) + for file in part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + ) + ], + lq_results=[ + temp(file) + for file in part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="objects", + extension="pkl", + ) + ], + plot_file=[ + temp(file) + for file in part.get_plt_files( + pht_par_catalog, + partition, + key, + tier="pht", + ) + ], + log: + part.get_log_file( + pht_par_catalog, + partition, + key, + "pht", + time, + name="par_pht_lq", + ), + group: + "par-pht" + resources: + mem_swap=len(part.get_filelists(partition, key, intier)) * 15, + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_pht_lq")}' + "--log {log} " + "--configs {configs} " + "--metadata {meta} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--inplots {input.inplots} " + "--channel {params.channel} " + "--lq_results {output.lq_results} " + "--eres_file {input.eres_file} " + "--hit_pars {output.hit_pars} " + "--plot_file {output.plot_file} " + "--ecal_file {input.ecal_file} " + "--pulser_files {input.pulser_files} " + "--input_files {input.files}" + + set_last_rule_name(workflow, f"{key}-{partition}-build_pht_lq_calibration") + + if key in part_pht_rules: + part_pht_rules[key].append(list(workflow.rules)[-1]) + else: + part_pht_rules[key] = [list(workflow.rules)[-1]] + + +# This rule builds the lq calibration using the calibration dsp files for the whole partition +rule build_pht_lq_calibration: + input: + files=os.path.join( + filelist_path(config), 
+ "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", + ), + pulser_files=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + ecal_file=get_pattern_pars_tmp_channel(config, "pht", "aoecal"), + eres_file=get_pattern_pars_tmp_channel( + config, "pht", "aoecal_objects", extension="pkl" + ), + inplots=get_pattern_plts_tmp_channel(config, "pht", "aoecal"), + params: + datatype="cal", + channel="{channel}", + timestamp="{timestamp}", + output: + hit_pars=temp(get_pattern_pars_tmp_channel(config, "pht")), + lq_results=temp( + get_pattern_pars_tmp_channel(config, "pht", "objects", extension="pkl") + ), + plot_file=temp(get_pattern_plts_tmp_channel(config, "pht")), + log: + get_pattern_log_channel(config, "par_pht_lq_cal", time), + group: + "par-pht" + resources: + mem_swap=60, + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_pht_lq")}' + "--log {log} " + "--configs {configs} " + "--metadata {meta} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--inplots {input.inplots} " + "--channel {params.channel} " + "--lq_results {output.lq_results} " + "--eres_file {input.eres_file} " + "--hit_pars {output.hit_pars} " + "--plot_file {output.plot_file} " + "--ecal_file {input.ecal_file} " + "--pulser_files {input.pulser_files} " + "--input_files {input.files}" + + +fallback_pht_rule = list(workflow.rules)[-1] + +rule_order_list = [] +ordered = OrderedDict(part_pht_rules) +ordered.move_to_end("default") +for key, items in ordered.items(): + rule_order_list += [item.name for item in items] +rule_order_list.append(fallback_pht_rule.name) +workflow._ruleorder.add(*rule_order_list) # [::-1] diff --git a/workflow/rules/pht_fast.smk b/workflow/rules/pht_pars_geds_fast.smk similarity index 88% rename from workflow/rules/pht_fast.smk rename to workflow/rules/pht_pars_geds_fast.smk index 75d8e7e..2379753 100644 --- a/workflow/rules/pht_fast.smk +++ b/workflow/rules/pht_pars_geds_fast.smk @@ -11,6 +11,7 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars, ) +from legenddataflow.execenv import execenv_smk_py_script pht_fast_rules = {} @@ -96,6 +97,7 @@ for key, dataset in part.datasets.items(): partition, key, "pht", + time, name="par_pht_fast", ), group: @@ -104,8 +106,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 12, runtime=300, shell: - "{swenv} python3 -B " - f"{basedir}/../scripts/pars_pht_fast.py " + f'{execenv_smk_py_script(config, "par_geds_pht_fast")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -136,35 +137,34 @@ for key, dataset in part.datasets.items(): rule par_pht_fast: input: files=os.path.join( - filelist_path(setup), + filelist_path(config), "all-{experiment}-{period}-{run}-cal" + f"-{intier}.filelist", ), - pulser_files=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), - ecal_file=get_pattern_pars_tmp_channel(setup, "pht", "energy_cal"), + pulser_files=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + ecal_file=get_pattern_pars_tmp_channel(config, "pht", "energy_cal"), eres_file=get_pattern_pars_tmp_channel( - setup, "pht", "energy_cal_objects", extension="pkl" + config, "pht", "energy_cal_objects", extension="pkl" ), - inplots=get_pattern_plts_tmp_channel(setup, "pht", "energy_cal"), + inplots=get_pattern_plts_tmp_channel(config, "pht", "energy_cal"), params: datatype="cal", channel="{channel}", timestamp="{timestamp}", output: - hit_pars=temp(get_pattern_pars_tmp_channel(setup, "pht")), + 
hit_pars=temp(get_pattern_pars_tmp_channel(config, "pht")), partcal_results=temp( - get_pattern_pars_tmp_channel(setup, "pht", "objects", extension="pkl") + get_pattern_pars_tmp_channel(config, "pht", "objects", extension="pkl") ), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "pht")), log: - get_pattern_log_channel(setup, "par_pht_fast"), + get_pattern_log_channel(config, "par_pht_fast", time), group: "par-pht" resources: mem_swap=50, runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_pht_fast.py " + f'{execenv_smk_py_script(config, "par_geds_pht_fast")}' "--log {log} " "--configs {configs} " "--metadata {meta} " diff --git a/workflow/rules/psp.smk b/workflow/rules/psp.smk index d55fbcc..e264ca4 100644 --- a/workflow/rules/psp.smk +++ b/workflow/rules/psp.smk @@ -14,151 +14,27 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars, ) +from legenddataflow.execenv import execenv_smk_py_script psp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier(setup, "raw", check_in_cycle=False), + get_pattern_tier(config, "raw", check_in_cycle=False), {"cal": ["par_psp"], "lar": ["par_psp"]}, ) -psp_par_cat_file = Path(pars_path(setup)) / "psp" / "validity.yaml" -if psp_par_cat_file.is_file(): - psp_par_cat_file.unlink() -Path(psp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) -ParsKeyResolve.write_to_yaml(psp_par_catalog, psp_par_cat_file) - -rule build_pars_psp_objects: - input: - lambda wildcards: get_par_chanlist( - setup, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "psp", - basedir, - det_status, - chan_maps, - name="objects", - extension="pkl", - ), - output: - get_pattern_pars( - setup, - "psp", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - ), - group: - "merge-psp" - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " - "--input {input} " - "--output {output} " - "--channelmap {meta} " - - -rule build_plts_psp: - input: - lambda wildcards: get_plt_chanlist( - setup, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "psp", - basedir, - det_status, - chan_maps, - ), - output: - get_pattern_plts(setup, "psp"), - group: - "merge-psp" - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " - "--input {input} " - "--output {output} " - "--channelmap {meta} " +include: "channel_merge.smk" -rule build_pars_psp_db: - input: - lambda wildcards: get_par_chanlist( - setup, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "psp", - basedir, - det_status, - chan_maps, - ), - output: - temp( - get_pattern_pars_tmp( - setup, - "psp", - datatype="cal", - ) - ), - group: - "merge-psp" - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " - "--input {input} " - "--output {output} " - "--channelmap {meta} " - - -rule build_pars_psp: - input: - in_files=lambda wildcards: get_par_chanlist( - setup, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "dsp", - basedir, - det_status, - chan_maps, - name="dplms", - extension="lh5", - ), - in_db=get_pattern_pars_tmp( - setup, - "psp", - datatype="cal", - ), - plts=get_pattern_plts(setup, "psp"), - objects=get_pattern_pars( - setup, - "psp", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - 
), - output: - out_file=get_pattern_pars( - setup, - "psp", - extension="lh5", - check_in_cycle=check_in_cycle, - ), - out_db=get_pattern_pars(setup, "psp", check_in_cycle=check_in_cycle), - group: - "merge-psp" - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " - "--output {output.out_file} " - "--in_db {input.in_db} " - "--out_db {output.out_db} " - "--input {input.in_files} " - "--channelmap {meta} " +build_merge_rules("psp", lh5_merge=True, lh5_tier="dsp") rule build_psp: input: - raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), + raw_file=get_pattern_tier(config, "raw", check_in_cycle=False), pars_file=ancient( lambda wildcards: ParsCatalog.get_par_file( - setup, wildcards.timestamp, "psp" + config, wildcards.timestamp, "psp" ) ), params: @@ -166,18 +42,17 @@ rule build_psp: datatype="{datatype}", ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, output: - tier_file=get_pattern_tier(setup, "psp", check_in_cycle=check_in_cycle), - db_file=get_pattern_pars_tmp(setup, "psp_db"), + tier_file=get_pattern_tier(config, "psp", check_in_cycle=check_in_cycle), + db_file=get_pattern_pars_tmp(config, "psp_db"), log: - get_pattern_log(setup, "tier_psp"), + get_pattern_log(config, "tier_psp", time), group: "tier-dsp" resources: runtime=300, mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/build_dsp.py " + f'{execenv_smk_py_script(config, "build_tier_dsp")}' "--log {log} " "--tier psp " f"--configs {ro(configs)} " diff --git a/workflow/rules/psp_pars_geds.smk b/workflow/rules/psp_pars_geds.smk index 9e14cad..8d53220 100644 --- a/workflow/rules/psp_pars_geds.smk +++ b/workflow/rules/psp_pars_geds.smk @@ -4,23 +4,25 @@ Snakemake rules for processing psp (partition dsp) tier data. 
- extraction of psd calibration parameters and partition level energy fitting for each channel over whole partition from cal data """ -from legenddataflow.pars_loading import pars_catalog -from legenddataflow.create_pars_keylist import pars_key_resolve -from legenddataflow.utils import par_psp_path, par_dsp_path, set_last_rule_name +from legenddataflow.utils import set_last_rule_name +from legenddataflow.create_pars_keylist import ParsKeyResolve from legenddataflow.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, get_pattern_log, get_pattern_pars, + get_pattern_tier, ) +from legenddataflow.execenv import execenv_smk_py_script -psp_par_catalog = pars_key_resolve.get_par_catalog( +psp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier_raw(setup), + get_pattern_tier(config, "raw", check_in_cycle=False), {"cal": ["par_psp"], "lar": ["par_psp"]}, ) + psp_rules = {} for key, dataset in part.datasets.items(): for partition in dataset.keys(): @@ -87,6 +89,7 @@ for key, dataset in part.datasets.items(): partition, key, "psp", + time, name="par_psp", ), group: @@ -94,8 +97,7 @@ for key, dataset in part.datasets.items(): resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/par_psp_geds.py " + f'{execenv_smk_py_script(config, "par_geds_psp_average")}' "--log {log} " "--configs {configs} " "--datatype {params.datatype} " @@ -118,30 +120,29 @@ for key, dataset in part.datasets.items(): # Merged energy and a/e supercalibrations to reduce number of rules as they have same inputs/outputs # This rule builds the a/e calibration using the calibration dsp files for the whole partition -rule build_par_psp: +rule build_par_psp_fallback: input: - dsp_pars=get_pattern_pars_tmp_channel(setup, "dsp", "eopt"), - dsp_objs=get_pattern_pars_tmp_channel(setup, "dsp", "objects", extension="pkl"), - dsp_plots=get_pattern_plts_tmp_channel(setup, "dsp"), + dsp_pars=get_pattern_pars_tmp_channel(config, "dsp", "eopt"), + dsp_objs=get_pattern_pars_tmp_channel(config, "dsp", "objects", extension="pkl"), + dsp_plots=get_pattern_plts_tmp_channel(config, "dsp"), params: datatype="cal", channel="{channel}", timestamp="{timestamp}", output: - psp_pars=temp(get_pattern_pars_tmp_channel(setup, "psp", "eopt")), + psp_pars=temp(get_pattern_pars_tmp_channel(config, "psp", "eopt")), psp_objs=temp( - get_pattern_pars_tmp_channel(setup, "psp", "objects", extension="pkl") + get_pattern_pars_tmp_channel(config, "psp", "objects", extension="pkl") ), - psp_plots=temp(get_pattern_plts_tmp_channel(setup, "psp")), + psp_plots=temp(get_pattern_plts_tmp_channel(config, "psp")), log: - get_pattern_log_channel(setup, "pars_psp"), + get_pattern_log_channel(config, "pars_psp", time), group: "par-psp" resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/par_psp.py " + f'{execenv_smk_py_script(config, "par_geds_psp_average")}' "--log {log} " "--configs {configs} " "--datatype {params.datatype} " @@ -167,21 +168,22 @@ workflow._ruleorder.add(*rule_order_list) # [::-1] rule build_svm_psp: input: - hyperpars=lambda wildcards: get_svm_file(wildcards, "psp", "svm_hyperpars"), - train_data=lambda wildcards: get_svm_file( + hyperpars=lambda wildcards: get_input_par_file( wildcards, "psp", "svm_hyperpars" + ), + train_data=lambda wildcards: str( + get_input_par_file(wildcards, "psp", "svm_hyperpars") ).replace("hyperpars.json", "train.lh5"), output: - dsp_pars=get_pattern_pars(setup, "psp", "svm", "pkl"), + 
dsp_pars=get_pattern_pars(config, "psp", "svm", "pkl"), log: - get_pattern_log(setup, "pars_psp_svm").replace("{datatype}", "cal"), + str(get_pattern_log(config, "pars_psp_svm", time)).replace("{datatype}", "cal"), group: "par-dsp-svm" resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_build_svm_geds.py " + f'{execenv_smk_py_script(config, "par_geds_dsp_svm_build")}' "--log {log} " "--train_data {input.train_data} " "--train_hyperpars {input.hyperpars} " @@ -190,19 +192,18 @@ rule build_svm_psp: rule build_pars_psp_svm: input: - dsp_pars=get_pattern_pars_tmp_channel(setup, "psp_eopt"), - svm_model=get_pattern_pars(setup, "psp", "svm", "pkl"), + dsp_pars=get_pattern_pars_tmp_channel(config, "psp_eopt"), + svm_model=get_pattern_pars(config, "psp", "svm", "pkl"), output: - dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "psp")), + dsp_pars=temp(get_pattern_pars_tmp_channel(config, "psp")), log: - get_pattern_log_channel(setup, "pars_dsp_svm"), + get_pattern_log_channel(config, "pars_dsp_svm", time), group: "par-dsp" resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_svm_geds.py " + f'{execenv_smk_py_script(config, "par_geds_dsp_svm")}' "--log {log} " "--input_file {input.dsp_pars} " "--output_file {output.dsp_pars} " diff --git a/workflow/rules/qc_phy.smk b/workflow/rules/qc_phy.smk index 982ab4e..a5cd954 100644 --- a/workflow/rules/qc_phy.smk +++ b/workflow/rules/qc_phy.smk @@ -11,6 +11,7 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars, ) +from legenddataflow.execenv import execenv_smk_py_script intier = "psp" @@ -57,6 +58,7 @@ for key, dataset in part.datasets.items(): partition, key, "pht", + time, name="par_pht_qc_phy", ), group: @@ -65,8 +67,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 20, runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_pht_qc_phy.py " + f'{execenv_smk_py_script(config, "par_geds_pht_qc_phy")}' "--log {log} " "--configs {configs} " "--datatype {params.datatype} " @@ -89,7 +90,7 @@ for key, dataset in part.datasets.items(): rule build_pht_qc_phy: input: phy_files=os.path.join( - filelist_path(setup), + filelist_path(config), "all-{experiment}-{period}-{run}-phy-" + f"{intier}.filelist", ), params: @@ -97,18 +98,17 @@ rule build_pht_qc_phy: channel="{channel}", timestamp="{timestamp}", output: - hit_pars=temp(get_pattern_pars_tmp_channel(setup, "pht", "qcphy")), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "qcphy")), + hit_pars=temp(get_pattern_pars_tmp_channel(config, "pht", "qcphy")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "pht", "qcphy")), log: - get_pattern_log_channel(setup, "pars_pht_qc_phy"), + get_pattern_log_channel(config, "pars_pht_qc_phy", time), group: "par-pht" resources: mem_swap=60, runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_pht_qc_phy.py " + f'{execenv_smk_py_script(config, "par_geds_pht_qc_phy")}' "--log {log} " "--configs {configs} " "--datatype {params.datatype} " @@ -133,7 +133,7 @@ workflow._ruleorder.add(*rule_order_list) # [::-1] rule build_plts_pht_phy: input: lambda wildcards: get_plt_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, @@ -142,12 +142,11 @@ rule build_plts_pht_phy: name="qcphy", ), output: - get_pattern_plts(setup, "pht", "qc_phy"), + get_pattern_plts(config, "pht", "qc_phy"), group: 
"merge-hit" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input} " "--output {output} " @@ -155,7 +154,7 @@ rule build_plts_pht_phy: rule build_pars_pht_phy: input: infiles=lambda wildcards: get_par_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, @@ -163,13 +162,12 @@ rule build_pars_pht_phy: chan_maps, name="qcphy", ), - plts=get_pattern_plts(setup, "pht", "qc_phy"), + plts=get_pattern_plts(config, "pht", "qc_phy"), output: - get_pattern_pars(setup, "pht", name="qc_phy", check_in_cycle=check_in_cycle), + get_pattern_pars(config, "pht", name="qc_phy", check_in_cycle=check_in_cycle), group: "merge-hit" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input.infiles} " "--output {output} " diff --git a/workflow/rules/raw.smk b/workflow/rules/raw.smk index f647095..b0040fd 100644 --- a/workflow/rules/raw.smk +++ b/workflow/rules/raw.smk @@ -7,13 +7,14 @@ from legenddataflow.patterns import ( ) from legenddataflow.utils import set_last_rule_name from legenddataflow.create_pars_keylist import ParsKeyResolve +from legenddataflow.execenv import execenv_smk_py_script raw_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], [ - get_pattern_tier_daq_unsorted(setup, extension="*"), - get_pattern_tier_daq(setup, extension="*"), - get_pattern_tier(setup, "raw", check_in_cycle=False), + get_pattern_tier_daq_unsorted(config, extension="*"), + get_pattern_tier_daq(config, extension="*"), + get_pattern_tier(config, "raw", check_in_cycle=False), ], {"cal": ["par_raw"]}, ) @@ -24,23 +25,22 @@ rule build_raw_orca: This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file """ input: - get_pattern_tier_daq(setup, extension="orca"), + get_pattern_tier_daq(config, extension="orca"), params: timestamp="{timestamp}", datatype="{datatype}", ro_input=lambda _, input: ro(input), output: - get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), + get_pattern_tier(config, "raw", check_in_cycle=check_in_cycle), log: - get_pattern_log(setup, "tier_raw"), + get_pattern_log(config, "tier_raw", time), group: "tier-raw" resources: mem_swap=110, runtime=300, shell: - "{swenv} python3 -B " - "{basedir}" + f"/../scripts/build_raw_orca.py " + f'{execenv_smk_py_script(config, "build_tier_raw_orca")}' "--log {log} " f"--configs {ro(configs)} " f"--chan_maps {ro(chan_maps)} " @@ -54,23 +54,22 @@ rule build_raw_fcio: This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file """ input: - get_pattern_tier_daq(setup, extension="fcio"), + get_pattern_tier_daq(config, extension="fcio"), params: timestamp="{timestamp}", datatype="{datatype}", ro_input=lambda _, input: ro(input), output: - get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), + get_pattern_tier(config, "raw", check_in_cycle=check_in_cycle), log: - get_pattern_log(setup, "tier_raw"), + get_pattern_log(config, "tier_raw", time), group: "tier-raw" resources: mem_swap=110, runtime=300, shell: - "{swenv} python3 -B " - "{basedir}" + f"/../scripts/build_raw_fcio.py " + f'{execenv_smk_py_script(config, "build_tier_raw_fcio")}' "--log {log} " f"--configs {ro(configs)} " f"--chan_maps {ro(chan_maps)} " @@ -85,7 +84,7 @@ rule build_raw_blind: and runs only if the blinding check file is on disk. Output is just the blinded raw file. 
""" input: - tier_file=str(get_pattern_tier(setup, "raw", check_in_cycle=False)).replace( + tier_file=str(get_pattern_tier(config, "raw", check_in_cycle=False)).replace( "{datatype}", "phy" ), blind_file=get_blinding_curve_file, @@ -94,17 +93,18 @@ rule build_raw_blind: datatype="phy", ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, output: - get_pattern_tier_raw_blind(setup), + get_pattern_tier_raw_blind(config), log: - str(get_pattern_log(setup, "tier_raw_blind")).replace("{datatype}", "phy"), + str(get_pattern_log(config, "tier_raw_blind", time)).replace( + "{datatype}", "phy" + ), group: "tier-raw" resources: mem_swap=110, runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/build_raw_blind.py " + f'{execenv_smk_py_script(config, "build_tier_raw_blind")}' "--log {log} " f"--configs {ro(configs)} " f"--chan_maps {ro(chan_maps)} " diff --git a/workflow/rules/skm.smk b/workflow/rules/skm.smk index d3c5d51..7a4a686 100644 --- a/workflow/rules/skm.smk +++ b/workflow/rules/skm.smk @@ -8,26 +8,26 @@ from legenddataflow.patterns import ( get_pattern_pars, get_pattern_log_concat, ) +from legenddataflow.execenv import execenv_smk_py_script rule build_skm: input: - get_pattern_tier(setup, "pet_concat", check_in_cycle=False), + get_pattern_tier(config, "pet_concat", check_in_cycle=False), output: - get_pattern_tier(setup, "skm", check_in_cycle=check_in_cycle), + get_pattern_tier(config, "skm", check_in_cycle=check_in_cycle), params: timestamp="20230410T000000Z", datatype="phy", ro_input=lambda _, input: ro(input), log: - get_pattern_log_concat(setup, "tier_skm"), + get_pattern_log_concat(config, "tier_skm", time), group: "tier-skm" resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/build_skm.py " + f'{execenv_smk_py_script(config, "build_tier_skm")}' f"--configs {ro(configs)} " "--timestamp {params.timestamp} " "--log {log} " diff --git a/workflow/rules/tcm.smk b/workflow/rules/tcm.smk index 6fa85a9..afb080c 100644 --- a/workflow/rules/tcm.smk +++ b/workflow/rules/tcm.smk @@ -8,28 +8,28 @@ from legenddataflow.patterns import ( get_pattern_pars_tmp_channel, get_pattern_log_channel, ) +from legenddataflow.execenv import execenv_smk_py_script # This rule builds the tcm files each raw file rule build_tier_tcm: input: - get_pattern_tier(setup, "raw", check_in_cycle=False), + get_pattern_tier(config, "raw", check_in_cycle=False), params: timestamp="{timestamp}", datatype="{datatype}", input=lambda _, input: ro(input), output: - get_pattern_tier(setup, "tcm", check_in_cycle=check_in_cycle), + get_pattern_tier(config, "tcm", check_in_cycle=check_in_cycle), log: - get_pattern_log(setup, "tier_tcm"), + get_pattern_log(config, "tier_tcm", time), group: "tier-tcm" resources: runtime=300, mem_swap=20, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/build_tcm.py " + f'{execenv_smk_py_script(config, "build_tier_tcm")}' "--log {log} " f"--configs {ro(configs)} " "--datatype {params.datatype} " @@ -41,7 +41,7 @@ rule build_tier_tcm: rule build_pulser_ids: input: os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-tcm.filelist" + filelist_path(config), "all-{experiment}-{period}-{run}-cal-tcm.filelist" ), params: input=lambda _, input: ro(input), @@ -49,16 +49,15 @@ rule build_pulser_ids: datatype="cal", channel="{channel}", output: - pulser=temp(get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids")), + pulser=temp(get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids")), log: - get_pattern_log_channel(setup, 
"tcm_pulsers"), + get_pattern_log_channel(config, "tcm_pulsers", time), group: "tier-tcm" resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_tcm_pulser.py " + f'{execenv_smk_py_script(config, "par_geds_tcm_pulser")}' "--log {log} " f"--configs {ro(configs)} " "--datatype {params.datatype} " diff --git a/workflow/src/legenddataflow/FileKey.py b/workflow/src/legenddataflow/FileKey.py index c11e6e5..7870e46 100644 --- a/workflow/src/legenddataflow/FileKey.py +++ b/workflow/src/legenddataflow/FileKey.py @@ -5,6 +5,7 @@ import re import string from collections import namedtuple +from itertools import product from pathlib import Path from .patterns import ( @@ -21,10 +22,16 @@ def regex_from_filepattern(filepattern): f = [] + wildcards = [] last = 0 for match in re.compile(r"\{(?P[\w]+)\}").finditer(filepattern): + f.append(re.escape(filepattern[last : match.start()])) wildcard = match.group("name") - f.append(f"(?P={wildcard})") + if wildcard in wildcards: + f.append(f"(?P={wildcard})") + else: + wildcards.append(wildcard) + f.append(f"(?P<{wildcard}>.+)") last = match.end() f.append(re.escape(filepattern[last:])) f.append("$") @@ -66,7 +73,7 @@ def get_filekey_from_filename(cls, filename): def get_filekey_from_pattern(cls, filename, pattern=None): if isinstance(pattern, Path): pattern = pattern.as_posix() - + filename = str(filename) key_pattern_rx = re.compile( regex_from_filepattern(cls.key_pattern if pattern is None else pattern) ) @@ -101,9 +108,23 @@ def parse_keypart(cls, keypart): return cls(**d) def expand(self, file_pattern, **kwargs): - wildcard_dict = dict(**self._asdict(), **kwargs) + file_pattern = str(file_pattern) + wildcard_dict = self._asdict() + if kwargs is not None: + for key, value in kwargs.items(): + wildcard_dict[key] = value + wildcard_dict = { + wildcard: [wildcard_value] + if isinstance(wildcard_value, str) + else wildcard_value + for wildcard, wildcard_value in wildcard_dict.items() + } formatter = string.Formatter() - return [formatter.vformat(file_pattern, (), wildcard_dict)] + result = [] + for combo in product(*wildcard_dict.values()): + substitution = dict(zip(list(wildcard_dict), combo)) + result.append(formatter.vformat(file_pattern, (), substitution)) + return result def get_path_from_filekey(self, pattern, **kwargs): if kwargs is None: diff --git a/workflow/src/legenddataflow/cal_grouping.py b/workflow/src/legenddataflow/cal_grouping.py index ce06c1d..b2ce781 100644 --- a/workflow/src/legenddataflow/cal_grouping.py +++ b/workflow/src/legenddataflow/cal_grouping.py @@ -170,6 +170,7 @@ def get_log_file( dataset, channel, tier, + processing_timestamp, experiment="l200", datatype="cal", name=None, @@ -183,12 +184,17 @@ def get_log_file( datatype=datatype, name=name, ) - fk = ChannelProcKey.get_filekey_from_pattern(Path(par_files[0]).name) - if channel == "default": - fk.channel = "{channel}" + if len(par_files) > 0: + fk = ChannelProcKey.get_filekey_from_pattern(Path(par_files[0]).name) + if channel == "default": + fk.channel = "{channel}" + else: + fk.channel = channel + return fk.get_path_from_filekey( + get_pattern_log_channel(self.setup, name, processing_timestamp) + )[0] else: - fk.channel = channel - return fk.get_path_from_filekey(get_pattern_log_channel(self.setup, name))[0] + return "log.log" def get_timestamp( self, catalog, dataset, channel, tier, experiment="l200", datatype="cal" @@ -202,8 +208,11 @@ def get_timestamp( datatype=datatype, name=None, ) - fk = 
ChannelProcKey.get_filekey_from_pattern(Path(par_files[0]).name) - return fk.timestamp + if len(par_files) > 0: + fk = ChannelProcKey.get_filekey_from_pattern(Path(par_files[0]).name) + return fk.timestamp + else: + return "20200101T000000Z" def get_wildcard_constraints(self, dataset, channel): if channel == "default": @@ -221,6 +230,6 @@ def get_wildcard_constraints(self, dataset, channel): out_string = "" for channel in exclude_chans: out_string += f"(?!{channel})" - return out_string + r"^[VPCB]\d{1}\w{5}$" + return out_string + r"[PCVB]{1}\d{1}\w{5}" else: - return r"^[VPCB]\d{1}\w{5}$" + return r"[PCVB]{1}\d{1}\w{5}" diff --git a/workflow/src/legenddataflow/create_pars_keylist.py b/workflow/src/legenddataflow/create_pars_keylist.py index 9325a6d..5f51828 100644 --- a/workflow/src/legenddataflow/create_pars_keylist.py +++ b/workflow/src/legenddataflow/create_pars_keylist.py @@ -7,10 +7,9 @@ import warnings from pathlib import Path -import snakemake as smk import yaml -from .FileKey import FileKey, ProcessingFileKey +from .FileKey import FileKey, ProcessingFileKey, regex_from_filepattern from .patterns import par_validity_pattern @@ -107,13 +106,10 @@ def get_keys(keypart, search_pattern): wildcard_dict = dict(ext="*", **d._asdict()) else: wildcard_dict = d._asdict() - try: - tier_pattern_rx = re.compile( - smk.io.regex_from_filepattern(str(search_pattern)) - ) - except AttributeError: - tier_pattern_rx = re.compile(smk.io.regex(str(search_pattern))) - fn_glob_pattern = smk.io.expand(search_pattern, **wildcard_dict)[0] + + tier_pattern_rx = re.compile(regex_from_filepattern(str(search_pattern))) + key = FileKey.get_filekey_from_pattern(search_pattern, search_pattern) + fn_glob_pattern = key.get_path_from_filekey(search_pattern, **wildcard_dict)[0] p = Path(fn_glob_pattern) parts = p.parts[p.is_absolute() :] files = Path(p.root).glob(str(Path(*parts))) diff --git a/workflow/src/legenddataflow/execenv.py b/workflow/src/legenddataflow/execenv.py index 6a0239d..9fd2ac0 100644 --- a/workflow/src/legenddataflow/execenv.py +++ b/workflow/src/legenddataflow/execenv.py @@ -60,19 +60,19 @@ def execenv_python(config, aslist=False): return " ".join(cmdline), cmdenv -def execenv_smk_py_script(workflow, config, scriptname, aslist=False): +def execenv_smk_py_script(config, scriptname, aslist=False): """Returns the command used to run a Python script for a Snakemake rule. 
For example: `apptainer run image.sif python path/to/script.py` """ config = AttrsDict(config) - cmdline, cmdenv = execenv_python(config, aslist=True) - cmdline.append(f"{workflow.basedir}/scripts/{scriptname}") + cmdline, _ = execenv_prefix(config, aslist=True) + cmdline.append(f"{config.paths.install}/bin/{scriptname} ") if aslist: - return cmdline, cmdenv - return " ".join(cmdline), cmdenv + return cmdline + return " ".join(cmdline) def dataprod() -> None: @@ -240,9 +240,8 @@ def _runcmd(cmd_expr, cmd_env, **kwargs): "pip", "--no-cache", "install", - str(config_loc), + str(config_loc), # +"[dataprod]" ] - if args.editable: cmd_expr.insert(-1, "--editable") diff --git a/workflow/src/legenddataflow/patterns.py b/workflow/src/legenddataflow/patterns.py index 71f5db4..b05be0a 100644 --- a/workflow/src/legenddataflow/patterns.py +++ b/workflow/src/legenddataflow/patterns.py @@ -289,9 +289,10 @@ def get_pattern_plts(setup, tier, name=None): ) -def get_pattern_log(setup, processing_step): +def get_pattern_log(setup, processing_step, time): return ( Path(f"{tmp_log_path(setup)}") + / time / processing_step / ( "{experiment}-{period}-{run}-{datatype}-{timestamp}-" @@ -301,9 +302,10 @@ def get_pattern_log(setup, processing_step): ) -def get_pattern_log_channel(setup, processing_step): +def get_pattern_log_channel(setup, processing_step, time): return ( Path(f"{tmp_log_path(setup)}") + / time / processing_step / ( "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" @@ -313,9 +315,10 @@ def get_pattern_log_channel(setup, processing_step): ) -def get_pattern_log_concat(setup, processing_step): +def get_pattern_log_concat(setup, processing_step, time): return ( Path(f"{tmp_log_path(setup)}") + / time / processing_step / ("{experiment}-{period}-{run}-{datatype}-" + processing_step + ".log") ) diff --git a/workflow/src/legenddataflow/scripts/blinding_calibration.py b/workflow/src/legenddataflow/scripts/blinding_calibration.py deleted file mode 100644 index e4b79f2..0000000 --- a/workflow/src/legenddataflow/scripts/blinding_calibration.py +++ /dev/null @@ -1,115 +0,0 @@ -""" -This script applies a simple calibration to the daqenergy for all channels, -it does this using a peak search, matching the peaks to the given ones -and deriving a simple scaling relation from adc to keV. 
-""" - -import argparse -import logging -import pickle as pkl -from pathlib import Path - -import matplotlib as mpl -import matplotlib.pyplot as plt -import numpy as np -from dbetto.catalog import Props -from legendmeta import LegendMetadata -from lgdo import lh5 -from pygama.pargen.energy_cal import HPGeCalibration - -mpl.use("agg") - -argparser = argparse.ArgumentParser() -argparser.add_argument("--files", help="files", nargs="*", type=str) - -argparser.add_argument("--blind_curve", help="blind_curve", type=str) -argparser.add_argument("--plot_file", help="out plot path", type=str) - -argparser.add_argument("--meta", help="meta", type=str) -argparser.add_argument("--configs", help="configs", type=str) -argparser.add_argument("--log", help="log", type=str) - -argparser.add_argument("--timestamp", help="timestamp", type=str) -argparser.add_argument("--datatype", help="datatype", type=str) -argparser.add_argument("--channel", help="channel", type=str) - -argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") -args = argparser.parse_args() - -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) -log = logging.getLogger(__name__) - -# load in channel map -meta = LegendMetadata(args.meta, lazy=True) -chmap = meta.channelmap(args.timestamp) - -# if chmap.map("daq.rawid")[int(args.channel[2:])]["analysis"]["is_blinded"] is True: -pars_dict = {} -# peaks to search for -peaks_keV = np.array( - [238, 583.191, 727.330, 860.564, 1592.53, 1620.50, 2103.53, 2614.50] -) - -E_uncal = lh5.read(f"{args.channel}/raw/daqenergy", sorted(args.files))[0].view_as("np") -E_uncal = E_uncal[E_uncal > 200] -guess_keV = 2620 / np.nanpercentile(E_uncal, 99) # usual simple guess -Euc_min = peaks_keV[0] / guess_keV * 0.6 -Euc_max = peaks_keV[-1] / guess_keV * 1.1 -dEuc = 1 / guess_keV - -# daqenergy is an int so use integer binning (dx used to be bugged as output so switched to nbins) - - -hpge_cal = HPGeCalibration( - "daqenergy", - peaks_keV, - guess_keV, - 0, - uncal_is_int=True, - debug_mode=args.debug, -) - -# Run the rough peak search -detected_peaks_locs, detected_peaks_keV, roughpars = hpge_cal.hpge_find_E_peaks(E_uncal) - -log.info(f"{len(detected_peaks_locs)} peaks found:") -log.info("\t Energy | Position ") -for i, (Li, Ei) in enumerate(zip(detected_peaks_locs, detected_peaks_keV)): - log.info(f"\t{i}".ljust(4) + str(Ei).ljust(9) + f"| {Li:g}".ljust(5)) # noqa: G003 - -# dictionary to pass to build hit -out_dict = { - "pars": { - "operations": { - "daqenergy_cal": { - "expression": "daqenergy*a", - "parameters": {"a": round(roughpars[0], 5)}, - } - } - } -} - -# plot to check thagt the calibration is correct with zoom on 2.6 peak -fig = plt.figure(figsize=(8, 10)) -ax = plt.subplot(211) -ax.hist(E_uncal * roughpars[0], bins=np.arange(0, 3000, 1), histtype="step") -ax.set_ylabel("counts") -ax.set_yscale("log") -ax2 = plt.subplot(212) -ax2.hist( - E_uncal * roughpars[0], - bins=np.arange(2600, 2630, 1 * roughpars[0]), - histtype="step", -) -ax2.set_xlabel("energy (keV)") -ax2.set_ylabel("counts") -plt.suptitle(args.channel) -with Path(args.plot_file).open("wb") as w: - pkl.dump(fig, w, protocol=pkl.HIGHEST_PROTOCOL) -plt.close() - -Props.write_to_file(args.blind_curve, out_dict) diff --git a/workflow/src/legenddataflow/scripts/build_dsp.py 
b/workflow/src/legenddataflow/scripts/build_dsp.py deleted file mode 100644 index 7e44bb6..0000000 --- a/workflow/src/legenddataflow/scripts/build_dsp.py +++ /dev/null @@ -1,167 +0,0 @@ -import argparse -import re -import time -import warnings -from pathlib import Path - -import numpy as np -from dbetto import TextDB -from dbetto.catalog import Props -from dspeed import build_dsp -from legendmeta import LegendMetadata -from lgdo import lh5 - -from ..log import build_log - - -def replace_list_with_array(dic): - for key, value in dic.items(): - if isinstance(value, dict): - dic[key] = replace_list_with_array(value) - elif isinstance(value, list): - dic[key] = np.array(value, dtype="float32") - else: - pass - return dic - - -warnings.filterwarnings(action="ignore", category=RuntimeWarning) - -argparser = argparse.ArgumentParser() -argparser.add_argument("--configs", help="configs path", type=str, required=True) -argparser.add_argument("--metadata", help="metadata", type=str, required=True) -argparser.add_argument("--log", help="log file", type=str) - -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--tier", help="Tier", type=str, required=True) - -argparser.add_argument( - "--pars_file", help="database file for detector", nargs="*", default=[] -) -argparser.add_argument("--input", help="input file", type=str) - -argparser.add_argument("--output", help="output file", type=str) -argparser.add_argument("--db_file", help="db file", type=str) -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True) -config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] -if args.tier in ["dsp", "psp"]: - config_dict = config_dict["tier_dsp"] -elif args.tier in ["ann", "pan"]: - config_dict = config_dict["tier_ann"] -else: - msg = f"Tier {args.tier} not supported" - raise ValueError(msg) - -log = build_log(config_dict, args.log) - -channel_dict = config_dict["inputs"]["processing_chain"] -settings_dict = config_dict["options"].get("settings", {}) -if isinstance(settings_dict, str): - settings_dict = Props.read_from(settings_dict) - -meta = LegendMetadata(path=args.metadata) -chan_map = meta.channelmap(args.timestamp, system=args.datatype) - -if args.tier in ["ann", "pan"]: - channel_dict = { - f"ch{chan_map[chan].daq.rawid:07}/dsp": Props.read_from(file) - for chan, file in channel_dict.items() - } -else: - channel_dict = { - f"ch{chan_map[chan].daq.rawid:07}/raw": Props.read_from(file) - for chan, file in channel_dict.items() - } -db_files = [ - par_file - for par_file in args.pars_file - if Path(par_file).suffix in (".json", ".yaml", ".yml") -] - -database_dic = Props.read_from(db_files, subst_pathvar=True) -database_dic = replace_list_with_array(database_dic) - -Path(args.output).parent.mkdir(parents=True, exist_ok=True) - -rng = np.random.default_rng() -rand_num = f"{rng.integers(0, 99999):05d}" -temp_output = f"{args.output}.{rand_num}" - -start = time.time() - -build_dsp( - args.input, - temp_output, - {}, - database=database_dic, - chan_config=channel_dict, - write_mode="r", - buffer_len=settings_dict.get("buffer_len", 1000), - block_width=settings_dict.get("block_width", 16), -) - -log.info(f"build_dsp finished in {time.time()-start}") -Path(temp_output).rename(args.output) - -key = Path(args.output).name.replace(f"-tier_{args.tier}.lh5", "") - -if args.tier in ["dsp", "psp"]: - raw_channels = [ - channel for channel in 
lh5.ls(args.input) if re.match("(ch\\d{7})", channel) - ] - raw_fields = [ - field.split("/")[-1] for field in lh5.ls(args.input, f"{raw_channels[0]}/raw/") - ] - - outputs = {} - channels = [] - for channel, chan_dict in channel_dict.items(): - output = chan_dict["outputs"] - in_dict = False - for entry in outputs: - if outputs[entry]["fields"] == output: - outputs[entry]["channels"].append(channel.split("/")[0]) - in_dict = True - if in_dict is False: - outputs[f"group{len(list(outputs))+1}"] = { - "channels": [channel.split("/")[0]], - "fields": output, - } - channels.append(channel.split("/")[0]) - - full_dict = { - "valid_fields": { - "raw": {"group1": {"fields": raw_fields, "channels": raw_channels}}, - "dsp": outputs, - }, - "valid_keys": {key: {"valid_channels": {"raw": raw_channels, "dsp": channels}}}, - } -else: - outputs = {} - channels = [] - for channel, chan_dict in channel_dict.items(): - output = chan_dict["outputs"] - in_dict = False - for entry in outputs: - if outputs[entry]["fields"] == output: - outputs[entry]["channels"].append(channel.split("/")[0]) - in_dict = True - if in_dict is False: - outputs[f"group{len(list(outputs))+1}"] = { - "channels": [channel.split("/")[0]], - "fields": output, - } - channels.append(channel.split("/")[0]) - - full_dict = { - "valid_fields": { - "ann": outputs, - }, - "valid_keys": {key: {"valid_channels": {"ann": channels}}}, - } - -Path(args.db_file).parent.mkdir(parents=True, exist_ok=True) -Props.write_to(args.db_file, full_dict) diff --git a/workflow/src/legenddataflow/scripts/build_evt.py b/workflow/src/legenddataflow/scripts/build_evt.py deleted file mode 100644 index b4723b4..0000000 --- a/workflow/src/legenddataflow/scripts/build_evt.py +++ /dev/null @@ -1,182 +0,0 @@ -import argparse -import json -import time -from pathlib import Path - -import lgdo.lh5 as lh5 -import numpy as np -from dbetto import Props, TextDB -from legendmeta import LegendMetadata -from lgdo.types import Array -from pygama.evt import build_evt - -from ..log import build_log - -sto = lh5.LH5Store() - - -def find_matching_values_with_delay(arr1, arr2, jit_delay): - matching_values = [] - - # Create an array with all possible delay values - delays = np.arange(0, int(1e9 * jit_delay)) * jit_delay - - for delay in delays: - arr2_delayed = arr2 + delay - - # Find matching values and indices - mask = np.isin(arr1, arr2_delayed, assume_unique=True) - matching_values.extend(arr1[mask]) - - return np.unique(matching_values) - - -argparser = argparse.ArgumentParser() -argparser.add_argument("--hit_file", help="hit file", type=str) -argparser.add_argument("--dsp_file", help="dsp file", type=str) -argparser.add_argument("--tcm_file", help="tcm file", type=str) -argparser.add_argument("--ann_file", help="ann file") -argparser.add_argument("--xtc_file", help="xtc file", type=str) -argparser.add_argument("--par_files", help="par files", nargs="*") - -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--tier", help="Tier", type=str, required=True) - -argparser.add_argument("--configs", help="configs", type=str, required=True) -argparser.add_argument("--metadata", help="metadata path", type=str, required=True) -argparser.add_argument("--log", help="log_file", type=str) - -argparser.add_argument("--output", help="output file", type=str) -args = argparser.parse_args() - -# load in config -configs = TextDB(args.configs, lazy=True) -if args.tier 
in ("evt", "pet"): - rule_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ - "tier_evt" - ] - -else: - msg = "unknown tier" - raise ValueError(msg) - -config_dict = rule_dict["inputs"] -evt_config_file = config_dict["evt_config"] - -log = build_log(rule_dict, args.log) - -meta = LegendMetadata(args.metadata, lazy=True) -chmap = meta.channelmap(args.timestamp) - -evt_config = Props.read_from(evt_config_file) - -if args.datatype in ("phy", "xtc"): - exp_string = evt_config["operations"]["geds___energy"]["expression"] - exp_string = exp_string.replace( - 'xtalk_matrix_filename=""', f'xtalk_matrix_filename="{args.xtc_file}"' - ) - exp_string = exp_string.replace( - 'cal_par_files=""', f"cal_par_files={args.par_files}" - ) - exp_string2 = exp_string.replace('return_mode="energy"', 'return_mode="tcm_index"') - - file_path_config = { - "operations": { - "geds___energy": {"expression": exp_string}, - "_geds___tcm_idx": {"expression": exp_string2}, - } - } - - log.debug(json.dumps(file_path_config, indent=2)) - - Props.add_to(evt_config, file_path_config) - -# block for snakemake to fill in channel lists -for field, dic in evt_config["channels"].items(): - if isinstance(dic, dict): - chans = chmap.map("system", unique=False)[dic["system"]] - if "selectors" in dic: - try: - for k, val in dic["selectors"].items(): - chans = chans.map(k, unique=False)[val] - except KeyError: - chans = None - if chans is not None: - chans = [f"ch{chan}" for chan in list(chans.map("daq.rawid"))] - else: - chans = [] - evt_config["channels"][field] = chans - -log.debug(json.dumps(evt_config["channels"], indent=2)) - -t_start = time.time() -Path(args.output).parent.mkdir(parents=True, exist_ok=True) - -file_table = { - "tcm": (args.tcm_file, "hardware_tcm_1", "ch{}"), - "dsp": (args.dsp_file, "dsp", "ch{}"), - "hit": (args.hit_file, "hit", "ch{}"), - "evt": (None, "evt"), -} - -if args.ann_file is not None: - file_table["ann"] = (args.ann_file, "dsp", "ch{}") - -table = build_evt( - file_table, - evt_config, -) - -if "muon_config" in config_dict and config_dict["muon_config"] is not None: - muon_config = Props.read_from(config_dict["muon_config"]["evt_config"]) - field_config = Props.read_from(config_dict["muon_config"]["field_config"]) - # block for snakemake to fill in channel lists - for field, dic in muon_config["channels"].items(): - if isinstance(dic, dict): - chans = chmap.map("system", unique=False)[dic["system"]] - if "selectors" in dic: - try: - for k, val in dic["selectors"].items(): - chans = chans.map(k, unique=False)[val] - except KeyError: - chans = None - if chans is not None: - chans = [f"ch{chan}" for chan in list(chans.map("daq.rawid"))] - else: - chans = [] - muon_config["channels"][field] = chans - - trigger_timestamp = table[field_config["ged_timestamp"]["table"]][ - field_config["ged_timestamp"]["field"] - ].nda - if "hardware_tcm_2" in lh5.ls(args.tcm_file): - muon_table = build_evt( - { - "tcm": (args.tcm_file, "hardware_tcm_2", "ch{}"), - "dsp": (args.dsp_file, "dsp", "ch{}"), - "hit": (args.hit_file, "hit", "ch{}"), - "evt": (None, "evt"), - }, - muon_config, - ) - - muon_timestamp = muon_table[field_config["muon_timestamp"]["field"]].nda - muon_tbl_flag = muon_table[field_config["muon_flag"]["field"]].nda - if len(muon_timestamp[muon_tbl_flag]) > 0: - is_muon_veto_triggered = find_matching_values_with_delay( - trigger_timestamp, muon_timestamp[muon_tbl_flag], field_config["jitter"] - ) - muon_flag = np.isin(trigger_timestamp, is_muon_veto_triggered) - else: - 
muon_flag = np.zeros(len(trigger_timestamp), dtype=bool) - else: - muon_flag = np.zeros(len(trigger_timestamp), dtype=bool) - table[field_config["output_field"]["table"]].add_column( - field_config["output_field"]["field"], Array(muon_flag) - ) - -sto.write(obj=table, name="evt", lh5_file=args.output, wo_mode="a") - -t_elap = time.time() - t_start -log.info(f"Done! Time elapsed: {t_elap:.2f} sec.") diff --git a/workflow/src/legenddataflow/scripts/build_fdb.py b/workflow/src/legenddataflow/scripts/build_fdb.py deleted file mode 100644 index 93a3567..0000000 --- a/workflow/src/legenddataflow/scripts/build_fdb.py +++ /dev/null @@ -1,84 +0,0 @@ -import argparse -import logging -from pathlib import Path - -import numpy as np -from dbetto.catalog import Props -from lgdo import lh5 -from pygama.flow.file_db import FileDB - -argparser = argparse.ArgumentParser() -argparser.add_argument("--config", required=True) -argparser.add_argument("--scan-path", required=True) -argparser.add_argument("--output", required=True) -argparser.add_argument("--log") -argparser.add_argument("--assume-nonsparse", action="store_true") -args = argparser.parse_args() - -config = Props.read_from(args.config) - -if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -else: - logging.basicConfig(level=logging.DEBUG) - -logging.getLogger("legendmeta").setLevel(logging.INFO) -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py._conv").setLevel(logging.INFO) - -log = logging.getLogger(__name__) - -fdb = FileDB(config, scan=False) -fdb.scan_files([args.scan_path]) -fdb.scan_tables_columns(dir_files_conform=True) - -# augment dataframe with earliest timestamp found in file - -default = np.finfo("float64").max -timestamps = np.zeros(len(fdb.df), dtype="float64") - -for i, row in enumerate(fdb.df.itertuples()): - store = lh5.LH5Store( - base_path=f"{fdb.data_dir}/{fdb.tier_dirs['raw']}", keep_open=True - ) - - # list of first timestamps for each channel - loc_timestamps = np.full(len(row.raw_tables), fill_value=default, dtype="float64") - - msg = f"finding first timestamp in {fdb.data_dir}/{fdb.tier_dirs['raw']}/{row.raw_file}" - log.info(msg) - - found = False - for j, table in enumerate(row.raw_tables): - try: - loc_timestamps[j] = store.read( - fdb.table_format["raw"].format(ch=table) + "/timestamp", - row.raw_file.strip("/"), - n_rows=1, - )[0][0] - found = True - except KeyError: - pass - - if found and args.assume_nonsparse: - break - - if (loc_timestamps == default).all() or not found: - msg = "something went wrong! no valid first timestamp found" - raise RuntimeError(msg) - - timestamps[i] = np.min(loc_timestamps) - - msg = f"found {timestamps[i]}" - log.info(msg) - - if timestamps[i] < 0 or timestamps[i] > 4102444800: - msg = "something went wrong! 
timestamp does not make sense" - raise RuntimeError(msg) - -fdb.df["first_timestamp"] = timestamps - -fdb.to_disk(args.output, wo_mode="of") diff --git a/workflow/src/legenddataflow/scripts/build_hit.py b/workflow/src/legenddataflow/scripts/build_hit.py deleted file mode 100644 index 47b0fa0..0000000 --- a/workflow/src/legenddataflow/scripts/build_hit.py +++ /dev/null @@ -1,96 +0,0 @@ -import argparse -import time -from pathlib import Path - -from dbetto.catalog import Props -from legendmeta import LegendMetadata, TextDB -from lgdo import lh5 -from pygama.hit.build_hit import build_hit - -from ..log import build_log - -argparser = argparse.ArgumentParser() -argparser.add_argument("--input", help="input file", type=str) -argparser.add_argument("--pars_file", help="hit pars file", nargs="*") - -argparser.add_argument("--configs", help="configs", type=str, required=True) -argparser.add_argument("--metadata", help="metadata", type=str, required=True) -argparser.add_argument("--log", help="log_file", type=str) - -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--tier", help="Tier", type=str, required=True) - -argparser.add_argument("--output", help="output file", type=str) -argparser.add_argument("--db_file", help="db file", type=str) -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True) -if args.tier == "hit" or args.tier == "pht": - config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ - "tier_hit" - ] -else: - msg = "unknown tier" - raise ValueError(msg) - -log = build_log(config_dict, args.log) - -channel_dict = config_dict["inputs"]["hit_config"] -settings_dict = config_dict["options"].get("settings", {}) -if isinstance(settings_dict, str): - settings_dict = Props.read_from(settings_dict) - -meta = LegendMetadata(path=args.metadata) -chan_map = meta.channelmap(args.timestamp, system=args.datatype) - -pars_dict = Props.read_from(args.pars_file) -pars_dict = {chan: chan_dict["pars"] for chan, chan_dict in pars_dict.items()} - -hit_dict = {} -channels_present = lh5.ls(args.input) -for channel in pars_dict: - chan_pars = pars_dict[channel].copy() - try: - detector = chan_map.map("daq.rawid")[int(channel[2:])].name - if detector in channel_dict: - cfg_dict = Props.read_from(channel_dict[detector]) - Props.add_to(cfg_dict, chan_pars) - chan_pars = cfg_dict - - if channel in channels_present: - hit_dict[f"{channel}/dsp"] = chan_pars - except KeyError: - pass - -t_start = time.time() -Path(args.output).parent.mkdir(parents=True, exist_ok=True) -build_hit(args.input, lh5_tables_config=hit_dict, outfile=args.output) -t_elap = time.time() - t_start -log.info(f"Done! 
Time elapsed: {t_elap:.2f} sec.") - -hit_outputs = {} -hit_channels = [] -for channel, file in channel_dict.items(): - output = Props.read_from(file)["outputs"] - in_dict = False - for entry in hit_outputs: - if hit_outputs[entry]["fields"] == output: - hit_outputs[entry]["channels"].append(channel) - in_dict = True - if in_dict is False: - hit_outputs[f"group{len(list(hit_outputs))+1}"] = { - "channels": [channel], - "fields": output, - } - hit_channels.append(channel) - -key = args.output.replace(f"-tier_{args.tier}.lh5", "") - -full_dict = { - "valid_fields": {args.tier: hit_outputs}, - "valid_keys": {key: {"valid_channels": {args.tier: hit_channels}}}, -} - -Path(args.db_file).parent.mkdir(parents=True, exist_ok=True) -Props.write_to(args.db_file, full_dict) diff --git a/workflow/src/legenddataflow/scripts/build_raw_blind.py b/workflow/src/legenddataflow/scripts/build_raw_blind.py deleted file mode 100644 index 3d42717..0000000 --- a/workflow/src/legenddataflow/scripts/build_raw_blind.py +++ /dev/null @@ -1,181 +0,0 @@ -""" -This script takes in raw data, applies the calibration to the daqenergy -and uses this to blind the data in a window of Qbb +- 25 keV. It copies over all -channels in a raw file, removing those events that fall within the ROI for Ge detectors -that have a daqenergy calibration curve and are not anti-coincidence only (AC). It removes -the whole event from all of the Ge and SiPM channels. - -In the Snakemake dataflow, this script only runs if the checkfile is found on disk, -but this is controlled by the Snakemake flow (presumably an error is thrown if the file -is not found). This script itself does not check for the existence of such a file. -""" - -import argparse -from pathlib import Path - -import numexpr as ne -import numpy as np -from dbetto.catalog import Props -from legendmeta import LegendMetadata, TextDB -from lgdo import lh5 - -from ..log import build_log - -argparser = argparse.ArgumentParser() -argparser.add_argument("--input", help="input file", type=str) -argparser.add_argument("--output", help="output file", type=str) -argparser.add_argument( - "--blind_curve", help="blinding curves file", type=str, required=True, nargs="*" -) -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--configs", help="config file", type=str) -argparser.add_argument("--chan_maps", help="chan map", type=str) -argparser.add_argument("--metadata", help="metadata", type=str) -argparser.add_argument("--log", help="log file", type=str) -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True) -config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ - "tier_raw" -] - -log = build_log(config_dict, args.log) - -channel_dict = config_dict["inputs"] -hdf_settings = Props.read_from(config_dict["settings"])["hdf5_settings"] -blinding_settings = Props.read_from(config_dict["config"]) - -centroid = blinding_settings["centroid_in_keV"] # keV -width = blinding_settings["width_in_keV"] # keV - -# list of all channels and objects in the raw file -all_channels = lh5.ls(args.input) - -# list of Ge channels and SiPM channels with associated metadata -legendmetadata = LegendMetadata(args.metadata, lazy=True) -ged_channels = ( - legendmetadata.channelmap(args.timestamp) - .map("system", unique=False)["geds"] - .map("daq.rawid") -) -spms_channels = ( - legendmetadata.channelmap(args.timestamp) - .map("system", 
unique=False)["spms"] - .map("daq.rawid") -) -auxs_channels = ( - legendmetadata.channelmap(args.timestamp) - .map("system", unique=False)["auxs"] - .map("daq.rawid") -) -blsn_channels = ( - legendmetadata.channelmap(args.timestamp) - .map("system", unique=False)["bsln"] - .map("daq.rawid") -) -puls_channels = ( - legendmetadata.channelmap(args.timestamp) - .map("system", unique=False)["puls"] - .map("daq.rawid") -) - -store = lh5.LH5Store() - -# rows that need blinding -toblind = np.array([]) - -# first, loop through the Ge detector channels, calibrate them and look for events that should be blinded -for chnum in list(ged_channels): - # skip Ge detectors that are anti-coincidence only or not able to be blinded for some other reason - if ged_channels[chnum]["analysis"]["is_blinded"] is False: - continue - - # load in just the daqenergy for now - daqenergy, _ = store.read(f"ch{chnum}/raw/daqenergy", args.input) - - # read in calibration curve for this channel - blind_curve = Props.read_from(args.blind_curve)[f"ch{chnum}"]["pars"]["operations"] - - # calibrate daq energy using pre existing curve - daqenergy_cal = ne.evaluate( - blind_curve["daqenergy_cal"]["expression"], - local_dict=dict( - daqenergy=daqenergy, **blind_curve["daqenergy_cal"]["parameters"] - ), - ) - - # figure out which event indices should be blinded - toblind = np.append( - toblind, np.nonzero(np.abs(np.asarray(daqenergy_cal) - centroid) <= width)[0] - ) - -# remove duplicates -toblind = np.unique(toblind) - -# total number of events (from last Ge channel loaded, should be same for all Ge channels) -allind = np.arange(len(daqenergy)) - -# gets events that should not be blinded -tokeep = allind[np.logical_not(np.isin(allind, toblind))] - -# make some temp file to write the output to before renaming it -rng = np.random.default_rng() -rand_num = f"{rng.integers(0,99999):05d}" -temp_output = f"{args.output}.{rand_num}" -Path(temp_output).parent.mkdir(parents=True, exist_ok=True) - -for channel in all_channels: - try: - chnum = int(channel[2::]) - except ValueError: - # if this isn't an interesting channel, just copy it to the output file - chobj, _ = store.read(channel, args.input, decompress=False) - store.write_object( - chobj, - channel, - lh5_file=temp_output, - wo_mode="w", - **hdf_settings, - ) - continue - - if ( - (chnum not in list(ged_channels)) - and (chnum not in list(spms_channels)) - and (chnum not in list(auxs_channels)) - and (chnum not in list(blsn_channels)) - and (chnum not in list(puls_channels)) - ): - # if this is a PMT or not included for some reason, just copy it to the output file - chobj, _ = store.read(channel + "/raw", args.input, decompress=False) - store.write_object( - chobj, - group=channel, - name="raw", - lh5_file=temp_output, - wo_mode="w", - **hdf_settings, - ) - continue - - # the rest should be the Ge and SiPM channels that need to be blinded - - # read in all of the data but only for the unblinded events - blinded_chobj, _ = store.read( - channel + "/raw", args.input, idx=tokeep, decompress=False - ) - - # now write the blinded data for this channel - store.write_object( - blinded_chobj, - group=channel, - name="raw", - lh5_file=temp_output, - wo_mode="w", - **hdf_settings, - ) - -# rename the temp file -Path(args.output).parent.mkdir(parents=True, exist_ok=True) -Path(temp_output).rename(args.output) diff --git a/workflow/src/legenddataflow/scripts/build_raw_fcio.py b/workflow/src/legenddataflow/scripts/build_raw_fcio.py deleted file mode 100644 index 176565a..0000000 --- 
a/workflow/src/legenddataflow/scripts/build_raw_fcio.py +++ /dev/null @@ -1,68 +0,0 @@ -import argparse -from copy import deepcopy -from pathlib import Path - -import numpy as np -from daq2lh5 import build_raw -from dbetto import TextDB -from dbetto.catalog import Props - -from ..log import build_log - -argparser = argparse.ArgumentParser() -argparser.add_argument("input", help="input file", type=str) -argparser.add_argument("output", help="output file", type=str) -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--configs", help="config file", type=str) -argparser.add_argument("--chan_maps", help="chan map", type=str) -argparser.add_argument("--log", help="log file", type=str) -args = argparser.parse_args() - -Path(args.output).parent.mkdir(parents=True, exist_ok=True) - -config_dict = ( - TextDB(args.configs, lazy=True) - .on(args.timestamp, system=args.datatype) - .snakemake_rules.tier_raw_fcio -) - -log = build_log(config_dict, args.log) - -channel_dict = config_dict.inputs -settings = Props.read_from(channel_dict.settings) -channel_dict = channel_dict.out_spec -all_config = Props.read_from(channel_dict.gen_config) - -chmap = TextDB(args.chan_maps, lazy=True).channelmaps.on(args.timestamp).group("system") - -if "geds_config" in channel_dict: - raise NotImplementedError() - -if "spms_config" in channel_dict: - spm_config = Props.read_from(channel_dict.spms_config) - spm_channels = chmap.spms.map("daq.rawid") - - for rawid, chinfo in spm_channels.items(): - cfg_block = deepcopy(spm_config["FCEventDecoder"]["__output_table_name__"]) - cfg_block["key_list"] = [chinfo.daq.fc_channel] - spm_config["FCEventDecoder"][f"ch{rawid:07d}/raw"] = cfg_block - - spm_config["FCEventDecoder"].pop("__output_table_name__") - - Props.add_to(all_config, spm_config) - -if "auxs_config" in channel_dict: - raise NotImplementedError() - -if "muon_config" in channel_dict: - raise NotImplementedError() - -rng = np.random.default_rng() -rand_num = f"{rng.integers(0,99999):05d}" -temp_output = f"{args.output}.{rand_num}" - -build_raw(args.input, out_spec=all_config, filekey=temp_output, **settings) - -# rename the temp file -Path(temp_output).rename(args.output) diff --git a/workflow/src/legenddataflow/scripts/build_raw_orca.py b/workflow/src/legenddataflow/scripts/build_raw_orca.py deleted file mode 100644 index 72b5ac6..0000000 --- a/workflow/src/legenddataflow/scripts/build_raw_orca.py +++ /dev/null @@ -1,108 +0,0 @@ -import argparse -import logging -from pathlib import Path - -import numpy as np -from daq2lh5 import build_raw -from dbetto import TextDB -from dbetto.catalog import Props - -from ..log import build_log - -argparser = argparse.ArgumentParser() -argparser.add_argument("input", help="input file", type=str) -argparser.add_argument("output", help="output file", type=str) -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--configs", help="config file", type=str) -argparser.add_argument("--chan_maps", help="chan map", type=str) -argparser.add_argument("--log", help="log file") -args = argparser.parse_args() - -Path(args.log).parent.mkdir(parents=True, exist_ok=True) -logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - -Path(args.output).parent.mkdir(parents=True, exist_ok=True) - -configs = TextDB(args.configs, 
lazy=True) -config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ - "tier_raw" -] - -log = build_log(config_dict, args.log) - -channel_dict = config_dict["inputs"] -settings = Props.read_from(channel_dict["settings"]) -channel_dict = channel_dict["out_spec"] -all_config = Props.read_from(channel_dict["gen_config"]) - -chmap = TextDB(args.chan_maps, lazy=True) - -if "geds_config" in list(channel_dict): - ged_config = Props.read_from(channel_dict["geds_config"]) - - ged_channels = list( - chmap.channelmaps.on(args.timestamp) - .map("system", unique=False)["geds"] - .map("daq.rawid") - ) - - ged_config[next(iter(ged_config))]["geds"]["key_list"] = sorted(ged_channels) - Props.add_to(all_config, ged_config) - -if "spms_config" in list(channel_dict): - spm_config = Props.read_from(channel_dict["spms_config"]) - - spm_channels = list( - chmap.channelmaps.on(args.timestamp) - .map("system", unique=False)["spms"] - .map("daq.rawid") - ) - - spm_config[next(iter(spm_config))]["spms"]["key_list"] = sorted(spm_channels) - Props.add_to(all_config, spm_config) - -if "auxs_config" in list(channel_dict): - aux_config = Props.read_from(channel_dict["auxs_config"]) - aux_channels = list( - chmap.channelmaps.on(args.timestamp) - .map("system", unique=False)["auxs"] - .map("daq.rawid") - ) - aux_channels += list( - chmap.channelmaps.on(args.timestamp) - .map("system", unique=False)["puls"] - .map("daq.rawid") - ) - aux_channels += list( - chmap.channelmaps.on(args.timestamp) - .map("system", unique=False)["bsln"] - .map("daq.rawid") - ) - top_key = next(iter(aux_config)) - aux_config[top_key][next(iter(aux_config[top_key]))]["key_list"] = sorted( - aux_channels - ) - Props.add_to(all_config, aux_config) - -if "muon_config" in list(channel_dict): - muon_config = Props.read_from(channel_dict["muon_config"]) - muon_channels = list( - chmap.channelmaps.on(args.timestamp) - .map("system", unique=False)["muon"] - .map("daq.rawid") - ) - top_key = next(iter(muon_config)) - muon_config[top_key][next(iter(muon_config[top_key]))]["key_list"] = sorted( - muon_channels - ) - Props.add_to(all_config, muon_config) - -rng = np.random.default_rng() -rand_num = f"{rng.integers(0,99999):05d}" -temp_output = f"{args.output}.{rand_num}" - -build_raw(args.input, out_spec=all_config, filekey=temp_output, **settings) - -# rename the temp file -Path(temp_output).rename(args.output) diff --git a/workflow/src/legenddataflow/scripts/build_skm.py b/workflow/src/legenddataflow/scripts/build_skm.py deleted file mode 100644 index 9411b1b..0000000 --- a/workflow/src/legenddataflow/scripts/build_skm.py +++ /dev/null @@ -1,96 +0,0 @@ -import argparse - -import awkward as ak -from dbetto import TextDB -from dbetto.catalog import Props -from lgdo import lh5 -from lgdo.types import Array, Struct, Table, VectorOfVectors - -from ..log import build_log - - -def get_all_out_fields(input_table, out_fields, current_field=""): - for key in input_table: - field = input_table[key] - key_string = f"{current_field}.{key}" - if isinstance(field, (Table, Struct)): - get_all_out_fields(field, out_fields, key_string) - else: - if key_string not in out_fields: - out_fields.append(key_string) - return out_fields - - -argparser = argparse.ArgumentParser() -argparser.add_argument("--evt_file", help="evt file", required=True) -argparser.add_argument("--configs", help="configs", required=True) -argparser.add_argument("--datatype", help="datatype", required=True) -argparser.add_argument("--timestamp", help="timestamp", required=True) 
-argparser.add_argument("--log", help="log file", default=None) -argparser.add_argument("--output", help="output file", required=True) -args = argparser.parse_args() - -# load in config -config_dict = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype)[ - "snakemake_rules" -]["tier_skm"] - -log = build_log(config_dict, args.log) - - -skm_config_file = config_dict["inputs"]["skm_config"] -evt_filter = Props.read_from(skm_config_file)["evt_filter"] -out_fields = Props.read_from(skm_config_file)["keep_fields"] - -store = lh5.LH5Store() - -evt = lh5.read_as("evt", args.evt_file, "ak") - -# remove unwanted events -skm = eval(f"evt[{evt_filter}]") -# make it rectangular and make an LGDO Table -out_table = Table(skm) - -for field in out_fields: - items = field.split(".") - ptr1 = out_table - for item in items[:-1]: - ptr1 = ptr1[item] - - if isinstance(ptr1[items[-1]], Table): - out_fields.remove(field) - out_fields = get_all_out_fields( - ptr1[items[-1]], out_fields, current_field=field - ) - -# remove unwanted columns -out_table_skm = Table(size=len(out_table)) -for field in out_fields: - # table nesting is labeled by '.' in the config - items = field.split(".") - # get to actual nested field recursively - ptr1 = out_table - ptr2 = out_table_skm - for item in items[:-1]: - # make intermediate tables in new table - if item not in ptr2: - ptr2.add_field(item, Table(size=len(out_table))) - # get non-table LGDO recursively - ptr1 = ptr1[item] - ptr2 = ptr2[item] - - # finally add column to new table - if isinstance(ptr1[items[-1]], VectorOfVectors): - ptr2.add_field(items[-1], Array(ak.flatten(ptr1[items[-1]].view_as("ak")))) - else: - ptr2.add_field(items[-1], ptr1[items[-1]]) - attrs = ptr1[items[-1]].attrs - - # forward LGDO attributes - # attrs = evt[field.replace(".", "_")].attrs - for attr, val in attrs.items(): - if attr != "datatype": - ptr2.attrs[attr] = val - -# write-append to disk -store.write(out_table_skm, "skm", args.output, wo_mode="w") diff --git a/workflow/src/legenddataflow/scripts/build_tcm.py b/workflow/src/legenddataflow/scripts/build_tcm.py deleted file mode 100644 index 7e6ab73..0000000 --- a/workflow/src/legenddataflow/scripts/build_tcm.py +++ /dev/null @@ -1,53 +0,0 @@ -import argparse -from pathlib import Path - -import lgdo.lh5 as lh5 -import numpy as np -from daq2lh5.orca import orca_flashcam -from dbetto import TextDB -from dbetto.catalog import Props -from pygama.evt.build_tcm import build_tcm - -from ..log import build_log - -argparser = argparse.ArgumentParser() -argparser.add_argument("input", help="input file", type=str) -argparser.add_argument("output", help="output file", type=str) -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--configs", help="config file", type=str) -argparser.add_argument("--log", help="log file", type=str) -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -config_dict = configs["snakemake_rules"]["tier_tcm"] - -log = build_log(config_dict, args.log) - -settings = Props.read_from(config_dict["inputs"]["config"]) - -rng = np.random.default_rng() -temp_output = f"{args.output}.{rng.integers(0, 99999):05d}" -Path(args.output).parent.mkdir(parents=True, exist_ok=True) - -# get the list of channels by fcid -ch_list = lh5.ls(args.input, "/ch*") -fcid_channels = {} -for ch in ch_list: - key = int(ch[2:]) - fcid = 
orca_flashcam.get_fcid(key) - if fcid not in fcid_channels: - fcid_channels[fcid] = [] - fcid_channels[fcid].append(f"/{ch}/raw") - -# make a hardware_tcm_[fcid] for each fcid -for fcid, fcid_dict in fcid_channels.items(): - build_tcm( - [(args.input, fcid_dict)], - out_file=temp_output, - out_name=f"hardware_tcm_{fcid}", - wo_mode="o", - **settings, - ) - -Path(temp_output).rename(args.output) diff --git a/workflow/src/legenddataflow/scripts/check_blinding.py b/workflow/src/legenddataflow/scripts/check_blinding.py deleted file mode 100644 index faf800d..0000000 --- a/workflow/src/legenddataflow/scripts/check_blinding.py +++ /dev/null @@ -1,108 +0,0 @@ -""" -This script checks that the blinding for a particular channel is still valid, -it does this by taking the calibration curve stored in the overrides, applying it -to the daqenergy, running a peak search over the calibrated energy and checking that -there are peaks within 5keV of the 583 and 2614 peaks. If the detector is in ac mode -then it will skip the check. -""" - -import argparse -import pickle as pkl -from pathlib import Path - -import matplotlib as mpl -import matplotlib.pyplot as plt -import numexpr as ne -import numpy as np -from dbetto import TextDB -from dbetto.catalog import Props -from legendmeta import LegendMetadata -from lgdo import lh5 -from pygama.math.histogram import get_hist -from pygama.pargen.energy_cal import get_i_local_maxima - -from ..log import build_log - -mpl.use("Agg") - -argparser = argparse.ArgumentParser() -argparser.add_argument("--files", help="files", nargs="*", type=str) -argparser.add_argument("--output", help="output file", type=str) -argparser.add_argument("--plot_file", help="plot file", type=str) -argparser.add_argument( - "--blind_curve", help="blinding curves file", nargs="*", type=str -) -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--configs", help="config file", type=str) -argparser.add_argument("--channel", help="channel", type=str) -argparser.add_argument("--metadata", help="channel", type=str) -argparser.add_argument("--log", help="log file", type=str) -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -config_dict = configs["snakemake_rules"]["tier_raw_blindcheck"] - -log = build_log(config_dict, args.log) - -# get the usability status for this channel -chmap = ( - LegendMetadata(args.metadata, lazy=True).channelmap(args.timestamp).map("daq.rawid") -) -det_status = chmap[int(args.channel[2:])]["analysis"]["is_blinded"] - -# read in calibration curve for this channel -blind_curve = Props.read_from(args.blind_curve)[args.channel]["pars"]["operations"] - -# load in the data -daqenergy = lh5.read(f"{args.channel}/raw/daqenergy", sorted(args.files))[0].view_as( - "np" -) - -# calibrate daq energy using pre existing curve -daqenergy_cal = ne.evaluate( - blind_curve["daqenergy_cal"]["expression"], - local_dict=dict(daqenergy=daqenergy, **blind_curve["daqenergy_cal"]["parameters"]), -) - -# bin with 1 keV bins and get maxs -hist, bins, var = get_hist(daqenergy_cal, np.arange(0, 3000, 1)) -maxs = get_i_local_maxima(hist, delta=25) -log.info(f"peaks found at : {maxs}") - -# plot the energy spectrum to check calibration -fig = plt.figure(figsize=(8, 10)) -ax = plt.subplot(211) -ax.hist(daqenergy_cal, bins=np.arange(0, 3000, 1), histtype="step") -ax.set_ylabel("counts") -ax.set_yscale("log") 
-ax2 = plt.subplot(212) -ax2.hist( - daqenergy_cal, - bins=np.arange(2600, 2630, 1 * blind_curve["daqenergy_cal"]["parameters"]["a"]), - histtype="step", -) -ax2.set_xlabel("energy (keV)") -ax2.set_ylabel("counts") -plt.suptitle(args.channel) -with Path(args.plot_file).open("wb") as w: - pkl.dump(fig, w, protocol=pkl.HIGHEST_PROTOCOL) -plt.close() - -# check for peaks within +- 5keV of 2614 and 583 to ensure blinding still -# valid and if so create file else raise error. if detector is in ac mode it -# will always pass this check -if ( - np.any(np.abs(maxs - 2614) < 5) and np.any(np.abs(maxs - 583) < 5) -) or det_status is False: - Path(args.output).mkdir(parents=True, exist_ok=True) - Props.write_to( - args.output, - { - "threshold_adc": np.nanmin(daqenergy), - "threshold_kev": np.nanmin(daqenergy_cal), - }, - ) -else: - msg = "peaks not found in daqenergy" - raise RuntimeError(msg) diff --git a/workflow/src/legenddataflow/scripts/complete_run.py b/workflow/src/legenddataflow/scripts/complete_run.py index e3892eb..eff7a90 100644 --- a/workflow/src/legenddataflow/scripts/complete_run.py +++ b/workflow/src/legenddataflow/scripts/complete_run.py @@ -7,9 +7,9 @@ import time from pathlib import Path +from .. import patterns +from .. import utils as ut from ..FileKey import FileKey -from . import patterns -from . import utils as ut print("INFO: dataflow ran successfully, now few final checks and scripts") diff --git a/workflow/src/legenddataflow/scripts/create_chankeylist.py b/workflow/src/legenddataflow/scripts/create_chankeylist.py index a75be8b..9566068 100644 --- a/workflow/src/legenddataflow/scripts/create_chankeylist.py +++ b/workflow/src/legenddataflow/scripts/create_chankeylist.py @@ -4,27 +4,29 @@ from dbetto import TextDB from legendmeta import LegendMetadata -argparser = argparse.ArgumentParser() -argparser.add_argument("--det_status", help="det_status", type=str, required=True) -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--channelmap", help="Channel Map", type=str, required=True) -argparser.add_argument("--output_file", help="output_file", type=str, required=True) -args = argparser.parse_args() +def create_chankeylist() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--det_status", help="det_status", type=str, required=True) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--channelmap", help="Channel Map", type=str, required=True) -det_status = TextDB(args.det_status, lazy=True) -status_map = det_status.statuses.on(args.timestamp, system=args.datatype) + argparser.add_argument("--output_file", help="output_file", type=str, required=True) + args = argparser.parse_args() -channel_map = LegendMetadata(args.channelmap, lazy=True) -chmap = channel_map.channelmaps.on(args.timestamp) + det_status = TextDB(args.det_status, lazy=True) + status_map = det_status.statuses.on(args.timestamp, system=args.datatype) -channels = [ - chan - for chan in status_map - if status_map[chan]["processable"] is True and chmap[chan].system == "geds" -] -Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) -with Path(args.output_file).open("w") as f: - for chan in channels: - f.write(f"{chan}\n") + channel_map = LegendMetadata(args.channelmap, lazy=True) + chmap = 
channel_map.channelmaps.on(args.timestamp) + + channels = [ + chan + for chan in status_map + if status_map[chan]["processable"] is True and chmap[chan].system == "geds" + ] + Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) + with Path(args.output_file).open("w") as f: + for chan in channels: + f.write(f"{chan}\n") diff --git a/workflow/src/legenddataflow/scripts/filedb.py b/workflow/src/legenddataflow/scripts/filedb.py new file mode 100644 index 0000000..d9b52d8 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/filedb.py @@ -0,0 +1,88 @@ +import argparse +import logging +from pathlib import Path + +import numpy as np +from dbetto.catalog import Props +from lgdo import lh5 +from pygama.flow.file_db import FileDB + + +def build_filedb() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--config", required=True) + argparser.add_argument("--scan-path", required=True) + argparser.add_argument("--output", required=True) + argparser.add_argument("--log") + argparser.add_argument("--assume-nonsparse", action="store_true") + args = argparser.parse_args() + + config = Props.read_from(args.config) + + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") + else: + logging.basicConfig(level=logging.DEBUG) + + logging.getLogger("legendmeta").setLevel(logging.INFO) + logging.getLogger("numba").setLevel(logging.INFO) + logging.getLogger("parse").setLevel(logging.INFO) + logging.getLogger("lgdo").setLevel(logging.INFO) + logging.getLogger("h5py._conv").setLevel(logging.INFO) + + log = logging.getLogger(__name__) + + fdb = FileDB(config, scan=False) + fdb.scan_files([args.scan_path]) + fdb.scan_tables_columns(dir_files_conform=True) + + # augment dataframe with earliest timestamp found in file + + default = np.finfo("float64").max + timestamps = np.zeros(len(fdb.df), dtype="float64") + + for i, row in enumerate(fdb.df.itertuples()): + store = lh5.LH5Store( + base_path=f"{fdb.data_dir}/{fdb.tier_dirs['raw']}", keep_open=True + ) + + # list of first timestamps for each channel + loc_timestamps = np.full( + len(row.raw_tables), fill_value=default, dtype="float64" + ) + + msg = f"finding first timestamp in {fdb.data_dir}/{fdb.tier_dirs['raw']}/{row.raw_file}" + log.info(msg) + + found = False + for j, table in enumerate(row.raw_tables): + try: + loc_timestamps[j] = store.read( + fdb.table_format["raw"].format(ch=table) + "/timestamp", + row.raw_file.strip("/"), + n_rows=1, + )[0][0] + found = True + except KeyError: + pass + + if found and args.assume_nonsparse: + break + + if (loc_timestamps == default).all() or not found: + msg = "something went wrong! no valid first timestamp found" + raise RuntimeError(msg) + + timestamps[i] = np.min(loc_timestamps) + + msg = f"found {timestamps[i]}" + log.info(msg) + + if timestamps[i] < 0 or timestamps[i] > 4102444800: + msg = "something went wrong! 
timestamp does not make sense" + raise RuntimeError(msg) + + fdb.df["first_timestamp"] = timestamps + + fdb.to_disk(args.output, wo_mode="of") diff --git a/workflow/src/legenddataflow/scripts/merge_channels.py b/workflow/src/legenddataflow/scripts/merge_channels.py index d6fec7a..6fee6f5 100644 --- a/workflow/src/legenddataflow/scripts/merge_channels.py +++ b/workflow/src/legenddataflow/scripts/merge_channels.py @@ -24,138 +24,142 @@ def replace_path(d, old_path, new_path): return d -argparser = argparse.ArgumentParser() -argparser.add_argument("--input", help="input file", nargs="*", type=str, required=True) -argparser.add_argument("--output", help="output file", type=str, required=True) -argparser.add_argument( - "--in_db", - help="in db file (used for when lh5 files referred to in db)", - type=str, - required=False, -) -argparser.add_argument( - "--out_db", - help="lh5 file (used for when lh5 files referred to in db)", - type=str, - required=False, -) -argparser.add_argument( - "--channelmap", - help="channelmap", - type=str, - required=False, - default=None, -) -argparser.add_argument( - "--timestamp", - help="timestamp", - type=str, - required=False, -) -args = argparser.parse_args() - -# change to only have 1 output file for multiple inputs -# don't care about processing step, check if extension matches - -channel_files = args.input.infiles if hasattr(args.input, "infiles") else args.input - -file_extension = Path(args.output).suffix - -if args.channelmap is not None: - channel_map = LegendMetadata(args.channelmap, lazy=True) - chmap = channel_map.channelmap(args.timestamp) -else: - chmap = None - -if file_extension == ".dat" or file_extension == ".dir": - out_file = Path(args.output).with_suffix("") -else: - out_file = args.output - -rng = np.random.default_rng() -temp_output = f"{out_file}.{rng.integers(0, 99999):05d}" - -Path(args.output).parent.mkdir(parents=True, exist_ok=True) - -if file_extension in (".json", ".yaml", ".yml"): - out_dict = {} - for channel in channel_files: - if Path(channel).suffix == file_extension: - channel_dict = Props.read_from(channel) - fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) - if chmap is not None: - channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" +def merge_channels() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument( + "--input", help="input file", nargs="*", type=str, required=True + ) + argparser.add_argument("--output", help="output file", type=str, required=True) + argparser.add_argument( + "--in_db", + help="in db file (used for when lh5 files referred to in db)", + type=str, + required=False, + ) + argparser.add_argument( + "--out_db", + help="lh5 file (used for when lh5 files referred to in db)", + type=str, + required=False, + ) + argparser.add_argument( + "--channelmap", + help="channelmap", + type=str, + required=False, + default=None, + ) + argparser.add_argument( + "--timestamp", + help="timestamp", + type=str, + required=False, + ) + args = argparser.parse_args() + + # change to only have 1 output file for multiple inputs + # don't care about processing step, check if extension matches + + channel_files = args.input.infiles if hasattr(args.input, "infiles") else args.input + + file_extension = Path(args.output).suffix + + if args.channelmap is not None: + channel_map = LegendMetadata(args.channelmap, lazy=True) + chmap = channel_map.channelmap(args.timestamp) + else: + chmap = None + + if file_extension == ".dat" or file_extension == ".dir": + out_file = 
Path(args.output).with_suffix("") + else: + out_file = args.output + + rng = np.random.default_rng() + temp_output = f"{out_file}.{rng.integers(0, 99999):05d}" + + Path(args.output).parent.mkdir(parents=True, exist_ok=True) + + if file_extension in (".json", ".yaml", ".yml"): + out_dict = {} + for channel in channel_files: + if Path(channel).suffix == file_extension: + channel_dict = Props.read_from(channel) + fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) + if chmap is not None: + channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" + else: + channel_name = fkey.channel + out_dict[channel_name] = channel_dict else: - channel_name = fkey.channel - out_dict[channel_name] = channel_dict - else: - msg = "Output file extension does not match input file extension" - raise RuntimeError(msg) - - Props.write_to(out_file, out_dict) - -elif file_extension == ".pkl": - out_dict = {} - for channel in channel_files: - with Path(channel).open("rb") as r: - channel_dict = pkl.load(r) - fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) - if chmap is not None: - channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" - else: - channel_name = fkey.channel - out_dict[channel_name] = channel_dict - - with Path(temp_output).open("wb") as w: - pkl.dump(out_dict, w, protocol=pkl.HIGHEST_PROTOCOL) - - Path(temp_output).rename(out_file) - -elif file_extension == ".dat" or file_extension == ".dir": - common_dict = {} - with shelve.open(str(out_file), "c", protocol=pkl.HIGHEST_PROTOCOL) as shelf: + msg = "Output file extension does not match input file extension" + raise RuntimeError(msg) + + Props.write_to(out_file, out_dict) + + elif file_extension == ".pkl": + out_dict = {} for channel in channel_files: with Path(channel).open("rb") as r: channel_dict = pkl.load(r) - fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel_files[0]).name) - if chmap is not None: - channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" - else: - channel_name = fkey.channel - if isinstance(channel_dict, dict) and "common" in list(channel_dict): - chan_common_dict = channel_dict.pop("common") - common_dict[channel_name] = chan_common_dict - shelf[channel_name] = channel_dict - if len(common_dict) > 0: - shelf["common"] = common_dict - - -elif file_extension == ".lh5": - if args.in_db: - db_dict = Props.read_from(args.in_db) - for channel in channel_files: - if Path(channel).suffix == file_extension: fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) if chmap is not None: channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" else: channel_name = fkey.channel - tb_in = lh5.read(f"{channel_name}", channel) - - lh5.write( - tb_in, - name=channel_name, - lh5_file=temp_output, - wo_mode="a", - ) - if args.in_db: - db_dict[channel_name] = replace_path( - db_dict[channel_name], channel, args.output + out_dict[channel_name] = channel_dict + + with Path(temp_output).open("wb") as w: + pkl.dump(out_dict, w, protocol=pkl.HIGHEST_PROTOCOL) + + Path(temp_output).rename(out_file) + + elif file_extension == ".dat" or file_extension == ".dir": + common_dict = {} + with shelve.open(str(out_file), "c", protocol=pkl.HIGHEST_PROTOCOL) as shelf: + for channel in channel_files: + with Path(channel).open("rb") as r: + channel_dict = pkl.load(r) + fkey = ChannelProcKey.get_filekey_from_pattern( + Path(channel_files[0]).name + ) + if chmap is not None: + channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" + else: + channel_name = fkey.channel + if isinstance(channel_dict, dict) and "common" in 
list(channel_dict): + chan_common_dict = channel_dict.pop("common") + common_dict[channel_name] = chan_common_dict + shelf[channel_name] = channel_dict + if len(common_dict) > 0: + shelf["common"] = common_dict + + elif file_extension == ".lh5": + if args.in_db: + db_dict = Props.read_from(args.in_db) + for channel in channel_files: + if Path(channel).suffix == file_extension: + fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) + if chmap is not None: + channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" + else: + channel_name = fkey.channel + tb_in = lh5.read(f"{channel_name}", channel) + + lh5.write( + tb_in, + name=channel_name, + lh5_file=temp_output, + wo_mode="a", ) - else: - msg = "Output file extension does not match input file extension" - raise RuntimeError(msg) - if args.out_db: - Props.write_to(args.out_db, db_dict) + if args.in_db: + db_dict[channel_name] = replace_path( + db_dict[channel_name], channel, args.output + ) + else: + msg = "Output file extension does not match input file extension" + raise RuntimeError(msg) + if args.out_db: + Props.write_to(args.out_db, db_dict) - Path(temp_output).rename(out_file) + Path(temp_output).rename(out_file) diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/dplms.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/dplms.py new file mode 100644 index 0000000..2b0004b --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/dplms.py @@ -0,0 +1,148 @@ +import argparse +import logging +import pickle as pkl +import time +from pathlib import Path + +import lgdo.lh5 as lh5 +import numpy as np +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata +from lgdo import Array, Table +from pygama.pargen.dplms_ge_dict import dplms_ge_dict + +from ....log import build_log + + +def par_geds_dsp_dplms() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) + argparser.add_argument("--peak_file", help="tcm_filelist", type=str, required=True) + argparser.add_argument("--inplots", help="in_plot_path", type=str) + argparser.add_argument("--database", help="database", type=str, required=True) + + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--channel", help="Channel", type=str, required=True) + + argparser.add_argument("--dsp_pars", help="dsp_pars", type=str, required=True) + argparser.add_argument("--lh5_path", help="lh5_path", type=str, required=True) + argparser.add_argument("--plot_path", help="plot_path", type=str) + + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_dsp_dplms"] + + log = build_log(config_dict, args.log) + + log = logging.getLogger(__name__) + sto = lh5.LH5Store() + + meta = LegendMetadata(path=args.metadata) + channel_dict = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + + configs = TextDB(args.configs).on(args.timestamp, system=args.datatype) + dsp_config = config_dict["inputs"]["proc_chain"][args.channel] + + dplms_json = 
config_dict["inputs"]["dplms_pars"][args.channel] + dplms_dict = Props.read_from(dplms_json) + + db_dict = Props.read_from(args.database) + + if dplms_dict["run_dplms"] is True: + with Path(args.fft_raw_filelist).open() as f: + fft_files = sorted(f.read().splitlines()) + + t0 = time.time() + log.info("\nLoad fft data") + energies = sto.read(f"{channel}/raw/daqenergy", fft_files)[0] + idxs = np.where(energies.nda == 0)[0] + raw_fft = sto.read( + f"{channel}/raw", fft_files, n_rows=dplms_dict["n_baselines"], idx=idxs + )[0] + t1 = time.time() + log.info(f"Time to load fft data {(t1-t0):.2f} s, total events {len(raw_fft)}") + + log.info("\nRunning event selection") + peaks_kev = np.array(dplms_dict["peaks_kev"]) + # kev_widths = [tuple(kev_width) for kev_width in dplms_dict["kev_widths"]] + + peaks_rounded = [int(peak) for peak in peaks_kev] + peaks = sto.read(f"{channel}/raw", args.peak_file, field_mask=["peak"])[0][ + "peak" + ].nda + ids = np.isin(peaks, peaks_rounded) + peaks = peaks[ids] + # idx_list = [np.where(peaks == peak)[0] for peak in peaks_rounded] + + raw_cal = sto.read(f"{channel}/raw", args.peak_file, idx=ids)[0] + log.info( + f"Time to run event selection {(time.time()-t1):.2f} s, total events {len(raw_cal)}" + ) + + if isinstance(dsp_config, (str, list)): + dsp_config = Props.read_from(dsp_config) + + if args.plot_path: + out_dict, plot_dict = dplms_ge_dict( + raw_fft, + raw_cal, + dsp_config, + db_dict, + dplms_dict, + display=1, + ) + if args.inplots: + with Path(args.inplots).open("rb") as r: + inplot_dict = pkl.load(r) + inplot_dict.update({"dplms": plot_dict}) + + else: + out_dict = dplms_ge_dict( + raw_fft, + raw_cal, + dsp_config, + db_dict, + dplms_dict, + ) + + coeffs = out_dict["dplms"].pop("coefficients") + dplms_pars = Table(col_dict={"coefficients": Array(coeffs)}) + out_dict["dplms"]["coefficients"] = ( + f"loadlh5('{args.lh5_path}', '{channel}/dplms/coefficients')" + ) + + log.info(f"DPLMS creation finished in {(time.time()-t0)/60} minutes") + else: + out_dict = {} + dplms_pars = Table(col_dict={"coefficients": Array([])}) + if args.inplots: + with Path(args.inplots).open("rb") as r: + inplot_dict = pkl.load(r) + else: + inplot_dict = {} + + db_dict.update(out_dict) + + Path(args.lh5_path).parent.mkdir(parents=True, exist_ok=True) + sto.write( + Table(col_dict={"dplms": dplms_pars}), + name=channel, + lh5_file=args.lh5_path, + wo_mode="overwrite", + ) + + Path(args.dsp_pars).parent.mkdir(parents=True, exist_ok=True) + Props.write_to(args.dsp_pars, db_dict) + + if args.plot_path: + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_path).open("wb") as f: + pkl.dump(inplot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/eopt.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/eopt.py new file mode 100644 index 0000000..4b755c2 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/eopt.py @@ -0,0 +1,398 @@ +import argparse +import pickle as pkl +import time +import warnings +from pathlib import Path + +import lgdo.lh5 as lh5 +import numpy as np +import pygama.pargen.energy_optimisation as om # noqa: F401 +import sklearn.gaussian_process.kernels as ker +from dbetto import TextDB +from dbetto.catalog import Props +from dspeed.units import unit_registry as ureg +from legendmeta import LegendMetadata +from pygama.math.distributions import hpge_peak +from pygama.pargen.dsp_optimize import ( + BayesianOptimizer, + run_bayesian_optimisation, + run_one_dsp, +) + +from 
....log import build_log + +warnings.filterwarnings(action="ignore", category=RuntimeWarning) +warnings.filterwarnings(action="ignore", category=np.RankWarning) + + +def par_geds_dsp_eopt() -> None: + argparser = argparse.ArgumentParser() + + argparser.add_argument("--peak_file", help="tcm_filelist", type=str, required=True) + argparser.add_argument("--decay_const", help="decay_const", type=str, required=True) + argparser.add_argument("--inplots", help="in_plot_path", type=str) + + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--channel", help="Channel", type=str, required=True) + + argparser.add_argument( + "--final_dsp_pars", help="final_dsp_pars", type=str, required=True + ) + argparser.add_argument("--qbb_grid_path", help="qbb_grid_path", type=str) + argparser.add_argument("--plot_path", help="plot_path", type=str) + + argparser.add_argument( + "--plot_save_path", help="plot_save_path", type=str, required=False + ) + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_dsp_eopt"] + + log = build_log(config_dict, args.log) + + sto = lh5.LH5Store() + t0 = time.time() + + meta = LegendMetadata(path=args.metadata) + channel_dict = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + + dsp_config = config_dict["inputs"]["processing_chain"][args.channel] + opt_json = config_dict["inputs"]["optimiser_config"][args.channel] + + opt_dict = Props.read_from(opt_json) + db_dict = Props.read_from(args.decay_const) + + if opt_dict.pop("run_eopt") is True: + peaks_kev = np.array(opt_dict["peaks"]) + kev_widths = [tuple(kev_width) for kev_width in opt_dict["kev_widths"]] + + kwarg_dicts_cusp = [] + kwarg_dicts_trap = [] + kwarg_dicts_zac = [] + for peak in peaks_kev: + peak_idx = np.where(peaks_kev == peak)[0][0] + kev_width = kev_widths[peak_idx] + + kwarg_dicts_cusp.append( + { + "parameter": "cuspEmax", + "func": hpge_peak, + "peak": peak, + "kev_width": kev_width, + "bin_width": 5, + } + ) + kwarg_dicts_zac.append( + { + "parameter": "zacEmax", + "func": hpge_peak, + "peak": peak, + "kev_width": kev_width, + "bin_width": 5, + } + ) + kwarg_dicts_trap.append( + { + "parameter": "trapEmax", + "func": hpge_peak, + "peak": peak, + "kev_width": kev_width, + "bin_width": 5, + } + ) + + peaks_rounded = [int(peak) for peak in peaks_kev] + peaks = sto.read(f"{channel}/raw", args.peak_file, field_mask=["peak"])[0][ + "peak" + ].nda + ids = np.isin(peaks, peaks_rounded) + peaks = peaks[ids] + idx_list = [np.where(peaks == peak)[0] for peak in peaks_rounded] + + tb_data = sto.read(f"{channel}/raw", args.peak_file, idx=ids)[0] + + t1 = time.time() + log.info(f"Data Loaded in {(t1-t0)/60} minutes") + + if isinstance(dsp_config, (str, list)): + dsp_config = Props.read_from(dsp_config) + + dsp_config["outputs"] = ["tp_99", "tp_0_est", "dt_eff"] + + init_data = run_one_dsp(tb_data, dsp_config, db_dict=db_dict, verbosity=0) + full_dt = (init_data["tp_99"].nda - init_data["tp_0_est"].nda)[idx_list[-1]] + flat_val = np.ceil(1.1 * np.nanpercentile(full_dt, 99) / 100) / 10 + + if flat_val 
< 1.0: + flat_val = 1.0 + elif flat_val > 4: + flat_val = 4 + flat_val = f"{flat_val}*us" + + db_dict["cusp"] = {"flat": flat_val} + db_dict["zac"] = {"flat": flat_val} + db_dict["etrap"] = {"flat": flat_val} + + tb_data.add_column("dt_eff", init_data["dt_eff"]) + + dsp_config["processors"].pop("dt_eff") + + dsp_config["outputs"] = ["zacEmax", "cuspEmax", "trapEmax", "dt_eff"] + + kwarg_dict = [ + { + "peak_dicts": kwarg_dicts_cusp, + "ctc_param": "dt_eff", + "idx_list": idx_list, + "peaks_kev": peaks_kev, + }, + { + "peak_dicts": kwarg_dicts_zac, + "ctc_param": "dt_eff", + "idx_list": idx_list, + "peaks_kev": peaks_kev, + }, + { + "peak_dicts": kwarg_dicts_trap, + "ctc_param": "dt_eff", + "idx_list": idx_list, + "peaks_kev": peaks_kev, + }, + ] + + fom = eval(opt_dict["fom"]) + out_field = opt_dict["fom_field"] + out_err_field = opt_dict["fom_err_field"] + sample_x = np.array(opt_dict["initial_samples"]) + + results_cusp = [] + results_zac = [] + results_trap = [] + + sample_y_cusp = [] + sample_y_zac = [] + sample_y_trap = [] + + err_y_cusp = [] + err_y_zac = [] + err_y_trap = [] + + for i, x in enumerate(sample_x): + db_dict["cusp"]["sigma"] = f"{x[0]}*us" + db_dict["zac"]["sigma"] = f"{x[0]}*us" + db_dict["etrap"]["rise"] = f"{x[0]}*us" + + log.info(f"Initialising values {i+1} : {db_dict}") + + tb_out = run_one_dsp(tb_data, dsp_config, db_dict=db_dict, verbosity=0) + + res = fom(tb_out, kwarg_dict[0]) + results_cusp.append(res) + sample_y_cusp.append(res[out_field]) + err_y_cusp.append(res[out_err_field]) + + res = fom(tb_out, kwarg_dict[1]) + results_zac.append(res) + sample_y_zac.append(res[out_field]) + err_y_zac.append(res[out_err_field]) + + res = fom(tb_out, kwarg_dict[2]) + results_trap.append(res) + sample_y_trap.append(res[out_field]) + err_y_trap.append(res[out_err_field]) + + log.info(f"{i+1} Finished") + + if np.isnan(sample_y_cusp).all(): + max_cusp = opt_dict["nan_default"] + else: + max_cusp = np.ceil(np.nanmax(sample_y_cusp) * 2) + if np.isnan(sample_y_zac).all(): + max_zac = opt_dict["nan_default"] + else: + max_zac = np.ceil(np.nanmax(sample_y_zac) * 2) + if np.isnan(sample_y_trap).all(): + max_trap = opt_dict["nan_default"] + else: + max_trap = np.ceil(np.nanmax(sample_y_trap) * 2) + + nan_vals = [max_cusp, max_zac, max_trap] + + for i in range(len(sample_x)): + if np.isnan(sample_y_cusp[i]): + results_cusp[i]["y_val"] = max_cusp + sample_y_cusp[i] = max_cusp + + if np.isnan(sample_y_zac[i]): + results_zac[i]["y_val"] = max_zac + sample_y_zac[i] = max_zac + + if np.isnan(sample_y_trap[i]): + results_trap[i]["y_val"] = max_trap + sample_y_trap[i] = max_trap + + kernel = ( + ker.ConstantKernel(2.0, constant_value_bounds="fixed") + + 1.0 * ker.RBF(1.0, length_scale_bounds=[0.5, 2.5]) + + ker.WhiteKernel(noise_level=0.1, noise_level_bounds=(1e-5, 1e1)) + ) + + lambda_param = 5 + sampling_rate = tb_data["waveform_presummed"]["dt"][0] + sampling_unit = ureg.Quantity( + tb_data["waveform_presummed"]["dt"].attrs["units"] + ) + waveform_sampling = sampling_rate * sampling_unit + + bopt_cusp = BayesianOptimizer( + acq_func=opt_dict["acq_func"], + batch_size=opt_dict["batch_size"], + kernel=kernel, + sampling_rate=waveform_sampling, + fom_value=out_field, + fom_error=out_err_field, + ) + bopt_cusp.lambda_param = lambda_param + bopt_cusp.add_dimension("cusp", "sigma", 0.5, 16, True, "us") + + bopt_zac = BayesianOptimizer( + acq_func=opt_dict["acq_func"], + batch_size=opt_dict["batch_size"], + kernel=kernel, + sampling_rate=waveform_sampling, + fom_value=out_field, + 
fom_error=out_err_field, + ) + bopt_zac.lambda_param = lambda_param + bopt_zac.add_dimension("zac", "sigma", 0.5, 16, True, "us") + + bopt_trap = BayesianOptimizer( + acq_func=opt_dict["acq_func"], + batch_size=opt_dict["batch_size"], + kernel=kernel, + sampling_rate=waveform_sampling, + fom_value=out_field, + fom_error=out_err_field, + ) + bopt_trap.lambda_param = lambda_param + bopt_trap.add_dimension("etrap", "rise", 1, 12, True, "us") + + bopt_cusp.add_initial_values( + x_init=sample_x, y_init=sample_y_cusp, yerr_init=err_y_cusp + ) + bopt_zac.add_initial_values( + x_init=sample_x, y_init=sample_y_zac, yerr_init=err_y_zac + ) + bopt_trap.add_initial_values( + x_init=sample_x, y_init=sample_y_trap, yerr_init=err_y_trap + ) + + best_idx = np.nanargmin(sample_y_cusp) + bopt_cusp.optimal_results = results_cusp[best_idx] + bopt_cusp.optimal_x = sample_x[best_idx] + + best_idx = np.nanargmin(sample_y_zac) + bopt_zac.optimal_results = results_zac[best_idx] + bopt_zac.optimal_x = sample_x[best_idx] + + best_idx = np.nanargmin(sample_y_trap) + bopt_trap.optimal_results = results_trap[best_idx] + bopt_trap.optimal_x = sample_x[best_idx] + + optimisers = [bopt_cusp, bopt_zac, bopt_trap] + + out_param_dict, out_results_list = run_bayesian_optimisation( + tb_data, + dsp_config, + [fom], + optimisers, + fom_kwargs=kwarg_dict, + db_dict=db_dict, + nan_val=nan_vals, + n_iter=opt_dict["n_iter"], + ) + + Props.add_to(db_dict, out_param_dict) + + # db_dict.update(out_param_dict) + + t2 = time.time() + log.info(f"Optimiser finished in {(t2-t1)/60} minutes") + + out_alpha_dict = {} + out_alpha_dict["cuspEmax_ctc"] = { + "expression": "cuspEmax*(1+dt_eff*a)", + "parameters": {"a": float(round(bopt_cusp.optimal_results["alpha"], 9))}, + } + + out_alpha_dict["cuspEftp_ctc"] = { + "expression": "cuspEftp*(1+dt_eff*a)", + "parameters": {"a": float(round(bopt_cusp.optimal_results["alpha"], 9))}, + } + + out_alpha_dict["zacEmax_ctc"] = { + "expression": "zacEmax*(1+dt_eff*a)", + "parameters": {"a": float(round(bopt_zac.optimal_results["alpha"], 9))}, + } + + out_alpha_dict["zacEftp_ctc"] = { + "expression": "zacEftp*(1+dt_eff*a)", + "parameters": {"a": float(round(bopt_zac.optimal_results["alpha"], 9))}, + } + + out_alpha_dict["trapEmax_ctc"] = { + "expression": "trapEmax*(1+dt_eff*a)", + "parameters": {"a": float(round(bopt_trap.optimal_results["alpha"], 9))}, + } + + out_alpha_dict["trapEftp_ctc"] = { + "expression": "trapEftp*(1+dt_eff*a)", + "parameters": {"a": float(round(bopt_trap.optimal_results["alpha"], 9))}, + } + if "ctc_params" in db_dict: + db_dict["ctc_params"].update(out_alpha_dict) + else: + db_dict.update({"ctc_params": out_alpha_dict}) + + Path(args.qbb_grid_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.qbb_grid_path).open("wb") as f: + pkl.dump(optimisers, f) + + else: + Path(args.qbb_grid_path).touch() + + Path(args.final_dsp_pars).parent.mkdir(parents=True, exist_ok=True) + Props.write_to(args.final_dsp_pars, db_dict) + + if args.plot_path: + if args.inplots: + with Path(args.inplots).open("rb") as r: + plot_dict = pkl.load(r) + else: + plot_dict = {} + + plot_dict["trap_optimisation"] = { + "kernel_space": bopt_trap.plot(init_samples=sample_x), + "acq_space": bopt_trap.plot_acq(init_samples=sample_x), + } + + plot_dict["cusp_optimisation"] = { + "kernel_space": bopt_cusp.plot(init_samples=sample_x), + "acq_space": bopt_cusp.plot_acq(init_samples=sample_x), + } + + plot_dict["zac_optimisation"] = { + "kernel_space": bopt_zac.plot(init_samples=sample_x), + "acq_space": 
bopt_zac.plot_acq(init_samples=sample_x), + } + + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_path).open("wb") as w: + pkl.dump(plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/workflow/src/legenddataflow/scripts/pars_dsp_evtsel_geds.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/evtsel.py similarity index 98% rename from workflow/src/legenddataflow/scripts/pars_dsp_evtsel_geds.py rename to workflow/src/legenddataflow/scripts/par/geds/dsp/evtsel.py index 2c01421..e9b1de6 100644 --- a/workflow/src/legenddataflow/scripts/pars_dsp_evtsel_geds.py +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/evtsel.py @@ -16,7 +16,7 @@ from pygama.pargen.data_cleaning import generate_cuts, get_keys, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp -from ..log import build_log +from ....log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -80,7 +80,7 @@ def get_out_data( return out_tbl, len(np.where(final_mask)[0]) -if __name__ == "__main__": +def par_geds_dsp_evtsel() -> None: argparser = argparse.ArgumentParser() argparser.add_argument("--raw_filelist", help="raw_filelist", type=str) argparser.add_argument( @@ -168,10 +168,6 @@ def get_out_data( if lh5_path[-1] != "/": lh5_path += "/" - raw_fields = [ - field.replace(lh5_path, "") for field in lh5.ls(raw_files[0], lh5_path) - ] - tb = sto.read( lh5_path, raw_files, field_mask=["daqenergy", "t_sat_lo", "timestamp"] )[0] diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/nopt.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/nopt.py new file mode 100644 index 0000000..691a0e8 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/nopt.py @@ -0,0 +1,112 @@ +import argparse +import pickle as pkl +import time +from pathlib import Path + +import lgdo.lh5 as lh5 +import numpy as np +import pygama.pargen.noise_optimization as pno +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata +from pygama.pargen.data_cleaning import generate_cuts, get_cut_indexes +from pygama.pargen.dsp_optimize import run_one_dsp + +from ....log import build_log + + +def par_geds_dsp_nopt() -> None: + sto = lh5.LH5Store() + + argparser = argparse.ArgumentParser() + argparser.add_argument("--raw_filelist", help="raw_filelist", type=str) + argparser.add_argument("--database", help="database", type=str, required=True) + argparser.add_argument("--inplots", help="inplots", type=str) + + argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--channel", help="Channel", type=str, required=True) + + argparser.add_argument("--dsp_pars", help="dsp_pars", type=str, required=True) + argparser.add_argument("--plot_path", help="plot_path", type=str) + + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_dsp_nopt"] + + log = build_log(config_dict, args.log) + + t0 = time.time() + + meta = LegendMetadata(path=args.metadata) + channel_dict = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{channel_dict[args.channel].daq.rawid:07}" 
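+    # map the detector name to its DAQ rawid and build the LH5 table prefix;
+    # raw tables are addressed as "ch{rawid:07d}/raw" elsewhere in this workflow
+    # (e.g. the FlashCam raw builder), so the rawid is zero-padded to 7 digits here as well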
+ + dsp_config = config_dict["inputs"]["processing_chain"][args.channel] + opt_json = config_dict["inputs"]["optimiser_config"][args.channel] + + opt_dict = Props.read_from(opt_json) + db_dict = Props.read_from(args.database) + + if opt_dict.pop("run_nopt") is True: + with Path(args.raw_filelist).open() as f: + files = f.read().splitlines() + + raw_files = sorted(files) + + energies = sto.read(f"{channel}/raw/daqenergy", raw_files)[0] + idxs = np.where(energies.nda == 0)[0] + tb_data = sto.read( + f"{channel}/raw", raw_files, n_rows=opt_dict["n_events"], idx=idxs + )[0] + t1 = time.time() + log.info(f"Time to open raw files {t1-t0:.2f} s, n. baselines {len(tb_data)}") + + log.info(f"Select baselines {len(tb_data)}") + dsp_data = run_one_dsp(tb_data, dsp_config) + cut_dict = generate_cuts(dsp_data, cut_dict=opt_dict.pop("cut_pars")) + cut_idxs = get_cut_indexes(dsp_data, cut_dict) + tb_data = sto.read( + f"{channel}/raw", + raw_files, + n_rows=opt_dict.pop("n_events"), + idx=idxs[cut_idxs], + )[0] + log.info(f"... {len(tb_data)} baselines after cuts") + + if isinstance(dsp_config, (str, list)): + dsp_config = Props.read_from(dsp_config) + + if args.plot_path: + out_dict, plot_dict = pno.noise_optimization( + tb_data, dsp_config, db_dict.copy(), opt_dict, channel, display=1 + ) + else: + out_dict = pno.noise_optimization( + raw_files, dsp_config, db_dict.copy(), opt_dict, channel + ) + + t2 = time.time() + log.info(f"Optimiser finished in {(t2-t0)/60} minutes") + else: + out_dict = {} + plot_dict = {} + + if args.plot_path: + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + if args.inplots: + with Path(args.inplots).open("rb") as r: + old_plot_dict = pkl.load(r) + plot_dict = dict(noise_optimisation=plot_dict, **old_plot_dict) + else: + plot_dict = {"noise_optimisation": plot_dict} + with Path(args.plot_path).open("wb") as f: + pkl.dump(plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) + + Path(args.dsp_pars).parent.mkdir(parents=True, exist_ok=True) + Props.write_to(args.dsp_pars, dict(nopt_pars=out_dict, **db_dict)) diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/svm.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/svm.py new file mode 100644 index 0000000..d4a1e22 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/svm.py @@ -0,0 +1,26 @@ +import argparse +from pathlib import Path + +from dbetto.catalog import Props + + +def par_geds_dsp_svm() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--log", help="log file", type=str) + argparser.add_argument( + "--output_file", help="output par file", type=str, required=True + ) + argparser.add_argument( + "--input_file", help="input par file", type=str, required=True + ) + argparser.add_argument("--svm_file", help="svm file", required=True) + args = argparser.parse_args() + + par_data = Props.read_from(args.input_file) + + file = f"'$_/{Path(args.svm_file).name}'" + + par_data["svm"] = {"model_file": file} + + Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) + Props.write_to(args.output_file, par_data) diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/svm_build.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/svm_build.py new file mode 100644 index 0000000..162ccfa --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/svm_build.py @@ -0,0 +1,63 @@ +import argparse +import pickle as pkl +from pathlib import Path + +from dbetto import TextDB +from dbetto.catalog import Props +from lgdo import lh5 +from sklearn.svm import SVC + 
+from ....log import build_log + + +def par_geds_dsp_svm_build() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--log", help="log file", type=str) + argparser.add_argument("--configs", help="config file", type=str) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + + argparser.add_argument( + "--output_file", help="output SVM file", type=str, required=True + ) + argparser.add_argument( + "--train_data", help="input data file", type=str, required=True + ) + argparser.add_argument( + "--train_hyperpars", help="input hyperparameter file", required=True + ) + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_dsp_build_svm"] + + log = build_log(config_dict, args.log) + + # Load files + tb = lh5.read("ml_train/dsp", args.train_data) + log.debug("loaded data") + + hyperpars = Props.read_from(args.train_hyperpars) + + # Define training inputs + dwts_norm = tb["dwt_norm"].nda + labels = tb["dc_label"].nda + + log.debug("training model") + # Initialize and train SVM + svm = SVC( + random_state=int(hyperpars["random_state"]), + kernel=hyperpars["kernel"], + decision_function_shape=hyperpars["decision_function_shape"], + class_weight=hyperpars["class_weight"], + C=float(hyperpars["C"]), + gamma=float(hyperpars["gamma"]), + ) + + svm.fit(dwts_norm, labels) + log.debug("trained model") + + # Save trained model with pickle + with Path(args.output_file).open("wb") as svm_file: + pkl.dump(svm, svm_file, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/tau.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/tau.py new file mode 100644 index 0000000..4d493a1 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/tau.py @@ -0,0 +1,146 @@ +import argparse +import pickle as pkl +from pathlib import Path + +import lgdo.lh5 as lh5 +import numpy as np +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata +from pygama.pargen.data_cleaning import get_cut_indexes, get_tcm_pulser_ids +from pygama.pargen.dsp_optimize import run_one_dsp +from pygama.pargen.extract_tau import ExtractTau + +from ....log import build_log + + +def par_geds_dsp_tau() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--configs", help="configs path", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) + argparser.add_argument("--log", help="log file", type=str) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--channel", help="Channel", type=str, required=True) + + argparser.add_argument("--plot_path", help="plot path", type=str, required=False) + argparser.add_argument("--output_file", help="output file", type=str, required=True) + + argparser.add_argument( + "--pulser_file", help="pulser file", type=str, required=False + ) + + argparser.add_argument("--raw_files", help="input files", nargs="*", type=str) + argparser.add_argument( + "--tcm_files", help="tcm_files", nargs="*", type=str, required=False + ) + args = argparser.parse_args() + + sto = lh5.LH5Store() + + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = 
configs["snakemake_rules"]["pars_dsp_nopt"] + + log = build_log(config_dict, args.log) + + meta = LegendMetadata(path=args.metadata) + channel_dict = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + + channel_dict = config_dict["inputs"]["processing_chain"][args.channel] + kwarg_dict = config_dict["inputs"]["tau_config"][args.channel] + + kwarg_dict = Props.read_from(kwarg_dict) + + if kwarg_dict["run_tau"] is True: + dsp_config = Props.read_from(channel_dict) + kwarg_dict.pop("run_tau") + if ( + isinstance(args.raw_files, list) + and args.raw_files[0].split(".")[-1] == "filelist" + ): + input_file = args.raw_files[0] + with Path(input_file).open() as f: + input_file = f.read().splitlines() + else: + input_file = args.raw_files + + if args.pulser_file: + pulser_dict = Props.read_from(args.pulser_file) + mask = np.array(pulser_dict["mask"]) + + elif args.tcm_filelist: + # get pulser mask from tcm files + with Path(args.tcm_filelist).open() as f: + tcm_files = f.read().splitlines() + tcm_files = sorted(np.unique(tcm_files)) + ids, mask = get_tcm_pulser_ids( + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] + ) + else: + msg = "No pulser file or tcm filelist provided" + raise ValueError(msg) + + data = sto.read( + f"{channel}/raw", + input_file, + field_mask=["daqenergy", "timestamp", "t_sat_lo"], + )[0].view_as("pd") + threshold = kwarg_dict.pop("threshold") + + discharges = data["t_sat_lo"] > 0 + discharge_timestamps = np.where(data["timestamp"][discharges])[0] + is_recovering = np.full(len(data), False, dtype=bool) + for tstamp in discharge_timestamps: + is_recovering = is_recovering | np.where( + ( + ((data["timestamp"] - tstamp) < 0.01) + & ((data["timestamp"] - tstamp) > 0) + ), + True, + False, + ) + cuts = np.where( + (data.daqenergy.to_numpy() > threshold) & (~mask) & (~is_recovering) + )[0] + + tb_data = sto.read( + f"{channel}/raw", + input_file, + idx=cuts, + n_rows=kwarg_dict.pop("n_events"), + )[0] + + tb_out = run_one_dsp(tb_data, dsp_config) + log.debug("Processed Data") + cut_parameters = kwarg_dict.get("cut_parameters", None) + if cut_parameters is not None: + idxs = get_cut_indexes(tb_out, cut_parameters=cut_parameters) + log.debug("Applied cuts") + log.debug(f"{len(idxs)} events passed cuts") + else: + idxs = np.full(len(tb_out), True, dtype=bool) + + tau = ExtractTau(dsp_config, kwarg_dict["wf_field"]) + slopes = tb_out["tail_slope"].nda + log.debug("Calculating pz constant") + + tau.get_decay_constant(slopes[idxs], tb_data[kwarg_dict["wf_field"]]) + + if args.plot_path: + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + + plot_dict = tau.plot_waveforms_after_correction( + tb_data, "wf_pz", norm_param=kwarg_dict.get("norm_param", "pz_mean") + ) + plot_dict.update(tau.plot_slopes(slopes[idxs])) + + with Path(args.plot_path).open("wb") as f: + pkl.dump({"tau": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) + out_dict = tau.output_dict + else: + out_dict = {} + + Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) + Props.write_to(args.output_file, out_dict) diff --git a/workflow/src/legenddataflow/scripts/par/geds/hit/aoe.py b/workflow/src/legenddataflow/scripts/par/geds/hit/aoe.py new file mode 100644 index 0000000..2b6c6e1 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/hit/aoe.py @@ -0,0 +1,262 @@ +from __future__ import annotations + +import argparse +import pickle as pkl +import warnings +from pathlib import Path + +import numpy as np +from 
dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata +from pygama.pargen.AoE_cal import * # noqa: F403 +from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak +from pygama.pargen.data_cleaning import get_tcm_pulser_ids +from pygama.pargen.utils import load_data + +from ....convert_np import convert_dict_np_to_float +from ....log import build_log + +warnings.filterwarnings(action="ignore", category=RuntimeWarning) + + +def get_results_dict(aoe_class): + return { + "cal_energy_param": aoe_class.cal_energy_param, + "dt_param": aoe_class.dt_param, + "rt_correction": aoe_class.dt_corr, + "1000-1300keV": aoe_class.timecorr_df.to_dict("index"), + "correction_fit_results": aoe_class.energy_corr_res_dict, + "low_cut": aoe_class.low_cut_val, + "high_cut": aoe_class.high_cut_val, + "low_side_sfs": aoe_class.low_side_sfs.to_dict("index"), + "2_side_sfs": aoe_class.two_side_sfs.to_dict("index"), + } + + +def fill_plot_dict(aoe_class, data, plot_options, plot_dict=None): + if plot_dict is not None: + for key, item in plot_options.items(): + if item["options"] is not None: + plot_dict[key] = item["function"](aoe_class, data, **item["options"]) + else: + plot_dict[key] = item["function"](aoe_class, data) + else: + plot_dict = {} + return plot_dict + + +def par_geds_hit_aoe() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("files", help="files", nargs="*", type=str) + argparser.add_argument( + "--pulser_file", help="pulser_file", type=str, required=False + ) + argparser.add_argument( + "--tcm_filelist", help="tcm_filelist", type=str, required=False + ) + + argparser.add_argument("--ecal_file", help="ecal_file", type=str, required=True) + argparser.add_argument("--eres_file", help="eres_file", type=str, required=True) + argparser.add_argument("--inplots", help="in_plot_path", type=str, required=False) + + argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--channel", help="Channel", type=str, required=True) + + argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) + argparser.add_argument("--hit_pars", help="hit_pars", type=str) + argparser.add_argument("--aoe_results", help="aoe_results", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_hit_aoecal"] + + log = build_log(config_dict, args.log) + + meta = LegendMetadata(path=args.metadata) + channel_dict = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + + channel_dict = config_dict["inputs"]["aoecal_config"][args.channel] + kwarg_dict = Props.read_from(channel_dict) + + ecal_dict = Props.read_from(args.ecal_file) + cal_dict = ecal_dict["pars"] + eres_dict = ecal_dict["results"]["ecal"] + + with Path(args.eres_file).open("rb") as o: + object_dict = pkl.load(o) + + if kwarg_dict["run_aoe"] is True: + kwarg_dict.pop("run_aoe") + + pdf = eval(kwarg_dict.pop("pdf")) if "pdf" in kwarg_dict else aoe_peak + + sigma_func = ( + 
eval(kwarg_dict.pop("sigma_func")) + if "sigma_func" in kwarg_dict + else SigmaFit + ) + + mean_func = ( + eval(kwarg_dict.pop("mean_func")) if "mean_func" in kwarg_dict else Pol1 + ) + + if "plot_options" in kwarg_dict: + for field, item in kwarg_dict["plot_options"].items(): + kwarg_dict["plot_options"][field]["function"] = eval(item["function"]) + + with Path(args.files[0]).open() as f: + files = f.read().splitlines() + files = sorted(files) + + try: + eres = eres_dict[kwarg_dict["cal_energy_param"]]["eres_linear"].copy() + + def eres_func(x): + return eval(eres["expression"], dict(x=x, **eres["parameters"])) + + except KeyError: + + def eres_func(x): + return x * np.nan + + params = [ + kwarg_dict["current_param"], + "tp_0_est", + "tp_99", + kwarg_dict["energy_param"], + kwarg_dict["cal_energy_param"], + kwarg_dict["cut_field"], + "timestamp", + ] + + if "dt_param" in kwarg_dict: + params += kwarg_dict["dt_param"] + else: + params += "dt_eff" + + if "dt_cut" in kwarg_dict and kwarg_dict["dt_cut"] is not None: + cal_dict.update(kwarg_dict["dt_cut"]["cut"]) + params.append(kwarg_dict["dt_cut"]["out_param"]) + + # load data in + data, threshold_mask = load_data( + files, + f"{channel}/dsp", + cal_dict, + params=params, + threshold=kwarg_dict.pop("threshold"), + return_selection_mask=True, + ) + + if args.pulser_file: + pulser_dict = Props.read_from(args.pulser_file) + mask = np.array(pulser_dict["mask"]) + if "pulser_multiplicity_threshold" in kwarg_dict: + kwarg_dict.pop("pulser_multiplicity_threshold") + + elif args.tcm_filelist: + # get pulser mask from tcm files + with Path(args.tcm_filelist).open() as f: + tcm_files = f.read().splitlines() + tcm_files = sorted(np.unique(tcm_files)) + ids, mask = get_tcm_pulser_ids( + tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold") + ) + else: + msg = "No pulser file or tcm filelist provided" + raise ValueError(msg) + + data["is_pulser"] = mask[threshold_mask] + + data["AoE_Uncorr"] = ( + data[kwarg_dict["current_param"]] / data[kwarg_dict["energy_param"]] + ) + aoe = CalAoE( + cal_dicts=cal_dict, + cal_energy_param=kwarg_dict["cal_energy_param"], + eres_func=eres_func, + pdf=pdf, + mean_func=mean_func, + sigma_func=sigma_func, + selection_string=f"{kwarg_dict.pop('cut_field')}&(~is_pulser)", + dt_corr=kwarg_dict.get("dt_corr", False), + dep_correct=kwarg_dict.get("dep_correct", False), + dt_cut=kwarg_dict.get("dt_cut", None), + dt_param=kwarg_dict.get("dt_param", 3), + high_cut_val=kwarg_dict.get("high_cut_val", 3), + compt_bands_width=kwarg_dict.get("debug_mode", 20), + debug_mode=args.debug | kwarg_dict.get("debug_mode", False), + ) + aoe.update_cal_dicts( + { + "AoE_Uncorr": { + "expression": f"{kwarg_dict['current_param']}/{kwarg_dict['energy_param']}", + "parameters": {}, + } + } + ) + aoe.calibrate(data, "AoE_Uncorr") + + log.info("Calibrated A/E") + out_dict = get_results_dict(aoe) + plot_dict = fill_plot_dict(aoe, data, kwarg_dict.get("plot_options", None)) + + aoe.pdf = aoe.pdf.name + + # need to change eres func as can't pickle lambdas + try: + aoe.eres_func = eres_dict[kwarg_dict["cal_energy_param"]][ + "eres_linear" + ].copy() + except KeyError: + aoe.eres_func = {} + else: + out_dict = {} + plot_dict = {} + aoe = None + + if args.plot_file: + common_dict = plot_dict.pop("common") if "common" in list(plot_dict) else None + if args.inplots: + with Path(args.inplots).open("rb") as r: + out_plot_dict = pkl.load(r) + out_plot_dict.update({"aoe": plot_dict}) + else: + out_plot_dict = {"aoe": plot_dict} + + if "common" in 
list(out_plot_dict) and common_dict is not None: + out_plot_dict["common"].update(common_dict) + elif common_dict is not None: + out_plot_dict["common"] = common_dict + + Path(args.plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_file).open("wb") as w: + pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) + + Path(args.hit_pars).parent.mkdir(parents=True, exist_ok=True) + results_dict = dict(**ecal_dict["results"], aoe=out_dict) + final_hit_dict = { + "pars": {"operations": cal_dict}, + "results": results_dict, + } + + final_hit_dict = convert_dict_np_to_float(final_hit_dict) + + Props.write_to(args.hit_pars, final_hit_dict) + + Path(args.aoe_results).parent.mkdir(parents=True, exist_ok=True) + final_object_dict = dict( + **object_dict, + aoe=aoe, + ) + with Path(args.aoe_results).open("wb") as w: + pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/workflow/src/legenddataflow/scripts/pars_hit_ecal.py b/workflow/src/legenddataflow/scripts/par/geds/hit/ecal.py similarity index 99% rename from workflow/src/legenddataflow/scripts/pars_hit_ecal.py rename to workflow/src/legenddataflow/scripts/par/geds/hit/ecal.py index 725fc84..c67e304 100644 --- a/workflow/src/legenddataflow/scripts/pars_hit_ecal.py +++ b/workflow/src/legenddataflow/scripts/par/geds/hit/ecal.py @@ -2,7 +2,6 @@ import argparse import copy -import logging import pickle as pkl import warnings from datetime import datetime @@ -24,10 +23,9 @@ from pygama.pargen.utils import load_data from scipy.stats import binned_statistic -from ..convert_np import convert_dict_np_to_float -from ..log import build_log +from ....convert_np import convert_dict_np_to_float +from ....log import build_log -log = logging.getLogger(__name__) mpl.use("agg") sto = lh5.LH5Store() @@ -437,7 +435,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): } -if __name__ == "__main__": +def par_geds_hit_ecal() -> None: argparser = argparse.ArgumentParser() argparser.add_argument("--files", help="filelist", nargs="*", type=str) argparser.add_argument( @@ -478,7 +476,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): msg = "invalid tier" raise ValueError(msg) - log = build_log(config_dict, args.log) + build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp) diff --git a/workflow/src/legenddataflow/scripts/par/geds/hit/lq.py b/workflow/src/legenddataflow/scripts/par/geds/hit/lq.py new file mode 100644 index 0000000..357fe33 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/hit/lq.py @@ -0,0 +1,230 @@ +from __future__ import annotations + +import argparse +import pickle as pkl +import warnings +from pathlib import Path + +import numpy as np +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata +from pygama.math.distributions import gaussian +from pygama.pargen.AoE_cal import * # noqa: F403 +from pygama.pargen.data_cleaning import get_tcm_pulser_ids +from pygama.pargen.lq_cal import * # noqa: F403 +from pygama.pargen.lq_cal import LQCal +from pygama.pargen.utils import load_data + +from ....convert_np import convert_dict_np_to_float +from ....log import build_log + +warnings.filterwarnings(action="ignore", category=RuntimeWarning) + + +def get_results_dict(lq_class): + return { + "cal_energy_param": lq_class.cal_energy_param, + "DEP_means": lq_class.timecorr_df.to_dict("index"), + "rt_correction": lq_class.dt_fit_pars, + "cut_fit_pars": 
lq_class.cut_fit_pars.to_dict(), + "cut_value": lq_class.cut_val, + "sfs": lq_class.low_side_sf.to_dict("index"), + } + + +def fill_plot_dict(lq_class, data, plot_options, plot_dict=None): + if plot_dict is not None: + for key, item in plot_options.items(): + if item["options"] is not None: + plot_dict[key] = item["function"](lq_class, data, **item["options"]) + else: + plot_dict[key] = item["function"](lq_class, data) + else: + plot_dict = {} + return plot_dict + + +def par_geds_hit_lq() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("files", help="files", nargs="*", type=str) + argparser.add_argument( + "--pulser_file", help="pulser_file", type=str, required=False + ) + argparser.add_argument( + "--tcm_filelist", help="tcm_filelist", type=str, required=False + ) + + argparser.add_argument("--ecal_file", help="ecal_file", type=str, required=True) + argparser.add_argument("--eres_file", help="eres_file", type=str, required=True) + argparser.add_argument("--inplots", help="in_plot_path", type=str, required=False) + + argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--channel", help="Channel", type=str, required=True) + + argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) + argparser.add_argument("--hit_pars", help="hit_pars", type=str) + argparser.add_argument("--lq_results", help="lq_results", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_hit_lqcal"] + + log = build_log(config_dict, args.log) + + meta = LegendMetadata(path=args.metadata) + channel_dict = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + + channel_dict = config_dict["inputs"]["lqcal_config"][args.channel] + kwarg_dict = Props.read_from(channel_dict) + + ecal_dict = Props.read_from(args.ecal_file) + cal_dict = ecal_dict["pars"]["operations"] + eres_dict = ecal_dict["results"]["ecal"] + + with Path(args.eres_file).open("rb") as o: + object_dict = pkl.load(o) + + if kwarg_dict["run_lq"] is True: + kwarg_dict.pop("run_lq") + + cdf = eval(kwarg_dict.pop("cdf")) if "cdf" in kwarg_dict else gaussian + + if "plot_options" in kwarg_dict: + for field, item in kwarg_dict["plot_options"].items(): + kwarg_dict["plot_options"][field]["function"] = eval(item["function"]) + + with Path(args.files[0]).open() as f: + files = f.read().splitlines() + files = sorted(files) + + try: + eres = eres_dict[kwarg_dict["cal_energy_param"]]["eres_linear"].copy() + + def eres_func(x): + return eval(eres["expression"], dict(x=x, **eres["parameters"])) + + except KeyError: + + def eres_func(x): + return x * np.nan + + params = [ + "lq80", + "dt_eff", + kwarg_dict["energy_param"], + kwarg_dict["cal_energy_param"], + kwarg_dict["cut_field"], + ] + + # load data in + data, threshold_mask = load_data( + files, + f"{channel}/dsp", + cal_dict, + params=params, + threshold=kwarg_dict.pop("threshold"), + return_selection_mask=True, + ) + + if args.pulser_file: + pulser_dict = 
Props.read_from(args.pulser_file) + mask = np.array(pulser_dict["mask"]) + if "pulser_multiplicity_threshold" in kwarg_dict: + kwarg_dict.pop("pulser_multiplicity_threshold") + + elif args.tcm_filelist: + # get pulser mask from tcm files + with Path(args.tcm_filelist).open() as f: + tcm_files = f.read().splitlines() + tcm_files = sorted(np.unique(tcm_files)) + ids, mask = get_tcm_pulser_ids( + tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold") + ) + else: + msg = "No pulser file or tcm filelist provided" + raise ValueError(msg) + + data["is_pulser"] = mask[threshold_mask] + + lq = LQCal( + cal_dict, + kwarg_dict["cal_energy_param"], + kwarg_dict["dt_param"], + eres_func, + cdf, + selection_string=f"{kwarg_dict.pop('cut_field')}&(~is_pulser)", + debug_mode=args.debug_mode | kwarg_dict.get("debug_mode", False), + ) + + data["LQ_Ecorr"] = np.divide(data["lq80"], data[kwarg_dict["energy_param"]]) + + lq.update_cal_dicts( + { + "LQ_Ecorr": { + "expression": f"lq80/{kwarg_dict['energy_param']}", + "parameters": {}, + } + } + ) + + lq.calibrate(data, "LQ_Ecorr") + log.info("Calibrated LQ") + + out_dict = get_results_dict(lq) + plot_dict = fill_plot_dict(lq, data, kwarg_dict.get("plot_options", None)) + + # need to change eres func as can't pickle lambdas + try: + lq.eres_func = eres_dict[kwarg_dict["cal_energy_param"]][ + "eres_linear" + ].copy() + except KeyError: + lq.eres_func = {} + else: + out_dict = {} + plot_dict = {} + lq = None + + if args.plot_file: + common_dict = plot_dict.pop("common") if "common" in list(plot_dict) else None + if args.inplots: + with Path(args.inplots).open("rb") as r: + out_plot_dict = pkl.load(r) + out_plot_dict.update({"lq": plot_dict}) + else: + out_plot_dict = {"lq": plot_dict} + + if "common" in list(out_plot_dict) and common_dict is not None: + out_plot_dict["common"].update(common_dict) + elif common_dict is not None: + out_plot_dict["common"] = common_dict + + Path(args.plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_file).open("wb") as w: + pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) + + final_hit_dict = convert_dict_np_to_float( + { + "pars": {"operations": cal_dict}, + "results": dict(**eres_dict, lq=out_dict), + } + ) + Path(args.hit_pars).parent.mkdir(parents=True, exist_ok=True) + Props.write_to(args.hit_pars, final_hit_dict) + + final_object_dict = dict( + **object_dict, + lq=lq, + ) + Path(args.lq_results).parent.mkdir(parents=True, exist_ok=True) + with Path(args.lq_results).open("wb") as w: + pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/workflow/src/legenddataflow/scripts/pars_hit_qc.py b/workflow/src/legenddataflow/scripts/par/geds/hit/qc.py similarity index 98% rename from workflow/src/legenddataflow/scripts/pars_hit_qc.py rename to workflow/src/legenddataflow/scripts/par/geds/hit/qc.py index 5e6a378..acc1a32 100644 --- a/workflow/src/legenddataflow/scripts/pars_hit_qc.py +++ b/workflow/src/legenddataflow/scripts/par/geds/hit/qc.py @@ -2,7 +2,6 @@ import argparse import json -import logging import pickle as pkl import re import warnings @@ -20,15 +19,13 @@ ) from pygama.pargen.utils import load_data -from ..convert_np import convert_dict_np_to_float -from ..log import build_log - -log = logging.getLogger(__name__) +from ....convert_np import convert_dict_np_to_float +from ....log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) -if __name__ == "__main__": +def par_geds_hit_qc() -> None: argparser = argparse.ArgumentParser() 
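Note: the `LQ_Ecorr` and `AoE_Uncorr` entries added via `update_cal_dicts` in the scripts above follow the expression/parameters convention used throughout these parameter files: a string expression over dsp column names plus a dictionary of scalar parameters. A minimal sketch of how such an entry can be evaluated, mirroring the numexpr-based evaluation used in the blinding-check script further down; the column values and the extra parameter `a` below are invented purely for illustration (the real `LQ_Ecorr` entry has an empty parameters dict).

import numexpr as ne
import numpy as np

# hypothetical calibration entry in the same shape the scripts write out;
# "a" is added here only to show how scalar parameters are bound
entry = {
    "expression": "lq80 / trapEmax * a",
    "parameters": {"a": 1.0},
}

# toy stand-ins for columns loaded from the dsp tier (values made up)
columns = {
    "lq80": np.array([120.0, 95.0, 130.0]),
    "trapEmax": np.array([2614.5, 1460.8, 583.2]),
}

# bind columns and scalar parameters, then evaluate the expression string
lq_ecorr = ne.evaluate(
    entry["expression"], local_dict={**columns, **entry["parameters"]}
)
print(lq_ecorr)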
argparser.add_argument("--cal_files", help="cal_files", nargs="*", type=str) argparser.add_argument("--fft_files", help="fft_files", nargs="*", type=str) diff --git a/workflow/src/legenddataflow/scripts/pars_pht_aoecal.py b/workflow/src/legenddataflow/scripts/par/geds/pht/aoe.py similarity index 84% rename from workflow/src/legenddataflow/scripts/pars_pht_aoecal.py rename to workflow/src/legenddataflow/scripts/par/geds/pht/aoe.py index 8aad849..12c70f8 100644 --- a/workflow/src/legenddataflow/scripts/pars_pht_aoecal.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/aoe.py @@ -2,12 +2,10 @@ import argparse import copy -import logging import pickle as pkl import re import warnings from pathlib import Path -from typing import Callable import numpy as np import pandas as pd @@ -19,10 +17,9 @@ from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data -from ..FileKey import ChannelProcKey, ProcessingFileKey -from ..log import build_log +from ....FileKey import ChannelProcKey, ProcessingFileKey +from ....log import build_log -log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -77,62 +74,6 @@ def fill_plot_dict(aoe_class, data, plot_options, plot_dict=None): return plot_dict -def aoe_calibration( - data: pd.Dataframe, - cal_dicts: dict, - current_param: str, - energy_param: str, - cal_energy_param: str, - eres_func: Callable, - pdf: Callable = aoe_peak, - selection_string: str = "", - dt_corr: bool = False, - dep_correct: bool = False, - dt_cut: dict | None = None, - high_cut_val: int = 3, - mean_func: Callable = Pol1, - sigma_func: Callable = SigmaFit, - # dep_acc: float = 0.9, - dt_param: str = "dt_eff", - comptBands_width: int = 20, - plot_options: dict | None = None, - debug_mode: bool = False, -): - data["AoE_Uncorr"] = data[current_param] / data[energy_param] - aoe = CalAoE( - cal_dicts=cal_dicts, - cal_energy_param=cal_energy_param, - eres_func=eres_func, - pdf=pdf, - selection_string=selection_string, - dt_corr=dt_corr, - dep_correct=dep_correct, - dt_cut=dt_cut, - dt_param=dt_param, - high_cut_val=high_cut_val, - mean_func=mean_func, - sigma_func=sigma_func, - compt_bands_width=comptBands_width, - debug_mode=debug_mode | args.debug, - ) - aoe.update_cal_dicts( - { - "AoE_Uncorr": { - "expression": f"{current_param}/{energy_param}", - "parameters": {}, - } - } - ) - aoe.calibrate(data, "AoE_Uncorr") - log.info("Calibrated A/E") - return ( - cal_dicts, - get_results_dict(aoe), - fill_plot_dict(aoe, data, plot_options), - aoe, - ) - - def run_aoe_calibration( data, cal_dicts, @@ -143,6 +84,7 @@ def run_aoe_calibration( configs, channel, datatype, + debug_mode=False, # gen_plots=True, ): configs = LegendMetadata(path=configs) @@ -211,28 +153,50 @@ def eres_func(x): def eres_func(x): return x * np.nan - cal_dicts, out_dict, aoe_plot_dict, aoe_obj = aoe_calibration( - data, - selection_string=f"{kwarg_dict.pop('final_cut_field')}&(~is_pulser)", + data["AoE_Uncorr"] = ( + data[kwarg_dict["current_param"]] / data[kwarg_dict["energy_param"]] + ) + aoe = CalAoE( cal_dicts=cal_dicts, + cal_energy_param=kwarg_dict["cal_energy_param"], eres_func=eres_func, pdf=pdf, mean_func=mean_func, sigma_func=sigma_func, - **kwarg_dict, + selection_string=f"{kwarg_dict.pop('cut_field')}&(~is_pulser)", + dt_corr=kwarg_dict.get("dt_corr", False), + dep_correct=kwarg_dict.get("dep_correct", False), + dt_cut=kwarg_dict.get("dt_cut", None), + dt_param=kwarg_dict.get("dt_param", 3), + 
high_cut_val=kwarg_dict.get("high_cut_val", 3), + compt_bands_width=kwarg_dict.get("debug_mode", 20), + debug_mode=debug_mode | kwarg_dict.get("debug_mode", False), ) - aoe_obj.pdf = aoe_obj.pdf.name + aoe.update_cal_dicts( + { + "AoE_Uncorr": { + "expression": f"{kwarg_dict['current_param']}/{kwarg_dict['energy_param']}", + "parameters": {}, + } + } + ) + aoe.calibrate(data, "AoE_Uncorr") + + out_dict = get_results_dict(aoe) + plot_dict = fill_plot_dict(aoe, data, kwarg_dict.get("plot_options", None)) + + aoe.pdf = aoe.pdf.name # need to change eres func as can't pickle lambdas try: - aoe_obj.eres_func = results_dicts[next(iter(results_dicts))][ - "partition_ecal" - ][kwarg_dict["cal_energy_param"]]["eres_linear"] + aoe.eres_func = results_dicts[next(iter(results_dicts))]["partition_ecal"][ + kwarg_dict["cal_energy_param"] + ]["eres_linear"] except KeyError: - aoe_obj.eres_func = {} + aoe.eres_func = {} else: out_dict = {tstamp: None for tstamp in cal_dicts} aoe_plot_dict = {} - aoe_obj = None + aoe = None out_result_dicts = {} for tstamp, result_dict in results_dicts.items(): @@ -240,7 +204,7 @@ def eres_func(x): out_object_dicts = {} for tstamp, object_dict in object_dicts.items(): - out_object_dicts[tstamp] = dict(**object_dict, aoe=aoe_obj) + out_object_dicts[tstamp] = dict(**object_dict, aoe=aoe) common_dict = ( aoe_plot_dict.pop("common") if "common" in list(aoe_plot_dict) else None @@ -257,7 +221,7 @@ def eres_func(x): return cal_dicts, out_result_dicts, out_object_dicts, out_plot_dicts -if __name__ == "__main__": +def par_geds_pht_aoe() -> None: argparser = argparse.ArgumentParser() argparser.add_argument( "--input_files", help="files", type=str, nargs="*", required=True @@ -298,7 +262,7 @@ def eres_func(x): configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_pht_aoecal"] - log = build_log(config_dict, args.log) + build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) @@ -424,6 +388,7 @@ def eres_func(x): args.configs, args.channel, args.datatype, + debug_mode=args.debug, # gen_plots=bool(args.plot_file), ) @@ -436,7 +401,7 @@ def eres_func(x): for out in sorted(args.hit_pars): fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) final_hit_dict = { - "pars": cal_dict[fk.timestamp], + "pars": cal_dicts[fk.timestamp], "results": results_dicts[fk.timestamp], } Path(out).parent.mkdir(parents=True, exist_ok=True) @@ -446,4 +411,4 @@ def eres_func(x): fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) Path(out).parent.mkdir(parents=True, exist_ok=True) with Path(out).open("wb") as w: - pkl.dump(object_dict[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) + pkl.dump(object_dicts[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/workflow/src/legenddataflow/scripts/pars_pht_partcal.py b/workflow/src/legenddataflow/scripts/par/geds/pht/ecal_part.py similarity index 99% rename from workflow/src/legenddataflow/scripts/pars_pht_partcal.py rename to workflow/src/legenddataflow/scripts/par/geds/pht/ecal_part.py index bd2d93f..560a063 100644 --- a/workflow/src/legenddataflow/scripts/pars_pht_partcal.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/ecal_part.py @@ -19,8 +19,8 @@ from pygama.pargen.energy_cal import FWHMLinear, FWHMQuadratic, HPGeCalibration from pygama.pargen.utils import load_data -from ..FileKey import ChannelProcKey, ProcessingFileKey -from ..log import build_log +from ....FileKey 
import ChannelProcKey, ProcessingFileKey +from ....log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) diff --git a/workflow/src/legenddataflow/scripts/pars_pht_fast.py b/workflow/src/legenddataflow/scripts/par/geds/pht/fast.py similarity index 95% rename from workflow/src/legenddataflow/scripts/pars_pht_fast.py rename to workflow/src/legenddataflow/scripts/par/geds/pht/fast.py index 6dda1b7..0faa42d 100644 --- a/workflow/src/legenddataflow/scripts/pars_pht_fast.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/fast.py @@ -11,14 +11,14 @@ from dbetto import TextDB from dbetto.catalog import Props from legendmeta import LegendMetadata -from pars_pht_aoecal import run_aoe_calibration -from pars_pht_lqcal import run_lq_calibration -from pars_pht_partcal import calibrate_partition from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data +from workflow.src.legenddataflow.scripts.par.geds.pht.aoe import run_aoe_calibration +from workflow.src.legenddataflow.scripts.par.geds.pht.lq import run_lq_calibration +from workflow.src.legenddataflow.scripts.par.geds.pht.partcal import calibrate_partition -from ..FileKey import ChannelProcKey, ProcessingFileKey -from ..log import build_log +from ....FileKey import ChannelProcKey, ProcessingFileKey +from ....log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) @@ -42,7 +42,7 @@ def run_splitter(files): return run_files -if __name__ == "__main__": +def par_geds_pht_fast() -> None: argparser = argparse.ArgumentParser() argparser.add_argument( "--input_files", help="files", type=str, nargs="*", required=True @@ -83,7 +83,7 @@ def run_splitter(files): configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"] - log = build_log(config_dict["pars_pht_partcal"], args.log) + build_log(config_dict["pars_pht_partcal"], args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/workflow/src/legenddataflow/scripts/pars_pht_lqcal.py b/workflow/src/legenddataflow/scripts/par/geds/pht/lq.py similarity index 100% rename from workflow/src/legenddataflow/scripts/pars_pht_lqcal.py rename to workflow/src/legenddataflow/scripts/par/geds/pht/lq.py diff --git a/workflow/src/legenddataflow/scripts/pars_pht_qc.py b/workflow/src/legenddataflow/scripts/par/geds/pht/qc.py similarity index 98% rename from workflow/src/legenddataflow/scripts/pars_pht_qc.py rename to workflow/src/legenddataflow/scripts/par/geds/pht/qc.py index feee4e5..af6dc95 100644 --- a/workflow/src/legenddataflow/scripts/pars_pht_qc.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/qc.py @@ -2,7 +2,6 @@ import argparse import json -import logging import pickle as pkl import re import warnings @@ -20,10 +19,8 @@ ) from pygama.pargen.utils import load_data -from ..convert_np import convert_dict_np_to_float -from ..log import build_log - -log = logging.getLogger(__name__) +from ....convert_np import convert_dict_np_to_float +from ....log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/workflow/src/legenddataflow/scripts/pars_pht_qc_phy.py b/workflow/src/legenddataflow/scripts/par/geds/pht/qc_phy.py similarity index 97% rename from workflow/src/legenddataflow/scripts/pars_pht_qc_phy.py 
rename to workflow/src/legenddataflow/scripts/par/geds/pht/qc_phy.py index 71167df..38f5e20 100644 --- a/workflow/src/legenddataflow/scripts/pars_pht_qc_phy.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/qc_phy.py @@ -2,7 +2,6 @@ import argparse import json -import logging import pickle as pkl import re import warnings @@ -19,10 +18,8 @@ get_keys, ) -from ..convert_np import convert_dict_np_to_float -from ..log import build_log - -log = logging.getLogger(__name__) +from ....convert_np import convert_dict_np_to_float +from ....log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/workflow/src/legenddataflow/scripts/par/geds/psp/average.py b/workflow/src/legenddataflow/scripts/par/geds/psp/average.py new file mode 100644 index 0000000..65508a2 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/psp/average.py @@ -0,0 +1,160 @@ +import argparse +import pickle as pkl +from datetime import datetime +from pathlib import Path + +import matplotlib as mpl +import matplotlib.dates as mdates +import matplotlib.pyplot as plt +import numpy as np +from dbetto.catalog import Props +from legendmeta import LegendMetadata + +from ....FileKey import ChannelProcKey + +mpl.use("Agg") + + +def par_geds_psp_average() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument( + "--input", help="input files", nargs="*", type=str, required=True + ) + argparser.add_argument( + "--output", help="output file", nargs="*", type=str, required=True + ) + argparser.add_argument( + "--in_plots", help="input plot files", nargs="*", type=str, required=False + ) + argparser.add_argument( + "--out_plots", help="output plot files", nargs="*", type=str, required=False + ) + argparser.add_argument( + "--in_obj", help="input object files", nargs="*", type=str, required=False + ) + argparser.add_argument( + "--out_obj", help="output object files", nargs="*", type=str, required=False + ) + + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--configs", help="configs", type=str, required=True) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--channel", help="Channel", type=str, required=True) + args = argparser.parse_args() + + configs = LegendMetadata(args.configs, lazy=True).on( + args.timestamp, system=args.datatype + ) + merge_config = Props.read_from( + configs["snakemake_rules"]["pars_psp"]["inputs"]["psp_config"][args.channel] + ) + + ave_fields = merge_config["average_fields"] + + # partitions could be different for different channels - do separately for each channel + in_dicts = {} + for file in args.input: + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp + in_dicts[tstamp] = Props.read_from(file) + + plot_dict = {} + for field in ave_fields: + keys = field.split(".") + vals = [] + for _tstamp, tstamp_dict in in_dicts.items(): + val = tstamp_dict.copy() + for key in keys: + val = val[key] + vals.append(val) + if "dsp" in tstamp_dict: + tmp_dict = tstamp_dict["dsp"] + else: + tmp_dict = {} + tstamp_dict["dsp"] = tmp_dict + for i, key in enumerate(keys): + if i == len(keys) - 1: + tmp_dict[key] = val + else: + if key in tmp_dict: + tmp_dict = tmp_dict[key] + else: + tmp_dict[key] = {} + tmp_dict = tmp_dict[key] + if isinstance(vals[0], str): + if "*" in vals[0]: + unit = vals[0].split("*")[1] + rounding = ( + 
len(val.split("*")[0].split(".")[-1]) if "." in vals[0] else 16 + ) + vals = np.array([float(val.split("*")[0]) for val in vals]) + else: + unit = None + rounding = 16 + else: + vals = np.array(vals) + unit = None + rounding = 16 + + mean_val = np.nan if len(vals[~np.isnan(vals)]) == 0 else np.nanmedian(vals) + mean = f"{round(mean_val, rounding)}*{unit}" if unit is not None else mean_val + + for _tstamp, tstamp_dict in in_dicts.items(): + val = tstamp_dict + for i, key in enumerate(keys): + if i == len(keys) - 1: + val[key] = mean + else: + val = val[key] + + fig = plt.figure() + plt.scatter( + [datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in in_dicts], vals + ) + plt.axhline(y=mean_val, color="r", linestyle="-") + plt.xlabel("time") + if unit is not None: + plt.ylabel(f"value {unit}") + else: + plt.ylabel("value") + plt.gca().xaxis.set_major_formatter(mdates.DateFormatter("%d/%m/%y")) + plt.gcf().autofmt_xdate() + plt.title(field) + plot_dict[field] = fig + plt.close() + + for file in args.output: + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp + Props.write_to(file, in_dicts[tstamp]) + + if args.out_plots: + for file in args.out_plots: + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp + if args.in_plots: + for infile in args.in_plots: + if tstamp in infile: + with Path(infile).open("rb") as f: + old_plot_dict = pkl.load(f) + break + old_plot_dict.update({"psp": plot_dict}) + new_plot_dict = old_plot_dict + else: + new_plot_dict = {"psp": plot_dict} + with Path(file).open("wb") as f: + pkl.dump(new_plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) + + if args.out_obj: + for file in args.out_obj: + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp + if args.in_obj: + for infile in args.in_obj: + if tstamp in infile: + with Path(infile).open("rb") as f: + old_obj_dict = pkl.load(f) + break + new_obj_dict = old_obj_dict + else: + new_obj_dict = {} + with Path(file).open("wb") as f: + pkl.dump(new_obj_dict, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/workflow/src/legenddataflow/scripts/par/geds/raw/blindcal.py b/workflow/src/legenddataflow/scripts/par/geds/raw/blindcal.py new file mode 100644 index 0000000..a937458 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/raw/blindcal.py @@ -0,0 +1,113 @@ +""" +This script applies a simple calibration to the daqenergy for all channels, +it does this using a peak search, matching the peaks to the given ones +and deriving a simple scaling relation from adc to keV. 
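As a rough numerical illustration of that scaling relation (the numbers are hypothetical, and the real script fits the scale over all matched peaks rather than a single line): if the peak search places the 2614.5 keV line near 13250 ADC, the scale is a = 2614.5 / 13250, about 0.197 keV/ADC, stored in the same expression/parameters form the script writes out.

# hypothetical numbers, for illustration only
adc_pos_2615 = 13250.0            # peak-search position of the 2614.5 keV line, in ADC
a = 2614.5 / adc_pos_2615         # simple linear ADC -> keV scale, about 0.197
blind_curve = {
    "daqenergy_cal": {"expression": "daqenergy*a", "parameters": {"a": round(a, 5)}}
}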
+""" + +import argparse +import logging +import pickle as pkl +from pathlib import Path + +import matplotlib as mpl +import matplotlib.pyplot as plt +import numpy as np +from dbetto.catalog import Props +from legendmeta import LegendMetadata +from lgdo import lh5 +from pygama.pargen.energy_cal import HPGeCalibration + +mpl.use("agg") + + +def par_geds_raw_blindcal() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--files", help="files", nargs="*", type=str) + + argparser.add_argument("--blind_curve", help="blind_curve", type=str) + argparser.add_argument("--plot_file", help="out plot path", type=str) + + argparser.add_argument("--meta", help="meta", type=str) + argparser.add_argument("--configs", help="configs", type=str) + argparser.add_argument("--log", help="log", type=str) + + argparser.add_argument("--timestamp", help="timestamp", type=str) + argparser.add_argument("--datatype", help="datatype", type=str) + argparser.add_argument("--channel", help="channel", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") + args = argparser.parse_args() + + logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") + logging.getLogger("numba").setLevel(logging.INFO) + logging.getLogger("parse").setLevel(logging.INFO) + logging.getLogger("lgdo").setLevel(logging.INFO) + logging.getLogger("matplotlib").setLevel(logging.INFO) + log = logging.getLogger(__name__) + + meta = LegendMetadata(path=args.meta) + channel_dict = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + + # peaks to search for + peaks_keV = np.array( + [238, 583.191, 727.330, 860.564, 1592.53, 1620.50, 2103.53, 2614.50] + ) + + E_uncal = lh5.read(f"{channel}/raw/daqenergy", sorted(args.files))[0].view_as("np") + E_uncal = E_uncal[E_uncal > 200] + guess_keV = 2620 / np.nanpercentile(E_uncal, 99) # usual simple guess + + # daqenergy is an int so use integer binning (dx used to be bugged as output so switched to nbins) + + hpge_cal = HPGeCalibration( + "daqenergy", + peaks_keV, + guess_keV, + 0, + uncal_is_int=True, + debug_mode=args.debug, + ) + + # Run the rough peak search + detected_peaks_locs, detected_peaks_keV, roughpars = hpge_cal.hpge_find_E_peaks( + E_uncal + ) + + log.info(f"{len(detected_peaks_locs)} peaks found:") + log.info("\t Energy | Position ") + for i, (Li, Ei) in enumerate(zip(detected_peaks_locs, detected_peaks_keV)): + log.info(f"\t{i}".ljust(4) + str(Ei).ljust(9) + f"| {Li:g}".ljust(5)) # noqa: G003 + + # dictionary to pass to build hit + out_dict = { + "pars": { + "operations": { + "daqenergy_cal": { + "expression": "daqenergy*a", + "parameters": {"a": round(roughpars[0], 5)}, + } + } + } + } + + # plot to check thagt the calibration is correct with zoom on 2.6 peak + fig = plt.figure(figsize=(8, 10)) + ax = plt.subplot(211) + ax.hist(E_uncal * roughpars[0], bins=np.arange(0, 3000, 1), histtype="step") + ax.set_ylabel("counts") + ax.set_yscale("log") + ax2 = plt.subplot(212) + ax2.hist( + E_uncal * roughpars[0], + bins=np.arange(2600, 2630, 1 * roughpars[0]), + histtype="step", + ) + ax2.set_xlabel("energy (keV)") + ax2.set_ylabel("counts") + plt.suptitle(args.channel) + with Path(args.plot_file).open("wb") as w: + pkl.dump(fig, w, protocol=pkl.HIGHEST_PROTOCOL) + plt.close() + + Props.write_to_file(args.blind_curve, out_dict) diff --git a/workflow/src/legenddataflow/scripts/par/geds/raw/blindcheck.py b/workflow/src/legenddataflow/scripts/par/geds/raw/blindcheck.py new 
file mode 100644 index 0000000..7f645c1 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/raw/blindcheck.py @@ -0,0 +1,113 @@ +""" +This script checks that the blinding for a particular channel is still valid, +it does this by taking the calibration curve stored in the overrides, applying it +to the daqenergy, running a peak search over the calibrated energy and checking that +there are peaks within 5keV of the 583 and 2614 peaks. If the detector is in ac mode +then it will skip the check. +""" + +import argparse +import pickle as pkl +from pathlib import Path + +import matplotlib as mpl +import matplotlib.pyplot as plt +import numexpr as ne +import numpy as np +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata +from lgdo import lh5 +from pygama.math.histogram import get_hist +from pygama.pargen.energy_cal import get_i_local_maxima + +from ....log import build_log + +mpl.use("Agg") + + +def par_geds_raw_blindcheck() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--files", help="files", nargs="*", type=str) + argparser.add_argument("--output", help="output file", type=str) + argparser.add_argument("--plot_file", help="plot file", type=str) + argparser.add_argument( + "--blind_curve", help="blinding curves file", nargs="*", type=str + ) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--configs", help="config file", type=str) + argparser.add_argument("--channel", help="channel", type=str) + argparser.add_argument("--metadata", help="channel", type=str) + argparser.add_argument("--log", help="log file", type=str) + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["tier_raw_blindcheck"] + + log = build_log(config_dict, args.log) + + # get the usability status for this channel + chmap = LegendMetadata(args.metadata, lazy=True).channelmap( + args.timestamp, system=args.datatype + ) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + det_status = chmap[args.channel]["analysis"]["is_blinded"] + + # read in calibration curve for this channel + blind_curve = Props.read_from(args.blind_curve)[channel]["pars"]["operations"] + + # load in the data + daqenergy = lh5.read(f"{channel}/raw/daqenergy", sorted(args.files))[0].view_as( + "np" + ) + + # calibrate daq energy using pre existing curve + daqenergy_cal = ne.evaluate( + blind_curve["daqenergy_cal"]["expression"], + local_dict=dict( + daqenergy=daqenergy, **blind_curve["daqenergy_cal"]["parameters"] + ), + ) + + # bin with 1 keV bins and get maxs + hist, bins, var = get_hist(daqenergy_cal, np.arange(0, 3000, 1)) + maxs = get_i_local_maxima(hist, delta=25) + log.info(f"peaks found at : {maxs}") + + # plot the energy spectrum to check calibration + fig = plt.figure(figsize=(8, 10)) + ax = plt.subplot(211) + ax.hist(daqenergy_cal, bins=np.arange(0, 3000, 1), histtype="step") + ax.set_ylabel("counts") + ax.set_yscale("log") + ax2 = plt.subplot(212) + ax2.hist( + daqenergy_cal, + bins=np.arange(2600, 2630, 1 * blind_curve["daqenergy_cal"]["parameters"]["a"]), + histtype="step", + ) + ax2.set_xlabel("energy (keV)") + ax2.set_ylabel("counts") + plt.suptitle(args.channel) + with Path(args.plot_file).open("wb") as w: + pkl.dump(fig, w, protocol=pkl.HIGHEST_PROTOCOL) + plt.close() + + # check for peaks within +- 5keV of 
2614 and 583 to ensure blinding still + # valid and if so create file else raise error. if detector is in ac mode it + # will always pass this check + if ( + np.any(np.abs(maxs - 2614) < 5) and np.any(np.abs(maxs - 583) < 5) + ) or det_status is False: + Path(args.output).mkdir(parents=True, exist_ok=True) + Props.write_to( + args.output, + { + "threshold_adc": np.nanmin(daqenergy), + "threshold_kev": np.nanmin(daqenergy_cal), + }, + ) + else: + msg = "peaks not found in daqenergy" + raise RuntimeError(msg) diff --git a/workflow/src/legenddataflow/scripts/par/geds/tcm/pulser.py b/workflow/src/legenddataflow/scripts/par/geds/tcm/pulser.py new file mode 100644 index 0000000..ab5f400 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/tcm/pulser.py @@ -0,0 +1,58 @@ +import argparse +from pathlib import Path + +import numpy as np +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata +from pygama.pargen.data_cleaning import get_tcm_pulser_ids + +from ....log import build_log + + +def par_geds_tcm_pulser() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--configs", help="configs path", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) + argparser.add_argument("--log", help="log file", type=str) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--channel", help="Channel", type=str, required=True) + + argparser.add_argument( + "--pulser_file", help="pulser file", type=str, required=False + ) + + argparser.add_argument("--tcm_files", help="tcm_files", nargs="*", type=str) + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_tcm_pulser"] + + build_log(config_dict, args.log) + + kwarg_dict = config_dict["inputs"]["pulser_config"] + kwarg_dict = Props.read_from(kwarg_dict) + + meta = LegendMetadata(path=args.metadata) + channel_dict = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{channel_dict[args.channel].daq.rawid}" + + if ( + isinstance(args.tcm_files, list) + and args.tcm_files[0].split(".")[-1] == "filelist" + ): + tcm_files = args.tcm_files[0] + with Path(tcm_files).open() as f: + tcm_files = f.read().splitlines() + else: + tcm_files = args.tcm_files + # get pulser mask from tcm files + tcm_files = sorted(np.unique(tcm_files)) + ids, mask = get_tcm_pulser_ids( + tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold") + ) + + Path(args.pulser_file).parent.mkdir(parents=True, exist_ok=True) + Props.write_to(args.pulser_file, {"idxs": ids.tolist(), "mask": mask.tolist()}) diff --git a/workflow/src/legenddataflow/scripts/par_psp_geds.py b/workflow/src/legenddataflow/scripts/par_psp_geds.py deleted file mode 100644 index e65903c..0000000 --- a/workflow/src/legenddataflow/scripts/par_psp_geds.py +++ /dev/null @@ -1,157 +0,0 @@ -import argparse -import pickle as pkl -from datetime import datetime -from pathlib import Path - -import matplotlib as mpl -import matplotlib.dates as mdates -import matplotlib.pyplot as plt -import numpy as np -from dbetto.catalog import Props -from legendmeta import LegendMetadata - -from ..FileKey import ChannelProcKey - -mpl.use("Agg") - - -argparser = argparse.ArgumentParser() -argparser.add_argument( - "--input", help="input files", nargs="*", 
type=str, required=True -) -argparser.add_argument( - "--output", help="output file", nargs="*", type=str, required=True -) -argparser.add_argument( - "--in_plots", help="input plot files", nargs="*", type=str, required=False -) -argparser.add_argument( - "--out_plots", help="output plot files", nargs="*", type=str, required=False -) -argparser.add_argument( - "--in_obj", help="input object files", nargs="*", type=str, required=False -) -argparser.add_argument( - "--out_obj", help="output object files", nargs="*", type=str, required=False -) - -argparser.add_argument("--log", help="log_file", type=str) -argparser.add_argument("--configs", help="configs", type=str, required=True) - -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--channel", help="Channel", type=str, required=True) -args = argparser.parse_args() - -configs = LegendMetadata(args.configs, lazy=True).on( - args.timestamp, system=args.datatype -) -merge_config = Props.read_from( - configs["snakemake_rules"]["pars_psp"]["inputs"]["psp_config"][args.channel] -) - -ave_fields = merge_config["average_fields"] - -# partitions could be different for different channels - do separately for each channel -in_dicts = {} -for file in args.input: - tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp - in_dicts[tstamp] = Props.read_from(file) - -plot_dict = {} -for field in ave_fields: - keys = field.split(".") - vals = [] - for _tstamp, tstamp_dict in in_dicts.items(): - val = tstamp_dict.copy() - for key in keys: - val = val[key] - vals.append(val) - if "dsp" in tstamp_dict: - tmp_dict = tstamp_dict["dsp"] - else: - tmp_dict = {} - tstamp_dict["dsp"] = tmp_dict - for i, key in enumerate(keys): - if i == len(keys) - 1: - tmp_dict[key] = val - else: - if key in tmp_dict: - tmp_dict = tmp_dict[key] - else: - tmp_dict[key] = {} - tmp_dict = tmp_dict[key] - if isinstance(vals[0], str): - if "*" in vals[0]: - unit = vals[0].split("*")[1] - rounding = len(val.split("*")[0].split(".")[-1]) if "." 
in vals[0] else 16 - vals = np.array([float(val.split("*")[0]) for val in vals]) - else: - unit = None - rounding = 16 - else: - vals = np.array(vals) - unit = None - rounding = 16 - - mean_val = np.nan if len(vals[~np.isnan(vals)]) == 0 else np.nanmedian(vals) - mean = f"{round(mean_val, rounding)}*{unit}" if unit is not None else mean_val - - for _tstamp, tstamp_dict in in_dicts.items(): - val = tstamp_dict - for i, key in enumerate(keys): - if i == len(keys) - 1: - val[key] = mean - else: - val = val[key] - - fig = plt.figure() - plt.scatter( - [datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in in_dicts], vals - ) - plt.axhline(y=mean_val, color="r", linestyle="-") - plt.xlabel("time") - if unit is not None: - plt.ylabel(f"value {unit}") - else: - plt.ylabel("value") - plt.gca().xaxis.set_major_formatter(mdates.DateFormatter("%d/%m/%y")) - plt.gcf().autofmt_xdate() - plt.title(field) - plot_dict[field] = fig - plt.close() - -for file in args.output: - tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp - Props.write_to(file, in_dicts[tstamp]) - -if args.out_plots: - for file in args.out_plots: - tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp - if args.in_plots: - for infile in args.in_plots: - if tstamp in infile: - with Path(infile).open("rb") as f: - old_plot_dict = pkl.load(f) - break - old_plot_dict.update({"psp": plot_dict}) - new_plot_dict = old_plot_dict - else: - new_plot_dict = {"psp": plot_dict} - with Path(file).open("wb") as f: - pkl.dump(new_plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) - -if args.out_obj: - for file in args.out_obj: - tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp - if args.in_obj: - for infile in args.in_obj: - if tstamp in infile: - with Path(infile).open("rb") as f: - old_obj_dict = pkl.load(f) - break - new_obj_dict = old_obj_dict - else: - new_obj_dict = {} - with Path(file).open("wb") as f: - pkl.dump(new_obj_dict, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/workflow/src/legenddataflow/scripts/pars_dsp_build_svm_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_build_svm_geds.py deleted file mode 100644 index a5310e9..0000000 --- a/workflow/src/legenddataflow/scripts/pars_dsp_build_svm_geds.py +++ /dev/null @@ -1,57 +0,0 @@ -import argparse -import pickle as pkl -from pathlib import Path - -from dbetto import TextDB -from dbetto.catalog import Props -from lgdo import lh5 -from sklearn.svm import SVC - -from ..log import build_log - -argparser = argparse.ArgumentParser() -argparser.add_argument("--log", help="log file", type=str) -argparser.add_argument("--configs", help="config file", type=str) - -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) - -argparser.add_argument("--output_file", help="output SVM file", type=str, required=True) -argparser.add_argument("--train_data", help="input data file", type=str, required=True) -argparser.add_argument( - "--train_hyperpars", help="input hyperparameter file", required=True -) -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -config_dict = configs["snakemake_rules"]["pars_dsp_build_svm"] - -log = build_log(config_dict, args.log) - -# Load files -tb = lh5.read("ml_train/dsp", args.train_data) -log.debug("loaded data") - -hyperpars = Props.read_from(args.train_hyperpars) - -# Define training inputs -dwts_norm = tb["dwt_norm"].nda -labels 
= tb["dc_label"].nda - -log.debug("training model") -# Initialize and train SVM -svm = SVC( - random_state=int(hyperpars["random_state"]), - kernel=hyperpars["kernel"], - decision_function_shape=hyperpars["decision_function_shape"], - class_weight=hyperpars["class_weight"], - C=float(hyperpars["C"]), - gamma=float(hyperpars["gamma"]), -) - -svm.fit(dwts_norm, labels) -log.debug("trained model") - -# Save trained model with pickle -with Path(args.output_file).open("wb") as svm_file: - pkl.dump(svm, svm_file, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/workflow/src/legenddataflow/scripts/pars_dsp_dplms_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_dplms_geds.py deleted file mode 100644 index a47b653..0000000 --- a/workflow/src/legenddataflow/scripts/pars_dsp_dplms_geds.py +++ /dev/null @@ -1,148 +0,0 @@ -import argparse -import logging -import pickle as pkl -import time -from pathlib import Path - -import lgdo.lh5 as lh5 -import numpy as np -from dbetto import TextDB -from dbetto.catalog import Props -from legendmeta import LegendMetadata -from lgdo import Array, Table -from pygama.pargen.dplms_ge_dict import dplms_ge_dict - -from ..log import build_log - -argparser = argparse.ArgumentParser() -argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) -argparser.add_argument("--peak_file", help="tcm_filelist", type=str, required=True) -argparser.add_argument("--inplots", help="in_plot_path", type=str) -argparser.add_argument("--database", help="database", type=str, required=True) - -argparser.add_argument("--log", help="log_file", type=str) -argparser.add_argument("--configs", help="configs", type=str, required=True) -argparser.add_argument("--metadata", help="metadata", type=str, required=True) - -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--channel", help="Channel", type=str, required=True) - -argparser.add_argument("--dsp_pars", help="dsp_pars", type=str, required=True) -argparser.add_argument("--lh5_path", help="lh5_path", type=str, required=True) -argparser.add_argument("--plot_path", help="plot_path", type=str) - -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -config_dict = configs["snakemake_rules"]["pars_dsp_dplms"] - -log = build_log(config_dict, args.log) - -log = logging.getLogger(__name__) -sto = lh5.LH5Store() - -meta = LegendMetadata(path=args.metadata) -channel_dict = meta.channelmap(args.timestamp, system=args.datatype) -channel = f"ch{channel_dict[args.channel].daq.rawid:07}" - -configs = LegendMetadata(args.configs, lazy=True).on( - args.timestamp, system=args.datatype -) -dsp_config = config_dict["inputs"]["proc_chain"][args.channel] - -dplms_json = config_dict["inputs"]["dplms_pars"][args.channel] -dplms_dict = Props.read_from(dplms_json) - -db_dict = Props.read_from(args.database) - -if dplms_dict["run_dplms"] is True: - with Path(args.fft_raw_filelist).open() as f: - fft_files = sorted(f.read().splitlines()) - - t0 = time.time() - log.info("\nLoad fft data") - energies = sto.read(f"{channel}/raw/daqenergy", fft_files)[0] - idxs = np.where(energies.nda == 0)[0] - raw_fft = sto.read( - f"{channel}/raw", fft_files, n_rows=dplms_dict["n_baselines"], idx=idxs - )[0] - t1 = time.time() - log.info(f"Time to load fft data {(t1-t0):.2f} s, total events {len(raw_fft)}") - - log.info("\nRunning event selection") - peaks_kev = 
np.array(dplms_dict["peaks_kev"]) - kev_widths = [tuple(kev_width) for kev_width in dplms_dict["kev_widths"]] - - peaks_rounded = [int(peak) for peak in peaks_kev] - peaks = sto.read(f"{channel}/raw", args.peak_file, field_mask=["peak"])[0][ - "peak" - ].nda - ids = np.isin(peaks, peaks_rounded) - peaks = peaks[ids] - idx_list = [np.where(peaks == peak)[0] for peak in peaks_rounded] - - raw_cal = sto.read(f"{channel}/raw", args.peak_file, idx=ids)[0] - log.info( - f"Time to run event selection {(time.time()-t1):.2f} s, total events {len(raw_cal)}" - ) - - if isinstance(dsp_config, (str, list)): - dsp_config = Props.read_from(dsp_config) - - if args.plot_path: - out_dict, plot_dict = dplms_ge_dict( - raw_fft, - raw_cal, - dsp_config, - db_dict, - dplms_dict, - display=1, - ) - if args.inplots: - with Path(args.inplots).open("rb") as r: - inplot_dict = pkl.load(r) - inplot_dict.update({"dplms": plot_dict}) - - else: - out_dict = dplms_ge_dict( - raw_fft, - raw_cal, - dsp_config, - db_dict, - dplms_dict, - ) - - coeffs = out_dict["dplms"].pop("coefficients") - dplms_pars = Table(col_dict={"coefficients": Array(coeffs)}) - out_dict["dplms"]["coefficients"] = ( - f"loadlh5('{args.lh5_path}', '{channel}/dplms/coefficients')" - ) - - log.info(f"DPLMS creation finished in {(time.time()-t0)/60} minutes") -else: - out_dict = {} - dplms_pars = Table(col_dict={"coefficients": Array([])}) - if args.inplots: - with Path(args.inplots).open("rb") as r: - inplot_dict = pkl.load(r) - else: - inplot_dict = {} - -db_dict.update(out_dict) - -Path(args.lh5_path).parent.mkdir(parents=True, exist_ok=True) -sto.write( - Table(col_dict={"dplms": dplms_pars}), - name=channel, - lh5_file=args.lh5_path, - wo_mode="overwrite", -) - -Path(args.dsp_pars).parent.mkdir(parents=True, exist_ok=True) -Props.write_to(args.dsp_pars, db_dict) - -if args.plot_path: - Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) - with Path(args.plot_path).open("wb") as f: - pkl.dump(inplot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/workflow/src/legenddataflow/scripts/pars_dsp_eopt_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_eopt_geds.py deleted file mode 100644 index c059961..0000000 --- a/workflow/src/legenddataflow/scripts/pars_dsp_eopt_geds.py +++ /dev/null @@ -1,395 +0,0 @@ -import argparse -import pickle as pkl -import time -import warnings -from pathlib import Path - -import lgdo.lh5 as lh5 -import numpy as np -import pygama.pargen.energy_optimisation as om # noqa: F401 -import sklearn.gaussian_process.kernels as ker -from dbetto import TextDB -from dbetto.catalog import Props -from dspeed.units import unit_registry as ureg -from legendmeta import LegendMetadata -from pygama.math.distributions import hpge_peak -from pygama.pargen.dsp_optimize import ( - BayesianOptimizer, - run_bayesian_optimisation, - run_one_dsp, -) - -from ..log import build_log - -warnings.filterwarnings(action="ignore", category=RuntimeWarning) -warnings.filterwarnings(action="ignore", category=np.RankWarning) - - -argparser = argparse.ArgumentParser() - -argparser.add_argument("--peak_file", help="tcm_filelist", type=str, required=True) -argparser.add_argument("--decay_const", help="decay_const", type=str, required=True) -argparser.add_argument("--inplots", help="in_plot_path", type=str) - -argparser.add_argument("--log", help="log_file", type=str) -argparser.add_argument("--configs", help="configs", type=str, required=True) -argparser.add_argument("--metadata", help="metadata", type=str, required=True) - 
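Both the removed DPLMS script above and the removed energy-optimisation script below select calibration events with the same idiom: the peak file stores, per event, the rounded peak energy it was drawn from, and np.isin plus per-peak index lists recover the event groups. A toy NumPy illustration with made-up values:

import numpy as np

# per-event peak labels as stored in the peak file (values made up)
peaks = np.array([583, 2614, 1460, 2614, 1592, 583, 2614])

peaks_rounded = [583, 1592, 2614]      # peaks requested in the config
ids = np.isin(peaks, peaks_rounded)    # keep only events from requested peaks
peaks = peaks[ids]
idx_list = [np.where(peaks == peak)[0] for peak in peaks_rounded]

# idx_list[i] indexes events of peaks_rounded[i] within the selected subset:
# [array([0, 4]), array([3]), array([1, 2, 5])]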
-argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--channel", help="Channel", type=str, required=True) - -argparser.add_argument( - "--final_dsp_pars", help="final_dsp_pars", type=str, required=True -) -argparser.add_argument("--qbb_grid_path", help="qbb_grid_path", type=str) -argparser.add_argument("--plot_path", help="plot_path", type=str) - -argparser.add_argument( - "--plot_save_path", help="plot_save_path", type=str, required=False -) -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -config_dict = configs["snakemake_rules"]["pars_dsp_eopt"] - -log = build_log(config_dict, args.log) - -sto = lh5.LH5Store() -t0 = time.time() - -meta = LegendMetadata(path=args.metadata) -channel_dict = meta.channelmap(args.timestamp, system=args.datatype) -channel = f"ch{channel_dict[args.channel].daq.rawid:07}" - -dsp_config = config_dict["inputs"]["processing_chain"][args.channel] -opt_json = config_dict["inputs"]["optimiser_config"][args.channel] - -opt_dict = Props.read_from(opt_json) -db_dict = Props.read_from(args.decay_const) - -if opt_dict.pop("run_eopt") is True: - peaks_kev = np.array(opt_dict["peaks"]) - kev_widths = [tuple(kev_width) for kev_width in opt_dict["kev_widths"]] - - kwarg_dicts_cusp = [] - kwarg_dicts_trap = [] - kwarg_dicts_zac = [] - for peak in peaks_kev: - peak_idx = np.where(peaks_kev == peak)[0][0] - kev_width = kev_widths[peak_idx] - - kwarg_dicts_cusp.append( - { - "parameter": "cuspEmax", - "func": hpge_peak, - "peak": peak, - "kev_width": kev_width, - "bin_width": 5, - } - ) - kwarg_dicts_zac.append( - { - "parameter": "zacEmax", - "func": hpge_peak, - "peak": peak, - "kev_width": kev_width, - "bin_width": 5, - } - ) - kwarg_dicts_trap.append( - { - "parameter": "trapEmax", - "func": hpge_peak, - "peak": peak, - "kev_width": kev_width, - "bin_width": 5, - } - ) - - peaks_rounded = [int(peak) for peak in peaks_kev] - peaks = sto.read(f"{channel}/raw", args.peak_file, field_mask=["peak"])[0][ - "peak" - ].nda - ids = np.isin(peaks, peaks_rounded) - peaks = peaks[ids] - idx_list = [np.where(peaks == peak)[0] for peak in peaks_rounded] - - tb_data = sto.read(f"{channel}/raw", args.peak_file, idx=ids)[0] - - t1 = time.time() - log.info(f"Data Loaded in {(t1-t0)/60} minutes") - - if isinstance(dsp_config, (str, list)): - dsp_config = Props.read_from(dsp_config) - - dsp_config["outputs"] = ["tp_99", "tp_0_est", "dt_eff"] - - init_data = run_one_dsp(tb_data, dsp_config, db_dict=db_dict, verbosity=0) - full_dt = (init_data["tp_99"].nda - init_data["tp_0_est"].nda)[idx_list[-1]] - flat_val = np.ceil(1.1 * np.nanpercentile(full_dt, 99) / 100) / 10 - - if flat_val < 1.0: - flat_val = 1.0 - elif flat_val > 4: - flat_val = 4 - flat_val = f"{flat_val}*us" - - db_dict["cusp"] = {"flat": flat_val} - db_dict["zac"] = {"flat": flat_val} - db_dict["etrap"] = {"flat": flat_val} - - tb_data.add_column("dt_eff", init_data["dt_eff"]) - - dsp_config["processors"].pop("dt_eff") - - dsp_config["outputs"] = ["zacEmax", "cuspEmax", "trapEmax", "dt_eff"] - - kwarg_dict = [ - { - "peak_dicts": kwarg_dicts_cusp, - "ctc_param": "dt_eff", - "idx_list": idx_list, - "peaks_kev": peaks_kev, - }, - { - "peak_dicts": kwarg_dicts_zac, - "ctc_param": "dt_eff", - "idx_list": idx_list, - "peaks_kev": peaks_kev, - }, - { - "peak_dicts": kwarg_dicts_trap, - "ctc_param": "dt_eff", - "idx_list": idx_list, - 
"peaks_kev": peaks_kev, - }, - ] - - fom = eval(opt_dict["fom"]) - out_field = opt_dict["fom_field"] - out_err_field = opt_dict["fom_err_field"] - sample_x = np.array(opt_dict["initial_samples"]) - - results_cusp = [] - results_zac = [] - results_trap = [] - - sample_y_cusp = [] - sample_y_zac = [] - sample_y_trap = [] - - err_y_cusp = [] - err_y_zac = [] - err_y_trap = [] - - for i, x in enumerate(sample_x): - db_dict["cusp"]["sigma"] = f"{x[0]}*us" - db_dict["zac"]["sigma"] = f"{x[0]}*us" - db_dict["etrap"]["rise"] = f"{x[0]}*us" - - log.info(f"Initialising values {i+1} : {db_dict}") - - tb_out = run_one_dsp(tb_data, dsp_config, db_dict=db_dict, verbosity=0) - - res = fom(tb_out, kwarg_dict[0]) - results_cusp.append(res) - sample_y_cusp.append(res[out_field]) - err_y_cusp.append(res[out_err_field]) - - res = fom(tb_out, kwarg_dict[1]) - results_zac.append(res) - sample_y_zac.append(res[out_field]) - err_y_zac.append(res[out_err_field]) - - res = fom(tb_out, kwarg_dict[2]) - results_trap.append(res) - sample_y_trap.append(res[out_field]) - err_y_trap.append(res[out_err_field]) - - log.info(f"{i+1} Finished") - - if np.isnan(sample_y_cusp).all(): - max_cusp = opt_dict["nan_default"] - else: - max_cusp = np.ceil(np.nanmax(sample_y_cusp) * 2) - if np.isnan(sample_y_zac).all(): - max_zac = opt_dict["nan_default"] - else: - max_zac = np.ceil(np.nanmax(sample_y_zac) * 2) - if np.isnan(sample_y_trap).all(): - max_trap = opt_dict["nan_default"] - else: - max_trap = np.ceil(np.nanmax(sample_y_trap) * 2) - - nan_vals = [max_cusp, max_zac, max_trap] - - for i in range(len(sample_x)): - if np.isnan(sample_y_cusp[i]): - results_cusp[i]["y_val"] = max_cusp - sample_y_cusp[i] = max_cusp - - if np.isnan(sample_y_zac[i]): - results_zac[i]["y_val"] = max_zac - sample_y_zac[i] = max_zac - - if np.isnan(sample_y_trap[i]): - results_trap[i]["y_val"] = max_trap - sample_y_trap[i] = max_trap - - kernel = ( - ker.ConstantKernel(2.0, constant_value_bounds="fixed") - + 1.0 * ker.RBF(1.0, length_scale_bounds=[0.5, 2.5]) - + ker.WhiteKernel(noise_level=0.1, noise_level_bounds=(1e-5, 1e1)) - ) - - lambda_param = 5 - sampling_rate = tb_data["waveform_presummed"]["dt"][0] - sampling_unit = ureg.Quantity(tb_data["waveform_presummed"]["dt"].attrs["units"]) - waveform_sampling = sampling_rate * sampling_unit - - bopt_cusp = BayesianOptimizer( - acq_func=opt_dict["acq_func"], - batch_size=opt_dict["batch_size"], - kernel=kernel, - sampling_rate=waveform_sampling, - fom_value=out_field, - fom_error=out_err_field, - ) - bopt_cusp.lambda_param = lambda_param - bopt_cusp.add_dimension("cusp", "sigma", 0.5, 16, True, "us") - - bopt_zac = BayesianOptimizer( - acq_func=opt_dict["acq_func"], - batch_size=opt_dict["batch_size"], - kernel=kernel, - sampling_rate=waveform_sampling, - fom_value=out_field, - fom_error=out_err_field, - ) - bopt_zac.lambda_param = lambda_param - bopt_zac.add_dimension("zac", "sigma", 0.5, 16, True, "us") - - bopt_trap = BayesianOptimizer( - acq_func=opt_dict["acq_func"], - batch_size=opt_dict["batch_size"], - kernel=kernel, - sampling_rate=waveform_sampling, - fom_value=out_field, - fom_error=out_err_field, - ) - bopt_trap.lambda_param = lambda_param - bopt_trap.add_dimension("etrap", "rise", 1, 12, True, "us") - - bopt_cusp.add_initial_values( - x_init=sample_x, y_init=sample_y_cusp, yerr_init=err_y_cusp - ) - bopt_zac.add_initial_values( - x_init=sample_x, y_init=sample_y_zac, yerr_init=err_y_zac - ) - bopt_trap.add_initial_values( - x_init=sample_x, y_init=sample_y_trap, yerr_init=err_y_trap - ) - - 
best_idx = np.nanargmin(sample_y_cusp) - bopt_cusp.optimal_results = results_cusp[best_idx] - bopt_cusp.optimal_x = sample_x[best_idx] - - best_idx = np.nanargmin(sample_y_zac) - bopt_zac.optimal_results = results_zac[best_idx] - bopt_zac.optimal_x = sample_x[best_idx] - - best_idx = np.nanargmin(sample_y_trap) - bopt_trap.optimal_results = results_trap[best_idx] - bopt_trap.optimal_x = sample_x[best_idx] - - optimisers = [bopt_cusp, bopt_zac, bopt_trap] - - out_param_dict, out_results_list = run_bayesian_optimisation( - tb_data, - dsp_config, - [fom], - optimisers, - fom_kwargs=kwarg_dict, - db_dict=db_dict, - nan_val=nan_vals, - n_iter=opt_dict["n_iter"], - ) - - Props.add_to(db_dict, out_param_dict) - - # db_dict.update(out_param_dict) - - t2 = time.time() - log.info(f"Optimiser finished in {(t2-t1)/60} minutes") - - out_alpha_dict = {} - out_alpha_dict["cuspEmax_ctc"] = { - "expression": "cuspEmax*(1+dt_eff*a)", - "parameters": {"a": float(round(bopt_cusp.optimal_results["alpha"], 9))}, - } - - out_alpha_dict["cuspEftp_ctc"] = { - "expression": "cuspEftp*(1+dt_eff*a)", - "parameters": {"a": float(round(bopt_cusp.optimal_results["alpha"], 9))}, - } - - out_alpha_dict["zacEmax_ctc"] = { - "expression": "zacEmax*(1+dt_eff*a)", - "parameters": {"a": float(round(bopt_zac.optimal_results["alpha"], 9))}, - } - - out_alpha_dict["zacEftp_ctc"] = { - "expression": "zacEftp*(1+dt_eff*a)", - "parameters": {"a": float(round(bopt_zac.optimal_results["alpha"], 9))}, - } - - out_alpha_dict["trapEmax_ctc"] = { - "expression": "trapEmax*(1+dt_eff*a)", - "parameters": {"a": float(round(bopt_trap.optimal_results["alpha"], 9))}, - } - - out_alpha_dict["trapEftp_ctc"] = { - "expression": "trapEftp*(1+dt_eff*a)", - "parameters": {"a": float(round(bopt_trap.optimal_results["alpha"], 9))}, - } - if "ctc_params" in db_dict: - db_dict["ctc_params"].update(out_alpha_dict) - else: - db_dict.update({"ctc_params": out_alpha_dict}) - - Path(args.qbb_grid_path).parent.mkdir(parents=True, exist_ok=True) - with Path(args.qbb_grid_path).open("wb") as f: - pkl.dump(optimisers, f) - -else: - Path(args.qbb_grid_path).touch() - -Path(args.final_dsp_pars).parent.mkdir(parents=True, exist_ok=True) -Props.write_to(args.final_dsp_pars, db_dict) - -if args.plot_path: - if args.inplots: - with Path(args.inplots).open("rb") as r: - plot_dict = pkl.load(r) - else: - plot_dict = {} - - plot_dict["trap_optimisation"] = { - "kernel_space": bopt_trap.plot(init_samples=sample_x), - "acq_space": bopt_trap.plot_acq(init_samples=sample_x), - } - - plot_dict["cusp_optimisation"] = { - "kernel_space": bopt_cusp.plot(init_samples=sample_x), - "acq_space": bopt_cusp.plot_acq(init_samples=sample_x), - } - - plot_dict["zac_optimisation"] = { - "kernel_space": bopt_zac.plot(init_samples=sample_x), - "acq_space": bopt_zac.plot_acq(init_samples=sample_x), - } - - Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) - with Path(args.plot_path).open("wb") as w: - pkl.dump(plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/workflow/src/legenddataflow/scripts/pars_dsp_nopt_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_nopt_geds.py deleted file mode 100644 index 7e843e8..0000000 --- a/workflow/src/legenddataflow/scripts/pars_dsp_nopt_geds.py +++ /dev/null @@ -1,108 +0,0 @@ -import argparse -import pickle as pkl -import time -from pathlib import Path - -import lgdo.lh5 as lh5 -import numpy as np -import pygama.pargen.noise_optimization as pno -from dbetto import TextDB -from dbetto.catalog import Props -from legendmeta import 
LegendMetadata -from pygama.pargen.data_cleaning import generate_cuts, get_cut_indexes -from pygama.pargen.dsp_optimize import run_one_dsp - -from ..log import build_log - -sto = lh5.LH5Store() - -argparser = argparse.ArgumentParser() -argparser.add_argument("--raw_filelist", help="raw_filelist", type=str) -argparser.add_argument("--database", help="database", type=str, required=True) -argparser.add_argument("--inplots", help="inplots", type=str) - -argparser.add_argument("--configs", help="configs", type=str, required=True) -argparser.add_argument("--metadata", help="metadata", type=str, required=True) -argparser.add_argument("--log", help="log_file", type=str) - -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--channel", help="Channel", type=str, required=True) - -argparser.add_argument("--dsp_pars", help="dsp_pars", type=str, required=True) -argparser.add_argument("--plot_path", help="plot_path", type=str) - -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -config_dict = configs["snakemake_rules"]["pars_dsp_nopt"] - -log = build_log(config_dict, args.log) - - -t0 = time.time() - -meta = LegendMetadata(path=args.metadata) -channel_dict = meta.channelmap(args.timestamp, system=args.datatype) -channel = f"ch{channel_dict[args.channel].daq.rawid:07}" - -dsp_config = config_dict["inputs"]["processing_chain"][args.channel] -opt_json = config_dict["inputs"]["optimiser_config"][args.channel] - -opt_dict = Props.read_from(opt_json) -db_dict = Props.read_from(args.database) - -if opt_dict.pop("run_nopt") is True: - with Path(args.raw_filelist).open() as f: - files = f.read().splitlines() - - raw_files = sorted(files) - - energies = sto.read(f"{channel}/raw/daqenergy", raw_files)[0] - idxs = np.where(energies.nda == 0)[0] - tb_data = sto.read( - f"{channel}/raw", raw_files, n_rows=opt_dict["n_events"], idx=idxs - )[0] - t1 = time.time() - log.info(f"Time to open raw files {t1-t0:.2f} s, n. baselines {len(tb_data)}") - - log.info(f"Select baselines {len(tb_data)}") - dsp_data = run_one_dsp(tb_data, dsp_config) - cut_dict = generate_cuts(dsp_data, cut_dict=opt_dict.pop("cut_pars")) - cut_idxs = get_cut_indexes(dsp_data, cut_dict) - tb_data = sto.read( - f"{channel}/raw", raw_files, n_rows=opt_dict.pop("n_events"), idx=idxs[cut_idxs] - )[0] - log.info(f"... 
{len(tb_data)} baselines after cuts") - - if isinstance(dsp_config, (str, list)): - dsp_config = Props.read_from(dsp_config) - - if args.plot_path: - out_dict, plot_dict = pno.noise_optimization( - tb_data, dsp_config, db_dict.copy(), opt_dict, channel, display=1 - ) - else: - out_dict = pno.noise_optimization( - raw_files, dsp_config, db_dict.copy(), opt_dict, channel - ) - - t2 = time.time() - log.info(f"Optimiser finished in {(t2-t0)/60} minutes") -else: - out_dict = {} - plot_dict = {} - -if args.plot_path: - Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) - if args.inplots: - with Path(args.inplots).open("rb") as r: - old_plot_dict = pkl.load(r) - plot_dict = dict(noise_optimisation=plot_dict, **old_plot_dict) - else: - plot_dict = {"noise_optimisation": plot_dict} - with Path(args.plot_path).open("wb") as f: - pkl.dump(plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) - -Path(args.dsp_pars).parent.mkdir(parents=True, exist_ok=True) -Props.write_to(args.dsp_pars, dict(nopt_pars=out_dict, **db_dict)) diff --git a/workflow/src/legenddataflow/scripts/pars_dsp_svm_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_svm_geds.py deleted file mode 100644 index 67d8a64..0000000 --- a/workflow/src/legenddataflow/scripts/pars_dsp_svm_geds.py +++ /dev/null @@ -1,20 +0,0 @@ -import argparse -from pathlib import Path - -from dbetto.catalog import Props - -argparser = argparse.ArgumentParser() -argparser.add_argument("--log", help="log file", type=str) -argparser.add_argument("--output_file", help="output par file", type=str, required=True) -argparser.add_argument("--input_file", help="input par file", type=str, required=True) -argparser.add_argument("--svm_file", help="svm file", required=True) -args = argparser.parse_args() - -par_data = Props.read_from(args.input_file) - -file = f"'$_/{Path(args.svm_file).name}'" - -par_data["svm"] = {"model_file": file} - -Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) -Props.write_to(args.output_file, par_data) diff --git a/workflow/src/legenddataflow/scripts/pars_dsp_tau_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_tau_geds.py deleted file mode 100644 index 1ca084b..0000000 --- a/workflow/src/legenddataflow/scripts/pars_dsp_tau_geds.py +++ /dev/null @@ -1,139 +0,0 @@ -import argparse -import pickle as pkl -from pathlib import Path - -import lgdo.lh5 as lh5 -import numpy as np -from dbetto import TextDB -from dbetto.catalog import Props -from legendmeta import LegendMetadata -from pygama.pargen.data_cleaning import get_cut_indexes, get_tcm_pulser_ids -from pygama.pargen.dsp_optimize import run_one_dsp -from pygama.pargen.extract_tau import ExtractTau - -from ..log import build_log - -argparser = argparse.ArgumentParser() -argparser.add_argument("--configs", help="configs path", type=str, required=True) -argparser.add_argument("--metadata", help="metadata", type=str, required=True) -argparser.add_argument("--log", help="log file", type=str) - -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--channel", help="Channel", type=str, required=True) - -argparser.add_argument("--plot_path", help="plot path", type=str, required=False) -argparser.add_argument("--output_file", help="output file", type=str, required=True) - -argparser.add_argument("--pulser_file", help="pulser file", type=str, required=False) - -argparser.add_argument("--raw_files", help="input files", nargs="*", type=str) 
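# Illustrative sketch (not part of the patch): the noise-optimisation script
# removed above selects forced-trigger baselines by reading only the daqenergy
# column and keeping rows where it is exactly zero, then composes that selection
# with the quality-cut indices from a first DSP pass. Minimal numpy sketch of the
# two-step index composition (array contents are made up):
import numpy as np

daqenergy = np.array([0, 1523, 0, 0, 842, 0])    # one entry per raw event
baseline_idx = np.where(daqenergy == 0)[0]       # -> array([0, 2, 3, 5])

cut_idxs = np.array([0, 2, 3])                   # rows *within* baseline_idx passing the cuts
final_idx = baseline_idx[cut_idxs]               # -> array([0, 3, 5]), passed as idx= to the raw read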
-argparser.add_argument( - "--tcm_files", help="tcm_files", nargs="*", type=str, required=False -) -args = argparser.parse_args() - -sto = lh5.LH5Store() - -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -config_dict = configs["snakemake_rules"]["pars_dsp_nopt"] - -log = build_log(config_dict, args.log) - -meta = LegendMetadata(path=args.metadata) -channel_dict = meta.channelmap(args.timestamp, system=args.datatype) -channel = f"ch{channel_dict[args.channel].daq.rawid:07}" - -channel_dict = config_dict["inputs"]["processing_chain"][args.channel] -kwarg_dict = config_dict["inputs"]["tau_config"][args.channel] - -kwarg_dict = Props.read_from(kwarg_dict) - -if kwarg_dict["run_tau"] is True: - dsp_config = Props.read_from(channel_dict) - kwarg_dict.pop("run_tau") - if ( - isinstance(args.raw_files, list) - and args.raw_files[0].split(".")[-1] == "filelist" - ): - input_file = args.raw_files[0] - with Path(input_file).open() as f: - input_file = f.read().splitlines() - else: - input_file = args.raw_files - - if args.pulser_file: - pulser_dict = Props.read_from(args.pulser_file) - mask = np.array(pulser_dict["mask"]) - - elif args.tcm_filelist: - # get pulser mask from tcm files - with Path(args.tcm_filelist).open() as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] - ) - else: - msg = "No pulser file or tcm filelist provided" - raise ValueError(msg) - - data = sto.read( - f"{channel}/raw", input_file, field_mask=["daqenergy", "timestamp", "t_sat_lo"] - )[0].view_as("pd") - threshold = kwarg_dict.pop("threshold") - - discharges = data["t_sat_lo"] > 0 - discharge_timestamps = np.where(data["timestamp"][discharges])[0] - is_recovering = np.full(len(data), False, dtype=bool) - for tstamp in discharge_timestamps: - is_recovering = is_recovering | np.where( - ( - ((data["timestamp"] - tstamp) < 0.01) - & ((data["timestamp"] - tstamp) > 0) - ), - True, - False, - ) - cuts = np.where( - (data.daqenergy.to_numpy() > threshold) & (~mask) & (~is_recovering) - )[0] - - tb_data = sto.read( - f"{channel}/raw", - input_file, - idx=cuts, - n_rows=kwarg_dict.pop("n_events"), - )[0] - - tb_out = run_one_dsp(tb_data, dsp_config) - log.debug("Processed Data") - cut_parameters = kwarg_dict.get("cut_parameters", None) - if cut_parameters is not None: - idxs = get_cut_indexes(tb_out, cut_parameters=cut_parameters) - log.debug("Applied cuts") - log.debug(f"{len(idxs)} events passed cuts") - else: - idxs = np.full(len(tb_out), True, dtype=bool) - - tau = ExtractTau(dsp_config, kwarg_dict["wf_field"]) - slopes = tb_out["tail_slope"].nda - log.debug("Calculating pz constant") - - tau.get_decay_constant(slopes[idxs], tb_data[kwarg_dict["wf_field"]]) - - if args.plot_path: - Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) - - plot_dict = tau.plot_waveforms_after_correction( - tb_data, "wf_pz", norm_param=kwarg_dict.get("norm_param", "pz_mean") - ) - plot_dict.update(tau.plot_slopes(slopes[idxs])) - - with Path(args.plot_path).open("wb") as f: - pkl.dump({"tau": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) -else: - out_dict = {} - -Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) -Props.write_to(args.output_file, tau.output_dict) diff --git a/workflow/src/legenddataflow/scripts/pars_hit_aoe.py b/workflow/src/legenddataflow/scripts/pars_hit_aoe.py deleted file mode 100644 index 7e13ed8..0000000 --- 
a/workflow/src/legenddataflow/scripts/pars_hit_aoe.py +++ /dev/null @@ -1,290 +0,0 @@ -from __future__ import annotations - -import argparse -import pickle as pkl -import warnings -from pathlib import Path -from typing import Callable - -import numpy as np -import pandas as pd -from dbetto import TextDB -from dbetto.catalog import Props -from legendmeta import LegendMetadata -from pygama.pargen.AoE_cal import * # noqa: F403 -from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak -from pygama.pargen.data_cleaning import get_tcm_pulser_ids -from pygama.pargen.utils import load_data - -from ..convert_np import convert_dict_np_to_float -from ..log import build_log - -warnings.filterwarnings(action="ignore", category=RuntimeWarning) - - -def get_results_dict(aoe_class): - return { - "cal_energy_param": aoe_class.cal_energy_param, - "dt_param": aoe_class.dt_param, - "rt_correction": aoe_class.dt_corr, - "1000-1300keV": aoe_class.timecorr_df.to_dict("index"), - "correction_fit_results": aoe_class.energy_corr_res_dict, - "low_cut": aoe_class.low_cut_val, - "high_cut": aoe_class.high_cut_val, - "low_side_sfs": aoe_class.low_side_sfs.to_dict("index"), - "2_side_sfs": aoe_class.two_side_sfs.to_dict("index"), - } - - -def fill_plot_dict(aoe_class, data, plot_options, plot_dict=None): - if plot_dict is not None: - for key, item in plot_options.items(): - if item["options"] is not None: - plot_dict[key] = item["function"](aoe_class, data, **item["options"]) - else: - plot_dict[key] = item["function"](aoe_class, data) - else: - plot_dict = {} - return plot_dict - - -def aoe_calibration( - data: pd.Dataframe, - cal_dicts: dict, - current_param: str, - energy_param: str, - cal_energy_param: str, - eres_func: Callable, - pdf: Callable = aoe_peak, - selection_string: str = "", - dt_corr: bool = False, - dep_correct: bool = False, - dt_cut: dict | None = None, - high_cut_val: int = 3, - mean_func: Callable = Pol1, - sigma_func: Callable = SigmaFit, - # dep_acc: float = 0.9, - dt_param: str = "dt_eff", - comptBands_width: int = 20, - plot_options: dict | None = None, - debug_mode: bool = False, -): - data["AoE_Uncorr"] = data[current_param] / data[energy_param] - aoe = CalAoE( - cal_dicts=cal_dicts, - cal_energy_param=cal_energy_param, - eres_func=eres_func, - pdf=pdf, - selection_string=selection_string, - dt_corr=dt_corr, - dep_correct=dep_correct, - dt_cut=dt_cut, - dt_param=dt_param, - high_cut_val=high_cut_val, - mean_func=mean_func, - sigma_func=sigma_func, - compt_bands_width=comptBands_width, - debug_mode=debug_mode | args.debug, - ) - - aoe.update_cal_dicts( - { - "AoE_Uncorr": { - "expression": f"{current_param}/{energy_param}", - "parameters": {}, - } - } - ) - - aoe.calibrate(data, "AoE_Uncorr") - log.info("Calibrated A/E") - return ( - cal_dicts, - get_results_dict(aoe), - fill_plot_dict(aoe, data, plot_options), - aoe, - ) - - -argparser = argparse.ArgumentParser() -argparser.add_argument("files", help="files", nargs="*", type=str) -argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) -argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) - -argparser.add_argument("--ecal_file", help="ecal_file", type=str, required=True) -argparser.add_argument("--eres_file", help="eres_file", type=str, required=True) -argparser.add_argument("--inplots", help="in_plot_path", type=str, required=False) - -argparser.add_argument("--configs", help="configs", type=str, required=True) -argparser.add_argument("--log", help="log_file", 
type=str) -argparser.add_argument("--metadata", help="metadata", type=str, required=True) - - -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--channel", help="Channel", type=str, required=True) - -argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) -argparser.add_argument("--hit_pars", help="hit_pars", type=str) -argparser.add_argument("--aoe_results", help="aoe_results", type=str) - -argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -config_dict = configs["snakemake_rules"]["pars_hit_aoecal"] - -log = build_log(config_dict, args.log) - -meta = LegendMetadata(path=args.metadata) -channel_dict = meta.channelmap(args.timestamp, system=args.datatype) -channel = f"ch{channel_dict[args.channel].daq.rawid:07}" - -channel_dict = config_dict["inputs"]["aoecal_config"][args.channel] -kwarg_dict = Props.read_from(channel_dict) - - -ecal_dict = Props.read_from(args.ecal_file) -cal_dict = ecal_dict["pars"] -eres_dict = ecal_dict["results"]["ecal"] - -with Path(args.eres_file).open("rb") as o: - object_dict = pkl.load(o) - -if kwarg_dict["run_aoe"] is True: - kwarg_dict.pop("run_aoe") - - pdf = eval(kwarg_dict.pop("pdf")) if "pdf" in kwarg_dict else aoe_peak - - sigma_func = ( - eval(kwarg_dict.pop("sigma_func")) if "sigma_func" in kwarg_dict else SigmaFit - ) - - mean_func = eval(kwarg_dict.pop("mean_func")) if "mean_func" in kwarg_dict else Pol1 - - if "plot_options" in kwarg_dict: - for field, item in kwarg_dict["plot_options"].items(): - kwarg_dict["plot_options"][field]["function"] = eval(item["function"]) - - with Path(args.files[0]).open() as f: - files = f.read().splitlines() - files = sorted(files) - - try: - eres = eres_dict[kwarg_dict["cal_energy_param"]]["eres_linear"].copy() - - def eres_func(x): - return eval(eres["expression"], dict(x=x, **eres["parameters"])) - - except KeyError: - - def eres_func(x): - return x * np.nan - - params = [ - kwarg_dict["current_param"], - "tp_0_est", - "tp_99", - kwarg_dict["energy_param"], - kwarg_dict["cal_energy_param"], - kwarg_dict["cut_field"], - "timestamp", - ] - - if "dt_param" in kwarg_dict: - params += kwarg_dict["dt_param"] - else: - params += "dt_eff" - - if "dt_cut" in kwarg_dict and kwarg_dict["dt_cut"] is not None: - cal_dict.update(kwarg_dict["dt_cut"]["cut"]) - params.append(kwarg_dict["dt_cut"]["out_param"]) - - # load data in - data, threshold_mask = load_data( - files, - f"{channel}/dsp", - cal_dict, - params=params, - threshold=kwarg_dict.pop("threshold"), - return_selection_mask=True, - ) - - if args.pulser_file: - pulser_dict = Props.read_from(args.pulser_file) - mask = np.array(pulser_dict["mask"]) - if "pulser_multiplicity_threshold" in kwarg_dict: - kwarg_dict.pop("pulser_multiplicity_threshold") - - elif args.tcm_filelist: - # get pulser mask from tcm files - with Path(args.tcm_filelist).open() as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold") - ) - else: - msg = "No pulser file or tcm filelist provided" - raise ValueError(msg) - - data["is_pulser"] = mask[threshold_mask] - - cal_dict, out_dict, plot_dict, obj = aoe_calibration( - data, - cal_dicts=cal_dict, - eres_func=eres_func, - 
selection_string=f"{kwarg_dict.pop('cut_field')}&(~is_pulser)", - pdf=pdf, - mean_func=mean_func, - sigma_func=sigma_func, - **kwarg_dict, - ) - obj.pdf = obj.pdf.name - - # need to change eres func as can't pickle lambdas - try: - obj.eres_func = eres_dict[kwarg_dict["cal_energy_param"]]["eres_linear"].copy() - except KeyError: - obj.eres_func = {} -else: - out_dict = {} - plot_dict = {} - obj = None - -if args.plot_file: - common_dict = plot_dict.pop("common") if "common" in list(plot_dict) else None - if args.inplots: - with Path(args.inplots).open("rb") as r: - out_plot_dict = pkl.load(r) - out_plot_dict.update({"aoe": plot_dict}) - else: - out_plot_dict = {"aoe": plot_dict} - - if "common" in list(out_plot_dict) and common_dict is not None: - out_plot_dict["common"].update(common_dict) - elif common_dict is not None: - out_plot_dict["common"] = common_dict - - Path(args.plot_file).parent.mkdir(parents=True, exist_ok=True) - with Path(args.plot_file).open("wb") as w: - pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) - -Path(args.hit_pars).parent.mkdir(parents=True, exist_ok=True) -results_dict = dict(**ecal_dict["results"], aoe=out_dict) -final_hit_dict = { - "pars": {"operations": cal_dict}, - "results": results_dict, -} - -final_hit_dict = convert_dict_np_to_float(final_hit_dict) - -Props.write_to(args.hit_pars, final_hit_dict) - -Path(args.aoe_results).parent.mkdir(parents=True, exist_ok=True) -final_object_dict = dict( - **object_dict, - aoe=obj, -) -with Path(args.aoe_results).open("wb") as w: - pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/workflow/src/legenddataflow/scripts/pars_hit_lq.py b/workflow/src/legenddataflow/scripts/pars_hit_lq.py deleted file mode 100644 index a7a2601..0000000 --- a/workflow/src/legenddataflow/scripts/pars_hit_lq.py +++ /dev/null @@ -1,283 +0,0 @@ -from __future__ import annotations - -import argparse -import pickle as pkl -import warnings -from pathlib import Path - -import numpy as np -import pandas as pd -from dbetto import TextDB -from dbetto.catalog import Props -from legendmeta import LegendMetadata -from pygama.math.distributions import gaussian -from pygama.pargen.AoE_cal import * # noqa: F403 -from pygama.pargen.data_cleaning import get_tcm_pulser_ids -from pygama.pargen.lq_cal import * # noqa: F403 -from pygama.pargen.lq_cal import LQCal -from pygama.pargen.utils import load_data - -from ..convert_np import convert_dict_np_to_float -from ..log import build_log - -warnings.filterwarnings(action="ignore", category=RuntimeWarning) - - -def get_results_dict(lq_class): - return { - "cal_energy_param": lq_class.cal_energy_param, - "DEP_means": lq_class.timecorr_df.to_dict("index"), - "rt_correction": lq_class.dt_fit_pars, - "cut_fit_pars": lq_class.cut_fit_pars.to_dict(), - "cut_value": lq_class.cut_val, - "sfs": lq_class.low_side_sf.to_dict("index"), - } - - -def fill_plot_dict(lq_class, data, plot_options, plot_dict=None): - if plot_dict is not None: - for key, item in plot_options.items(): - if item["options"] is not None: - plot_dict[key] = item["function"](lq_class, data, **item["options"]) - else: - plot_dict[key] = item["function"](lq_class, data) - else: - plot_dict = {} - return plot_dict - - -def lq_calibration( - data: pd.DataFrame, - cal_dicts: dict, - energy_param: str, - cal_energy_param: str, - dt_param: str, - eres_func: callable, - cdf: callable = gaussian, - selection_string: str = "", - plot_options: dict | None = None, - debug_mode: bool = False, -): - """Loads in data from the provided 
files and runs the LQ calibration on said files - - Parameters - ---------- - data: pd.DataFrame - A dataframe containing the data used for calibrating LQ - cal_dicts: dict - A dict of hit-level operations to apply to the data - energy_param: string - The energy parameter of choice. Used for normalizing the - raw lq values - cal_energy_param: string - The calibrated energy parameter of choice - dt_param: string - The drift time parameter of choice - eres_func: callable - The energy resolution functions - cdf: callable - The CDF used for the binned fitting of LQ distributions - selection_string: string - A string of flags to apply to the data when running the calibration - plot_options: dict - A dict containing the plot functions the user wants to run,and any - user options to provide those plot functions - - Returns - ------- - cal_dicts: dict - The user provided dict, updated with hit-level operations for LQ - results_dict: dict - A dict containing the results of the LQ calibration - plot_dict: dict - A dict containing all the figures specified by the plot options - lq: LQCal class - The LQCal object used for the LQ calibration - """ - - lq = LQCal( - cal_dicts, - cal_energy_param, - dt_param, - eres_func, - cdf, - selection_string, - debug_mode=debug_mode | args.debug, - ) - - data["LQ_Ecorr"] = np.divide(data["lq80"], data[energy_param]) - - lq.update_cal_dicts( - { - "LQ_Ecorr": { - "expression": f"lq80/{energy_param}", - "parameters": {}, - } - } - ) - - lq.calibrate(data, "LQ_Ecorr") - log.info("Calibrated LQ") - return cal_dicts, get_results_dict(lq), fill_plot_dict(lq, data, plot_options), lq - - -argparser = argparse.ArgumentParser() -argparser.add_argument("files", help="files", nargs="*", type=str) -argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) -argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) - -argparser.add_argument("--ecal_file", help="ecal_file", type=str, required=True) -argparser.add_argument("--eres_file", help="eres_file", type=str, required=True) -argparser.add_argument("--inplots", help="in_plot_path", type=str, required=False) - -argparser.add_argument("--configs", help="configs", type=str, required=True) -argparser.add_argument("--metadata", help="metadata", type=str, required=True) -argparser.add_argument("--log", help="log_file", type=str) - -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--channel", help="Channel", type=str, required=True) - -argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) -argparser.add_argument("--hit_pars", help="hit_pars", type=str) -argparser.add_argument("--lq_results", help="lq_results", type=str) - -argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -config_dict = configs["snakemake_rules"]["pars_hit_lqcal"] - -log = build_log(config_dict, args.log) - -meta = LegendMetadata(path=args.metadata) -channel_dict = meta.channelmap(args.timestamp, system=args.datatype) -channel = f"ch{channel_dict[args.channel].daq.rawid:07}" - - -channel_dict = config_dict["inputs"]["lqcal_config"][args.channel] -kwarg_dict = Props.read_from(channel_dict) - -ecal_dict = Props.read_from(args.ecal_file) -cal_dict = ecal_dict["pars"]["operations"] -eres_dict = 
ecal_dict["results"]["ecal"] - -with Path(args.eres_file).open("rb") as o: - object_dict = pkl.load(o) - -if kwarg_dict["run_lq"] is True: - kwarg_dict.pop("run_lq") - - cdf = eval(kwarg_dict.pop("cdf")) if "cdf" in kwarg_dict else gaussian - - if "plot_options" in kwarg_dict: - for field, item in kwarg_dict["plot_options"].items(): - kwarg_dict["plot_options"][field]["function"] = eval(item["function"]) - - with Path(args.files[0]).open() as f: - files = f.read().splitlines() - files = sorted(files) - - try: - eres = eres_dict[kwarg_dict["cal_energy_param"]]["eres_linear"].copy() - - def eres_func(x): - return eval(eres["expression"], dict(x=x, **eres["parameters"])) - - except KeyError: - - def eres_func(x): - return x * np.nan - - params = [ - "lq80", - "dt_eff", - kwarg_dict["energy_param"], - kwarg_dict["cal_energy_param"], - kwarg_dict["cut_field"], - ] - - # load data in - data, threshold_mask = load_data( - files, - f"{channel}/dsp", - cal_dict, - params=params, - threshold=kwarg_dict.pop("threshold"), - return_selection_mask=True, - ) - - if args.pulser_file: - pulser_dict = Props.read_from(args.pulser_file) - mask = np.array(pulser_dict["mask"]) - if "pulser_multiplicity_threshold" in kwarg_dict: - kwarg_dict.pop("pulser_multiplicity_threshold") - - elif args.tcm_filelist: - # get pulser mask from tcm files - with Path(args.tcm_filelist).open() as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold") - ) - else: - msg = "No pulser file or tcm filelist provided" - raise ValueError(msg) - - data["is_pulser"] = mask[threshold_mask] - - cal_dict, out_dict, plot_dict, obj = lq_calibration( - data, - selection_string=f"{kwarg_dict.pop('cut_field')}&(~is_pulser)", - cal_dicts=cal_dict, - eres_func=eres_func, - cdf=cdf, - **kwarg_dict, - ) - - # need to change eres func as can't pickle lambdas - try: - obj.eres_func = eres_dict[kwarg_dict["cal_energy_param"]]["eres_linear"].copy() - except KeyError: - obj.eres_func = {} -else: - out_dict = {} - plot_dict = {} - obj = None - -if args.plot_file: - common_dict = plot_dict.pop("common") if "common" in list(plot_dict) else None - if args.inplots: - with Path(args.inplots).open("rb") as r: - out_plot_dict = pkl.load(r) - out_plot_dict.update({"lq": plot_dict}) - else: - out_plot_dict = {"lq": plot_dict} - - if "common" in list(out_plot_dict) and common_dict is not None: - out_plot_dict["common"].update(common_dict) - elif common_dict is not None: - out_plot_dict["common"] = common_dict - - Path(args.plot_file).parent.mkdir(parents=True, exist_ok=True) - with Path(args.plot_file).open("wb") as w: - pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) - - -final_hit_dict = convert_dict_np_to_float( - { - "pars": {"operations": cal_dict}, - "results": dict(**eres_dict, lq=out_dict), - } -) -Path(args.hit_pars).parent.mkdir(parents=True, exist_ok=True) -Props.write_to(args.hit_pars, final_hit_dict) - -final_object_dict = dict( - **object_dict, - lq=obj, -) -Path(args.lq_results).parent.mkdir(parents=True, exist_ok=True) -with Path(args.lq_results).open("wb") as w: - pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/workflow/src/legenddataflow/scripts/pars_tcm_pulser.py b/workflow/src/legenddataflow/scripts/pars_tcm_pulser.py deleted file mode 100644 index ad46f0c..0000000 --- a/workflow/src/legenddataflow/scripts/pars_tcm_pulser.py +++ /dev/null @@ -1,57 +0,0 @@ -import argparse -import 
logging -from pathlib import Path - -import lgdo.lh5 as lh5 -import numpy as np -from dbetto import TextDB -from dbetto.catalog import Props -from legendmeta import LegendMetadata -from pygama.pargen.data_cleaning import get_tcm_pulser_ids - -from ..log import build_log - -argparser = argparse.ArgumentParser() -argparser.add_argument("--configs", help="configs path", type=str, required=True) -argparser.add_argument("--metadata", help="metadata", type=str, required=True) -argparser.add_argument("--log", help="log file", type=str) - -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--channel", help="Channel", type=str, required=True) - -argparser.add_argument("--pulser_file", help="pulser file", type=str, required=False) - -argparser.add_argument("--tcm_files", help="tcm_files", nargs="*", type=str) -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -config_dict = configs["snakemake_rules"]["pars_tcm_pulser"] - -log = build_log(config_dict, args.log) - -sto = lh5.LH5Store() -log = logging.getLogger(__name__) - - -kwarg_dict = config_dict["inputs"]["pulser_config"] -kwarg_dict = Props.read_from(kwarg_dict) - -meta = LegendMetadata(path=args.metadata) -channel_dict = meta.channelmap(args.timestamp, system=args.datatype) -channel = f"ch{channel_dict[args.channel].daq.rawid}" - -if isinstance(args.tcm_files, list) and args.tcm_files[0].split(".")[-1] == "filelist": - tcm_files = args.tcm_files[0] - with Path(tcm_files).open() as f: - tcm_files = f.read().splitlines() -else: - tcm_files = args.tcm_files -# get pulser mask from tcm files -tcm_files = sorted(np.unique(tcm_files)) -ids, mask = get_tcm_pulser_ids( - tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold") -) - -Path(args.pulser_file).parent.mkdir(parents=True, exist_ok=True) -Props.write_to(args.pulser_file, {"idxs": ids.tolist(), "mask": mask.tolist()}) diff --git a/workflow/src/legenddataflow/scripts/tier/dsp.py b/workflow/src/legenddataflow/scripts/tier/dsp.py new file mode 100644 index 0000000..906985b --- /dev/null +++ b/workflow/src/legenddataflow/scripts/tier/dsp.py @@ -0,0 +1,171 @@ +import argparse +import re +import time +import warnings +from pathlib import Path + +import numpy as np +from dbetto import TextDB +from dbetto.catalog import Props +from dspeed import build_dsp +from legendmeta import LegendMetadata +from lgdo import lh5 + +from ...log import build_log + +warnings.filterwarnings(action="ignore", category=RuntimeWarning) + + +def replace_list_with_array(dic): + for key, value in dic.items(): + if isinstance(value, dict): + dic[key] = replace_list_with_array(value) + elif isinstance(value, list): + dic[key] = np.array(value, dtype="float32") + else: + pass + return dic + + +def build_tier_dsp() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--configs", help="configs path", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) + argparser.add_argument("--log", help="log file", type=str) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--tier", help="Tier", type=str, required=True) + + argparser.add_argument( + "--pars_file", help="database file for detector", nargs="*", default=[] + ) 
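# Illustrative sketch (not part of the patch): replace_list_with_array() defined
# above walks a (possibly nested) parameter dict and turns every plain list into a
# float32 numpy array, which is the form dspeed expects for e.g. filter
# coefficients, while leaving strings and scalars untouched. Usage example with a
# made-up database entry (assumes the function above is in scope):
import numpy as np

db = {"ch1234567": {"dplms": {"coefficients": [0.1, 0.2, 0.3]}, "pz": {"tau": "180*us"}}}
db = replace_list_with_array(db)
assert isinstance(db["ch1234567"]["dplms"]["coefficients"], np.ndarray)
assert db["ch1234567"]["dplms"]["coefficients"].dtype == np.float32
assert db["ch1234567"]["pz"]["tau"] == "180*us"   # non-list values pass through unchanged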
+ argparser.add_argument("--input", help="input file", type=str) + + argparser.add_argument("--output", help="output file", type=str) + argparser.add_argument("--db_file", help="db file", type=str) + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True) + config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] + if args.tier in ["dsp", "psp"]: + config_dict = config_dict["tier_dsp"] + elif args.tier in ["ann", "pan"]: + config_dict = config_dict["tier_ann"] + else: + msg = f"Tier {args.tier} not supported" + raise ValueError(msg) + + log = build_log(config_dict, args.log) + + channel_dict = config_dict["inputs"]["processing_chain"] + settings_dict = config_dict["options"].get("settings", {}) + if isinstance(settings_dict, str): + settings_dict = Props.read_from(settings_dict) + + meta = LegendMetadata(path=args.metadata) + chan_map = meta.channelmap(args.timestamp, system=args.datatype) + + if args.tier in ["ann", "pan"]: + channel_dict = { + f"ch{chan_map[chan].daq.rawid:07}/dsp": Props.read_from(file) + for chan, file in channel_dict.items() + } + else: + channel_dict = { + f"ch{chan_map[chan].daq.rawid:07}/raw": Props.read_from(file) + for chan, file in channel_dict.items() + } + db_files = [ + par_file + for par_file in args.pars_file + if Path(par_file).suffix in (".json", ".yaml", ".yml") + ] + + database_dic = Props.read_from(db_files, subst_pathvar=True) + database_dic = replace_list_with_array(database_dic) + + Path(args.output).parent.mkdir(parents=True, exist_ok=True) + + rng = np.random.default_rng() + rand_num = f"{rng.integers(0, 99999):05d}" + temp_output = f"{args.output}.{rand_num}" + + start = time.time() + + build_dsp( + args.input, + temp_output, + {}, + database=database_dic, + chan_config=channel_dict, + write_mode="r", + buffer_len=settings_dict.get("buffer_len", 1000), + block_width=settings_dict.get("block_width", 16), + ) + + log.info(f"build_dsp finished in {time.time()-start}") + Path(temp_output).rename(args.output) + + key = Path(args.output).name.replace(f"-tier_{args.tier}.lh5", "") + + if args.tier in ["dsp", "psp"]: + raw_channels = [ + channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel) + ] + raw_fields = [ + field.split("/")[-1] + for field in lh5.ls(args.input, f"{raw_channels[0]}/raw/") + ] + + outputs = {} + channels = [] + for channel, chan_dict in channel_dict.items(): + output = chan_dict["outputs"] + in_dict = False + for entry in outputs: + if outputs[entry]["fields"] == output: + outputs[entry]["channels"].append(channel.split("/")[0]) + in_dict = True + if in_dict is False: + outputs[f"group{len(list(outputs))+1}"] = { + "channels": [channel.split("/")[0]], + "fields": output, + } + channels.append(channel.split("/")[0]) + + full_dict = { + "valid_fields": { + "raw": {"group1": {"fields": raw_fields, "channels": raw_channels}}, + "dsp": outputs, + }, + "valid_keys": { + key: {"valid_channels": {"raw": raw_channels, "dsp": channels}} + }, + } + else: + outputs = {} + channels = [] + for channel, chan_dict in channel_dict.items(): + output = chan_dict["outputs"] + in_dict = False + for entry in outputs: + if outputs[entry]["fields"] == output: + outputs[entry]["channels"].append(channel.split("/")[0]) + in_dict = True + if in_dict is False: + outputs[f"group{len(list(outputs))+1}"] = { + "channels": [channel.split("/")[0]], + "fields": output, + } + channels.append(channel.split("/")[0]) + + full_dict = { + "valid_fields": { + "ann": outputs, + }, + "valid_keys": {key: 
{"valid_channels": {"ann": channels}}}, + } + + Path(args.db_file).parent.mkdir(parents=True, exist_ok=True) + Props.write_to(args.db_file, full_dict) diff --git a/workflow/src/legenddataflow/scripts/tier/evt.py b/workflow/src/legenddataflow/scripts/tier/evt.py new file mode 100644 index 0000000..15a76d1 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/tier/evt.py @@ -0,0 +1,187 @@ +import argparse +import json +import time +from pathlib import Path + +import lgdo.lh5 as lh5 +import numpy as np +from dbetto import Props, TextDB +from legendmeta import LegendMetadata +from lgdo.types import Array +from pygama.evt import build_evt + +from ...log import build_log + +sto = lh5.LH5Store() + + +def find_matching_values_with_delay(arr1, arr2, jit_delay): + matching_values = [] + + # Create an array with all possible delay values + delays = np.arange(0, int(1e9 * jit_delay)) * jit_delay + + for delay in delays: + arr2_delayed = arr2 + delay + + # Find matching values and indices + mask = np.isin(arr1, arr2_delayed, assume_unique=True) + matching_values.extend(arr1[mask]) + + return np.unique(matching_values) + + +def build_tier_evt() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--hit_file", help="hit file", type=str) + argparser.add_argument("--dsp_file", help="dsp file", type=str) + argparser.add_argument("--tcm_file", help="tcm file", type=str) + argparser.add_argument("--ann_file", help="ann file") + argparser.add_argument("--xtc_file", help="xtc file", type=str) + argparser.add_argument("--par_files", help="par files", nargs="*") + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--tier", help="Tier", type=str, required=True) + + argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + + argparser.add_argument("--output", help="output file", type=str) + args = argparser.parse_args() + + # load in config + configs = TextDB(args.configs, lazy=True) + if args.tier in ("evt", "pet"): + rule_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ + "tier_evt" + ] + + else: + msg = "unknown tier" + raise ValueError(msg) + + config_dict = rule_dict["inputs"] + evt_config_file = config_dict["evt_config"] + + log = build_log(rule_dict, args.log) + + meta = LegendMetadata(args.metadata, lazy=True) + chmap = meta.channelmap(args.timestamp) + + evt_config = Props.read_from(evt_config_file) + + if args.datatype in ("phy", "xtc"): + exp_string = evt_config["operations"]["geds___energy"]["expression"] + exp_string = exp_string.replace( + 'xtalk_matrix_filename=""', f'xtalk_matrix_filename="{args.xtc_file}"' + ) + exp_string = exp_string.replace( + 'cal_par_files=""', f"cal_par_files={args.par_files}" + ) + exp_string2 = exp_string.replace( + 'return_mode="energy"', 'return_mode="tcm_index"' + ) + + file_path_config = { + "operations": { + "geds___energy": {"expression": exp_string}, + "_geds___tcm_idx": {"expression": exp_string2}, + } + } + + log.debug(json.dumps(file_path_config, indent=2)) + + Props.add_to(evt_config, file_path_config) + + # block for snakemake to fill in channel lists + for field, dic in evt_config["channels"].items(): + if isinstance(dic, dict): + chans = chmap.map("system", unique=False)[dic["system"]] + if "selectors" in 
dic: + try: + for k, val in dic["selectors"].items(): + chans = chans.map(k, unique=False)[val] + except KeyError: + chans = None + if chans is not None: + chans = [f"ch{chan}" for chan in list(chans.map("daq.rawid"))] + else: + chans = [] + evt_config["channels"][field] = chans + + log.debug(json.dumps(evt_config["channels"], indent=2)) + + t_start = time.time() + Path(args.output).parent.mkdir(parents=True, exist_ok=True) + + file_table = { + "tcm": (args.tcm_file, "hardware_tcm_1", "ch{}"), + "dsp": (args.dsp_file, "dsp", "ch{}"), + "hit": (args.hit_file, "hit", "ch{}"), + "evt": (None, "evt"), + } + + if args.ann_file is not None: + file_table["ann"] = (args.ann_file, "dsp", "ch{}") + + table = build_evt( + file_table, + evt_config, + ) + + if "muon_config" in config_dict and config_dict["muon_config"] is not None: + muon_config = Props.read_from(config_dict["muon_config"]["evt_config"]) + field_config = Props.read_from(config_dict["muon_config"]["field_config"]) + # block for snakemake to fill in channel lists + for field, dic in muon_config["channels"].items(): + if isinstance(dic, dict): + chans = chmap.map("system", unique=False)[dic["system"]] + if "selectors" in dic: + try: + for k, val in dic["selectors"].items(): + chans = chans.map(k, unique=False)[val] + except KeyError: + chans = None + if chans is not None: + chans = [f"ch{chan}" for chan in list(chans.map("daq.rawid"))] + else: + chans = [] + muon_config["channels"][field] = chans + + trigger_timestamp = table[field_config["ged_timestamp"]["table"]][ + field_config["ged_timestamp"]["field"] + ].nda + if "hardware_tcm_2" in lh5.ls(args.tcm_file): + muon_table = build_evt( + { + "tcm": (args.tcm_file, "hardware_tcm_2", "ch{}"), + "dsp": (args.dsp_file, "dsp", "ch{}"), + "hit": (args.hit_file, "hit", "ch{}"), + "evt": (None, "evt"), + }, + muon_config, + ) + + muon_timestamp = muon_table[field_config["muon_timestamp"]["field"]].nda + muon_tbl_flag = muon_table[field_config["muon_flag"]["field"]].nda + if len(muon_timestamp[muon_tbl_flag]) > 0: + is_muon_veto_triggered = find_matching_values_with_delay( + trigger_timestamp, + muon_timestamp[muon_tbl_flag], + field_config["jitter"], + ) + muon_flag = np.isin(trigger_timestamp, is_muon_veto_triggered) + else: + muon_flag = np.zeros(len(trigger_timestamp), dtype=bool) + else: + muon_flag = np.zeros(len(trigger_timestamp), dtype=bool) + table[field_config["output_field"]["table"]].add_column( + field_config["output_field"]["field"], Array(muon_flag) + ) + + sto.write(obj=table, name="evt", lh5_file=args.output, wo_mode="a") + + t_elap = time.time() - t_start + log.info(f"Done! 
Time elapsed: {t_elap:.2f} sec.") diff --git a/workflow/src/legenddataflow/scripts/tier/hit.py b/workflow/src/legenddataflow/scripts/tier/hit.py new file mode 100644 index 0000000..9fd489f --- /dev/null +++ b/workflow/src/legenddataflow/scripts/tier/hit.py @@ -0,0 +1,98 @@ +import argparse +import time +from pathlib import Path + +from dbetto.catalog import Props +from legendmeta import LegendMetadata, TextDB +from lgdo import lh5 +from pygama.hit.build_hit import build_hit + +from ...log import build_log + + +def build_tier_hit() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--input", help="input file", type=str) + argparser.add_argument("--pars_file", help="hit pars file", nargs="*") + + argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--tier", help="Tier", type=str, required=True) + + argparser.add_argument("--output", help="output file", type=str) + argparser.add_argument("--db_file", help="db file", type=str) + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True) + if args.tier == "hit" or args.tier == "pht": + config_dict = configs.on(args.timestamp, system=args.datatype)[ + "snakemake_rules" + ]["tier_hit"] + else: + msg = "unknown tier" + raise ValueError(msg) + + log = build_log(config_dict, args.log) + + channel_dict = config_dict["inputs"]["hit_config"] + settings_dict = config_dict["options"].get("settings", {}) + if isinstance(settings_dict, str): + settings_dict = Props.read_from(settings_dict) + + meta = LegendMetadata(path=args.metadata) + chan_map = meta.channelmap(args.timestamp, system=args.datatype) + + pars_dict = Props.read_from(args.pars_file) + pars_dict = {chan: chan_dict["pars"] for chan, chan_dict in pars_dict.items()} + + hit_dict = {} + channels_present = lh5.ls(args.input) + for channel in pars_dict: + chan_pars = pars_dict[channel].copy() + try: + detector = chan_map.map("daq.rawid")[int(channel[2:])].name + if detector in channel_dict: + cfg_dict = Props.read_from(channel_dict[detector]) + Props.add_to(cfg_dict, chan_pars) + chan_pars = cfg_dict + + if channel in channels_present: + hit_dict[f"{channel}/dsp"] = chan_pars + except KeyError: + pass + + t_start = time.time() + Path(args.output).parent.mkdir(parents=True, exist_ok=True) + build_hit(args.input, lh5_tables_config=hit_dict, outfile=args.output) + t_elap = time.time() - t_start + log.info(f"Done! 
Time elapsed: {t_elap:.2f} sec.") + + hit_outputs = {} + hit_channels = [] + for channel, file in channel_dict.items(): + output = Props.read_from(file)["outputs"] + in_dict = False + for entry in hit_outputs: + if hit_outputs[entry]["fields"] == output: + hit_outputs[entry]["channels"].append(channel) + in_dict = True + if in_dict is False: + hit_outputs[f"group{len(list(hit_outputs))+1}"] = { + "channels": [channel], + "fields": output, + } + hit_channels.append(channel) + + key = args.output.replace(f"-tier_{args.tier}.lh5", "") + + full_dict = { + "valid_fields": {args.tier: hit_outputs}, + "valid_keys": {key: {"valid_channels": {args.tier: hit_channels}}}, + } + + Path(args.db_file).parent.mkdir(parents=True, exist_ok=True) + Props.write_to(args.db_file, full_dict) diff --git a/workflow/src/legenddataflow/scripts/tier/raw_blind.py b/workflow/src/legenddataflow/scripts/tier/raw_blind.py new file mode 100644 index 0000000..19eb023 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/tier/raw_blind.py @@ -0,0 +1,185 @@ +""" +This script takes in raw data, applies the calibration to the daqenergy +and uses this to blind the data in a window of Qbb +- 25 keV. It copies over all +channels in a raw file, removing those events that fall within the ROI for Ge detectors +that have a daqenergy calibration curve and are not anti-coincidence only (AC). It removes +the whole event from all of the Ge and SiPM channels. + +In the Snakemake dataflow, this script only runs if the checkfile is found on disk, +but this is controlled by the Snakemake flow (presumably an error is thrown if the file +is not found). This script itself does not check for the existence of such a file. +""" + +import argparse +from pathlib import Path + +import numexpr as ne +import numpy as np +from dbetto.catalog import Props +from legendmeta import LegendMetadata, TextDB +from lgdo import lh5 + +from ...log import build_log + + +def build_tier_raw_blind() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--input", help="input file", type=str) + argparser.add_argument("--output", help="output file", type=str) + argparser.add_argument( + "--blind_curve", help="blinding curves file", type=str, required=True, nargs="*" + ) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--configs", help="config file", type=str) + argparser.add_argument("--chan_maps", help="chan map", type=str) + argparser.add_argument("--metadata", help="metadata", type=str) + argparser.add_argument("--log", help="log file", type=str) + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True) + config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ + "tier_raw" + ] + + build_log(config_dict, args.log) + + hdf_settings = Props.read_from(config_dict["settings"])["hdf5_settings"] + blinding_settings = Props.read_from(config_dict["config"]) + + centroid = blinding_settings["centroid_in_keV"] # keV + width = blinding_settings["width_in_keV"] # keV + + # list of all channels and objects in the raw file + all_channels = lh5.ls(args.input) + + # list of Ge channels and SiPM channels with associated metadata + legendmetadata = LegendMetadata(args.metadata, lazy=True) + ged_channels = ( + legendmetadata.channelmap(args.timestamp) + .map("system", unique=False)["geds"] + .map("daq.rawid") + ) + spms_channels = ( + legendmetadata.channelmap(args.timestamp) + 
.map("system", unique=False)["spms"] + .map("daq.rawid") + ) + auxs_channels = ( + legendmetadata.channelmap(args.timestamp) + .map("system", unique=False)["auxs"] + .map("daq.rawid") + ) + blsn_channels = ( + legendmetadata.channelmap(args.timestamp) + .map("system", unique=False)["bsln"] + .map("daq.rawid") + ) + puls_channels = ( + legendmetadata.channelmap(args.timestamp) + .map("system", unique=False)["puls"] + .map("daq.rawid") + ) + + store = lh5.LH5Store() + + # rows that need blinding + toblind = np.array([]) + + # first, loop through the Ge detector channels, calibrate them and look for events that should be blinded + for chnum in list(ged_channels): + # skip Ge detectors that are anti-coincidence only or not able to be blinded for some other reason + if ged_channels[chnum]["analysis"]["is_blinded"] is False: + continue + + # load in just the daqenergy for now + daqenergy, _ = store.read(f"ch{chnum}/raw/daqenergy", args.input) + + # read in calibration curve for this channel + blind_curve = Props.read_from(args.blind_curve)[f"ch{chnum}"]["pars"][ + "operations" + ] + + # calibrate daq energy using pre existing curve + daqenergy_cal = ne.evaluate( + blind_curve["daqenergy_cal"]["expression"], + local_dict=dict( + daqenergy=daqenergy, **blind_curve["daqenergy_cal"]["parameters"] + ), + ) + + # figure out which event indices should be blinded + toblind = np.append( + toblind, + np.nonzero(np.abs(np.asarray(daqenergy_cal) - centroid) <= width)[0], + ) + + # remove duplicates + toblind = np.unique(toblind) + + # total number of events (from last Ge channel loaded, should be same for all Ge channels) + allind = np.arange(len(daqenergy)) + + # gets events that should not be blinded + tokeep = allind[np.logical_not(np.isin(allind, toblind))] + + # make some temp file to write the output to before renaming it + rng = np.random.default_rng() + rand_num = f"{rng.integers(0,99999):05d}" + temp_output = f"{args.output}.{rand_num}" + Path(temp_output).parent.mkdir(parents=True, exist_ok=True) + + for channel in all_channels: + try: + chnum = int(channel[2::]) + except ValueError: + # if this isn't an interesting channel, just copy it to the output file + chobj, _ = store.read(channel, args.input, decompress=False) + store.write_object( + chobj, + channel, + lh5_file=temp_output, + wo_mode="w", + **hdf_settings, + ) + continue + + if ( + (chnum not in list(ged_channels)) + and (chnum not in list(spms_channels)) + and (chnum not in list(auxs_channels)) + and (chnum not in list(blsn_channels)) + and (chnum not in list(puls_channels)) + ): + # if this is a PMT or not included for some reason, just copy it to the output file + chobj, _ = store.read(channel + "/raw", args.input, decompress=False) + store.write_object( + chobj, + group=channel, + name="raw", + lh5_file=temp_output, + wo_mode="w", + **hdf_settings, + ) + continue + + # the rest should be the Ge and SiPM channels that need to be blinded + + # read in all of the data but only for the unblinded events + blinded_chobj, _ = store.read( + channel + "/raw", args.input, idx=tokeep, decompress=False + ) + + # now write the blinded data for this channel + store.write_object( + blinded_chobj, + group=channel, + name="raw", + lh5_file=temp_output, + wo_mode="w", + **hdf_settings, + ) + + # rename the temp file + Path(args.output).parent.mkdir(parents=True, exist_ok=True) + Path(temp_output).rename(args.output) diff --git a/workflow/src/legenddataflow/scripts/tier/raw_fcio.py b/workflow/src/legenddataflow/scripts/tier/raw_fcio.py new file mode 
diff --git a/workflow/src/legenddataflow/scripts/tier/raw_orca.py b/workflow/src/legenddataflow/scripts/tier/raw_orca.py
new file mode 100644
index 0000000..00d7751
--- /dev/null
+++ b/workflow/src/legenddataflow/scripts/tier/raw_orca.py
@@ -0,0 +1,110 @@
+import argparse
+import logging
+from pathlib import Path
+
+import numpy as np
+from daq2lh5 import build_raw
+from dbetto import TextDB
+from dbetto.catalog import Props
+
+from ...log import build_log
+
+
+def build_tier_raw_orca() -> None:
+    argparser = argparse.ArgumentParser()
+    argparser.add_argument("input", help="input file", type=str)
+    argparser.add_argument("output", help="output file", type=str)
+    argparser.add_argument("--datatype", help="Datatype", type=str, required=True)
+    argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True)
+    argparser.add_argument("--configs", help="config file", type=str)
+    argparser.add_argument("--chan_maps", help="chan map", type=str)
+    argparser.add_argument("--log", help="log file")
+    args = argparser.parse_args()
+
+    Path(args.log).parent.mkdir(parents=True, exist_ok=True)
+    logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w")
+
+    Path(args.output).parent.mkdir(parents=True, exist_ok=True)
+
+    configs = TextDB(args.configs, lazy=True)
+    config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][
+        "tier_raw"
+    ]
+
+    build_log(config_dict, args.log)
+
+    channel_dict = config_dict["inputs"]
+    settings = Props.read_from(channel_dict["settings"])
+    channel_dict = channel_dict["out_spec"]
+    all_config = Props.read_from(channel_dict["gen_config"])
+
+    chmap = TextDB(args.chan_maps, lazy=True)
+
+    if "geds_config" in list(channel_dict):
+        ged_config = Props.read_from(channel_dict["geds_config"])
+
+        ged_channels = list(
+            chmap.channelmaps.on(args.timestamp)
+            .map("system", unique=False)["geds"]
+            .map("daq.rawid")
+        )
+
+        ged_config[next(iter(ged_config))]["geds"]["key_list"] = sorted(ged_channels)
+        Props.add_to(all_config, ged_config)
+
+    if "spms_config" in list(channel_dict):
+        spm_config = Props.read_from(channel_dict["spms_config"])
+
+        spm_channels = list(
+            chmap.channelmaps.on(args.timestamp)
+            .map("system", unique=False)["spms"]
+            .map("daq.rawid")
+        )
+
+        spm_config[next(iter(spm_config))]["spms"]["key_list"] = sorted(spm_channels)
+        Props.add_to(all_config, spm_config)
+
+    if "auxs_config" in list(channel_dict):
+        aux_config = Props.read_from(channel_dict["auxs_config"])
+        aux_channels = list(
+            chmap.channelmaps.on(args.timestamp)
+            .map("system", unique=False)["auxs"]
+            .map("daq.rawid")
+        )
+        aux_channels += list(
+            chmap.channelmaps.on(args.timestamp)
+            .map("system", unique=False)["puls"]
+            .map("daq.rawid")
+        )
+        aux_channels += list(
+            chmap.channelmaps.on(args.timestamp)
+            .map("system", unique=False)["bsln"]
+            .map("daq.rawid")
+        )
+        top_key = next(iter(aux_config))
+        aux_config[top_key][next(iter(aux_config[top_key]))]["key_list"] = sorted(
+            aux_channels
+        )
+        Props.add_to(all_config, aux_config)
+
+    if "muon_config" in list(channel_dict):
+        muon_config = Props.read_from(channel_dict["muon_config"])
+        muon_channels = list(
+            chmap.channelmaps.on(args.timestamp)
+            .map("system", unique=False)["muon"]
+            .map("daq.rawid")
+        )
+        top_key = next(iter(muon_config))
+        muon_config[top_key][next(iter(muon_config[top_key]))]["key_list"] = sorted(
+            muon_channels
+        )
+        Props.add_to(all_config, muon_config)
+
+    rng = np.random.default_rng()
+    rand_num = f"{rng.integers(0,99999):05d}"
+    temp_output = f"{args.output}.{rand_num}"
+
+    build_raw(args.input, out_spec=all_config, filekey=temp_output, **settings)
+
+    # rename the temp file
+    Path(temp_output).rename(args.output)
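Both raw builders above write to a randomly suffixed temporary file and rename it into place at the end, so a partially written output never sits at the final path. A minimal sketch (not part of the diff) of that pattern, with write_atomically and write_fn as illustrative names rather than package functions:

from pathlib import Path

import numpy as np


def write_atomically(output: str, write_fn) -> None:
    """Write via a uniquely named temporary file, then rename into place."""
    rng = np.random.default_rng()
    temp_output = f"{output}.{rng.integers(0, 99999):05d}"
    Path(output).parent.mkdir(parents=True, exist_ok=True)
    write_fn(temp_output)             # e.g. build_raw(..., filekey=temp_output, ...)
    Path(temp_output).rename(output)  # rename is atomic within a single filesystem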
diff --git a/workflow/src/legenddataflow/scripts/tier/skm.py b/workflow/src/legenddataflow/scripts/tier/skm.py
new file mode 100644
index 0000000..a698629
--- /dev/null
+++ b/workflow/src/legenddataflow/scripts/tier/skm.py
@@ -0,0 +1,96 @@
+import argparse
+
+import awkward as ak
+from dbetto import TextDB
+from dbetto.catalog import Props
+from lgdo import lh5
+from lgdo.types import Array, Struct, Table, VectorOfVectors
+
+from ...log import build_log
+
+
+def get_all_out_fields(input_table, out_fields, current_field=""):
+    for key in input_table:
+        field = input_table[key]
+        key_string = f"{current_field}.{key}"
+        if isinstance(field, (Table, Struct)):
+            get_all_out_fields(field, out_fields, key_string)
+        else:
+            if key_string not in out_fields:
+                out_fields.append(key_string)
+    return out_fields
+
+
+def build_tier_skm() -> None:
+    argparser = argparse.ArgumentParser()
+    argparser.add_argument("--evt_file", help="evt file", required=True)
+    argparser.add_argument("--configs", help="configs", required=True)
+    argparser.add_argument("--datatype", help="datatype", required=True)
+    argparser.add_argument("--timestamp", help="timestamp", required=True)
+    argparser.add_argument("--log", help="log file", default=None)
+    argparser.add_argument("--output", help="output file", required=True)
+    args = argparser.parse_args()
+
+    # load in config
+    config_dict = TextDB(args.configs, lazy=True).on(
+        args.timestamp, system=args.datatype
+    )["snakemake_rules"]["tier_skm"]
+
+    build_log(config_dict, args.log)
+
+    skm_config_file = config_dict["inputs"]["skm_config"]
+    evt_filter = Props.read_from(skm_config_file)["evt_filter"]
+    out_fields = Props.read_from(skm_config_file)["keep_fields"]
+
+    store = lh5.LH5Store()
+
+    evt = lh5.read_as("evt", args.evt_file, "ak")  # noqa: F841
+
+    # remove unwanted events
+    skm = eval(f"evt[{evt_filter}]")
+    # make it rectangular and make an LGDO Table
+    out_table = Table(skm)
+
+    for field in out_fields:
+        items = field.split(".")
+        ptr1 = out_table
+        for item in items[:-1]:
+            ptr1 = ptr1[item]
+
+        if isinstance(ptr1[items[-1]], Table):
+            out_fields.remove(field)
+            out_fields = get_all_out_fields(
+                ptr1[items[-1]], out_fields, current_field=field
+            )
+
+    # remove unwanted columns
+    out_table_skm = Table(size=len(out_table))
+    for field in out_fields:
+        # table nesting is labeled by '.' in the config
+        items = field.split(".")
+        # get to actual nested field recursively
+        ptr1 = out_table
+        ptr2 = out_table_skm
+        for item in items[:-1]:
+            # make intermediate tables in new table
+            if item not in ptr2:
+                ptr2.add_field(item, Table(size=len(out_table)))
+            # get non-table LGDO recursively
+            ptr1 = ptr1[item]
+            ptr2 = ptr2[item]
+
+        # finally add column to new table
+        if isinstance(ptr1[items[-1]], VectorOfVectors):
+            ptr2.add_field(items[-1], Array(ak.flatten(ptr1[items[-1]].view_as("ak"))))
+        else:
+            ptr2.add_field(items[-1], ptr1[items[-1]])
+        attrs = ptr1[items[-1]].attrs
+
+        # forward LGDO attributes
+        # attrs = evt[field.replace(".", "_")].attrs
+        for attr, val in attrs.items():
+            if attr != "datatype":
+                ptr2.attrs[attr] = val
+
+    # write to disk
+    store.write(out_table_skm, "skm", args.output, wo_mode="w")
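The column-copying loop above walks dotted field names ("a.b.c" in the keep_fields config) through nested tables, creating intermediate tables on demand. A minimal sketch (not part of the diff) of the same pointer walk, using plain dicts in place of lgdo Tables and hypothetical field names:

def copy_field(src: dict, dst: dict, field: str) -> None:
    items = field.split(".")
    ptr1, ptr2 = src, dst
    for item in items[:-1]:
        ptr2 = ptr2.setdefault(item, {})  # create intermediate "tables" on demand
        ptr1 = ptr1[item]
    ptr2[items[-1]] = ptr1[items[-1]]     # finally copy the leaf column


evt = {"geds": {"energy": [2039.0], "multiplicity": [1]}, "coincident": {"muon": [False]}}
skm = {}
for f in ("geds.energy", "coincident.muon"):
    copy_field(evt, skm, f)
# skm == {"geds": {"energy": [2039.0]}, "coincident": {"muon": [False]}}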
argparser.add_argument("--configs", help="configs", required=True) + argparser.add_argument("--datatype", help="datatype", required=True) + argparser.add_argument("--timestamp", help="timestamp", required=True) + argparser.add_argument("--log", help="log file", default=None) + argparser.add_argument("--output", help="output file", required=True) + args = argparser.parse_args() + + # load in config + config_dict = TextDB(args.configs, lazy=True).on( + args.timestamp, system=args.datatype + )["snakemake_rules"]["tier_skm"] + + build_log(config_dict, args.log) + + skm_config_file = config_dict["inputs"]["skm_config"] + evt_filter = Props.read_from(skm_config_file)["evt_filter"] + out_fields = Props.read_from(skm_config_file)["keep_fields"] + + store = lh5.LH5Store() + + evt = lh5.read_as("evt", args.evt_file, "ak") # noqa: F841 + + # remove unwanted events + skm = eval(f"evt[{evt_filter}]") + # make it rectangular and make an LGDO Table + out_table = Table(skm) + + for field in out_fields: + items = field.split(".") + ptr1 = out_table + for item in items[:-1]: + ptr1 = ptr1[item] + + if isinstance(ptr1[items[-1]], Table): + out_fields.remove(field) + out_fields = get_all_out_fields( + ptr1[items[-1]], out_fields, current_field=field + ) + + # remove unwanted columns + out_table_skm = Table(size=len(out_table)) + for field in out_fields: + # table nesting is labeled by '.' in the config + items = field.split(".") + # get to actual nested field recursively + ptr1 = out_table + ptr2 = out_table_skm + for item in items[:-1]: + # make intermediate tables in new table + if item not in ptr2: + ptr2.add_field(item, Table(size=len(out_table))) + # get non-table LGDO recursively + ptr1 = ptr1[item] + ptr2 = ptr2[item] + + # finally add column to new table + if isinstance(ptr1[items[-1]], VectorOfVectors): + ptr2.add_field(items[-1], Array(ak.flatten(ptr1[items[-1]].view_as("ak")))) + else: + ptr2.add_field(items[-1], ptr1[items[-1]]) + attrs = ptr1[items[-1]].attrs + + # forward LGDO attributes + # attrs = evt[field.replace(".", "_")].attrs + for attr, val in attrs.items(): + if attr != "datatype": + ptr2.attrs[attr] = val + + # write-append to disk + store.write(out_table_skm, "skm", args.output, wo_mode="w") diff --git a/workflow/src/legenddataflow/scripts/tier/tcm.py b/workflow/src/legenddataflow/scripts/tier/tcm.py new file mode 100644 index 0000000..6f53b1f --- /dev/null +++ b/workflow/src/legenddataflow/scripts/tier/tcm.py @@ -0,0 +1,55 @@ +import argparse +from pathlib import Path + +import lgdo.lh5 as lh5 +import numpy as np +from daq2lh5.orca import orca_flashcam +from dbetto import TextDB +from dbetto.catalog import Props +from pygama.evt.build_tcm import build_tcm + +from ...log import build_log + + +def build_tier_tcm() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("input", help="input file", type=str) + argparser.add_argument("output", help="output file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--configs", help="config file", type=str) + argparser.add_argument("--log", help="log file", type=str) + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["tier_tcm"] + + build_log(config_dict, args.log) + + settings = Props.read_from(config_dict["inputs"]["config"]) + + rng = np.random.default_rng() + 
temp_output = f"{args.output}.{rng.integers(0, 99999):05d}" + Path(args.output).parent.mkdir(parents=True, exist_ok=True) + + # get the list of channels by fcid + ch_list = lh5.ls(args.input, "/ch*") + fcid_channels = {} + for ch in ch_list: + key = int(ch[2:]) + fcid = orca_flashcam.get_fcid(key) + if fcid not in fcid_channels: + fcid_channels[fcid] = [] + fcid_channels[fcid].append(f"/{ch}/raw") + + # make a hardware_tcm_[fcid] for each fcid + for fcid, fcid_dict in fcid_channels.items(): + build_tcm( + [(args.input, fcid_dict)], + out_file=temp_output, + out_name=f"hardware_tcm_{fcid}", + wo_mode="o", + **settings, + ) + + Path(temp_output).rename(args.output)