Skip to content

Commit

Permalink
Merge pull request #48 from francois-drielsma/develop
Browse files Browse the repository at this point in the history
Multiple updates
  • Loading branch information
francois-drielsma authored Feb 11, 2025
2 parents 30c3bb5 + 6aebc3c commit 300eeb4
Show file tree
Hide file tree
Showing 5 changed files with 128 additions and 34 deletions.
13 changes: 6 additions & 7 deletions bin/larcv_check_valid.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import argparse

import numpy as np
from tqdm import tqdm
from ROOT import TFile # pylint: disable=E0611
from larcv import larcv # pylint: disable=W0611

Expand Down Expand Up @@ -39,8 +40,9 @@ def main(source, source_list, output):
out_file = open(output, 'w', encoding='utf-8')

# Loop over the list of files in the input, count the tree entries for each
print(f"\nCounting entries in every tree of {len(source)} files.")
keys_list, unique_counts = [], []
for file_path in source:
for file_path in tqdm(source):
# Count the number of entries in each tree
f = TFile(file_path)
keys = [key.GetName() for key in f.GetListOfKeys()]
Expand All @@ -57,19 +59,16 @@ def main(source, source_list, output):
# Loop over the list of keys/counts for each file in the input
print(f"\nChecking validity of {len(source)} file(s).")
bad_files = []
for idx, file_path in enumerate(source):
for idx, file_path in enumerate(tqdm(source)):
# Check that there is only one entry count and it's non-zero, and
# that the list of keys matches expectation
if (len(unique_counts[idx]) != 1 or unique_counts[idx][0] < 1 or
(set(keys_list[idx]) != set(all_keys))):
print(f"- Bad file: {file_path}")
tqdm.write(f"- Bad file: {file_path}")
out_file.write(f'{file_path}\n')
bad_files.append(file_path)

suffix = ':' if len(bad_files) > 0 else '.'
print(f"\nFound {len(bad_files)} bad files{suffix}")
for bad_path in bad_files:
print(f"- {bad_path}")
print(f"\nFound {len(bad_files)} bad files.")

# Close text file
out_file.close()
Expand Down
5 changes: 3 additions & 2 deletions bin/larcv_count_entries.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import argparse

from tqdm import tqdm
from ROOT import TFile # pylint: disable=E0611
from larcv import larcv # pylint: disable=W0611

Expand All @@ -28,7 +29,7 @@ def main(source, source_list, tree_name):
# Loop over the list of files in the input
total_entries = 0
print(f"\nCounting entries in {len(source)} file(s):")
for file_path in source:
for file_path in tqdm(source):
# Get the tree to get the number of entries from
f = TFile(file_path, 'r')
if tree_name is None:
Expand All @@ -41,7 +42,7 @@ def main(source, source_list, tree_name):
f.Close()

# Dump number for this file, increment
print(f"- Counted {num_entries} entries in {file_path}")
tqdm.write(f"- Counted {num_entries} entries in {file_path}")
total_entries += num_entries

print(f"\nTotal number of entries: {total_entries}")
Expand Down
75 changes: 56 additions & 19 deletions bin/output_check_valid.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,26 @@

import h5py
import numpy as np
from tqdm import tqdm
from ROOT import TFile # pylint: disable=E0611
from larcv import larcv # pylint: disable=W0611


def main(source, source_list, output, dest, suffix, tree_name):
def main(source, source_list, output, dest, suffix, event_list, tree_name):
"""Checks the output of the SPINE process.
The script loops over the input files, checks that there is an output file
in the expected location and further checks that the output file entry
count matches that of the input file.
Produces a list of input files that have no or incomplete output in a text
file (the name of which is provided with the `-o` or `--output` flag. This
file (the name of which is provided with the `-o` or `--output` flag). This
can be used to reprocess missing/incomplete input files.
.. code-block:: bash
$ python3 -c SPINE_CONFIG -S missing_list.txt
$ python3 bin/output_check_valid.py -S file_list.txt -o missing.txt
--dest /path/to/output/files/ --suffix output_file_suffix
Parameters
----------
Expand All @@ -38,6 +40,9 @@ def main(source, source_list, output, dest, suffix, tree_name):
Destination directory for the original SPINE process
suffix : str
Suffix added to the end of the input files by the original SPINE process
event_list : str
Path to a file containing a list of events to process. If provided, only
events which appear on this list are required in the output.
tree_name : str
Name of the tree to use as a reference to count the number of entries.
If not specified, takes the first tree in the list.
Expand All @@ -50,10 +55,17 @@ def main(source, source_list, output, dest, suffix, tree_name):
# Initialize the output text file
out_file = open(output, 'w', encoding='utf-8')

# If it is provided, parse the list of (run, subrun, event) triplets
if event_list is not None:
with open(event_list, 'r', encoding='utf-8') as f:
lines = f.read().splitlines()
line_list = [l.replace(',', ' ').split() for l in lines]
event_list = [(int(r), int(s), int(e)) for r, s, e in line_list]

# Loop over the list of files in the input
print("\nChecking existence and completeness of output files.")
miss_list, inc_list = [], []
for idx, file_path in enumerate(source):
for idx, file_path in enumerate(tqdm(source)):
# Find the base name of the input file (without extension)
base = os.path.basename(file_path)
stem, _ = os.path.splitext(base)
Expand All @@ -62,30 +74,51 @@ def main(source, source_list, output, dest, suffix, tree_name):
out_base = f'{stem}_{suffix}.h5'
out_path = f'{dest}/{out_base}'
if not os.path.isfile(out_path):
print(f"- Missing: {out_base}")
tqdm.write(f"- Missing: {out_base}")
out_file.write(f'{file_path}\n')
miss_list.append(file_path)
continue

# If the output does exist, check that the input and output have the
# same number of entries. Get the tree name first.
# expected number of entries. Get the tree name first.
f = TFile(file_path, 'r')
if tree_name is None:
key = [key.GetName() for key in f.GetListOfKeys()][0]
else:
key = f'{tree_name}_tree'
print(key)
key_b = key.replace('_tree', '_branch')

# Dispatch depending on whether the event list is provided or not
if event_list is None:
# Count the number of entries in the input file
num_entries = getattr(f, key).GetEntries()
f.Close()

# Count the number of entries in the input file
num_entries = getattr(f, key).GetEntries()
f.Close()
# Then check the number of events in the output file
with h5py.File(out_path) as f:
if len(f['events']) != num_entries:
tqdm.write(f"- Incomplete: {out_base}")
out_file.write(f'{file_path}\n')
inc_list.append(file_path)

# Then check the number of events in the output file
with h5py.File(out_path) as f:
if len(f['events']) != num_entries:
print(f"- Incomplete: {out_base}")
out_file.write(f'{file_path}\n')
inc_list.append(file_path)
else:
# Fetch the list of (run, subrun, event) triplets that should appear
tree = getattr(f, key)
check_list = []
for i in range(tree.GetEntries()):
tree.GetEntry(i)
branch = getattr(tree, key_b)
run, subrun, event = branch.run(), branch.subrun(), branch.event()
if (run, subrun, event) in event_list:
check_list.append((run, subrun, event))
f.Close()

# Check that the events which should appear are present
with h5py.File(out_path) as f:
if len(f['events']) != len(check_list):
tqdm.write(f"- Incomplete: {out_base}")
out_file.write(f'{file_path}\n')
inc_list.append(file_path)

num_miss = len(miss_list)
num_inc = len(inc_list)
Expand Down Expand Up @@ -116,17 +149,21 @@ def main(source, source_list, output, dest, suffix, tree_name):
parser.add_argument('--dest',
help='Destination directory for the original SPINE process',
type=str, required=True)

parser.add_argument('--suffix',
help='Suffix added to the input files by the original SPINE process',
type=str, required=True)

parser.add_argument('--tree_name',
parser.add_argument('--event-list',
help='File containing a list of events to process.',
type=str)

parser.add_argument('--tree-name',
help='TTree name used to count the entries.',
type=str)

args = parser.parse_args()

# Execute the main function
main(args.source, args.source_list, args.output, args.dest, args.suffix,
args.tree_name)
args.event_list, args.tree_name)
67 changes: 62 additions & 5 deletions spine/data/out/interaction.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,16 @@ class InteractionBase:
----------
particles : List[object]
List of particles that make up the interaction
particle_ids : np.ndarray,
primary_particles : List[object]
List of primary particles associated with the interaction
particle_ids : np.ndarray
List of Particle IDs that make up this interaction
primary_particle_ids : np.ndarray
List of primary Particle IDs associated with this interaction
num_particles : int
Number of particles that make up this interaction
num_primary_particles : int
Number of primary particles associated with this interaction
particle_counts : np.ndarray
(P) Number of particles of each species in this interaction
primary_particle_counts : np.ndarray
Expand All @@ -44,6 +50,8 @@ class InteractionBase:
(F) Indices of the optical volumes the flashes were recorded in
flash_times : np.ndarray
(F) Times at which the flashes occurred in microseconds
flash_scores : np.ndarray
(F) Flash matching quality scores reported for each match
flash_total_pe : float
Total number of photoelectrons associated with the flash
flash_hypo_pe : float
Expand All @@ -52,8 +60,11 @@ class InteractionBase:
String representing the interaction topology
"""
particles: List[object] = None
primary_particles: List[object] = None
particle_ids: np.ndarray = None
primary_particle_ids: np.ndarray = None
num_particles: int = None
num_primary_particles: int = None
particle_counts: np.ndarray = None
primary_particle_counts: np.ndarray = None
vertex: np.ndarray = None
Expand All @@ -75,7 +86,8 @@ class InteractionBase:

# Variable-length attributes as (key, dtype) pairs
_var_length_attrs = (
('particles', object), ('particle_ids', np.int32),
('particles', object), ('primary_particles', object),
('particle_ids', np.int32), ('primary_particle_ids', np.int32),
('flash_ids', np.int32), ('flash_volume_ids', np.int32),
('flash_times', np.float32), ('flash_scores', np.float32)
)
Expand All @@ -87,7 +99,7 @@ class InteractionBase:
_bool_attrs = ('is_fiducial', 'is_flash_matched')

# Attributes that must never be stored to file
_skip_attrs = ('particles',)
_skip_attrs = ('particles', 'primary_particles')

def __str__(self):
"""Human-readable string representation of the interaction object.
Expand All @@ -108,6 +120,36 @@ def __str__(self):

return info

@property
def primary_particles(self):
"""List of primary particles associated with this interaction.
Returns
-------
List[object]
List of primary Particle objects associated with this interaction
"""
return [part for part in self.particles if part.is_primary]

@primary_particles.setter
def primary_particles(self, primary_particles):
pass

@property
def primary_particle_ids(self):
"""List of primary Particle IDs associated with this interaction.
Returns
-------
np.ndarray
List of primary Particle IDs associated with this interaction
"""
return np.array([part.id for part in self.primary_particles])

@primary_particle_ids.setter
def primary_particle_ids(self, primary_particle_ids):
pass

@property
def num_particles(self):
"""Number of particles that make up this interaction.
Expand All @@ -123,6 +165,21 @@ def num_particles(self):
def num_particles(self, num_particles):
pass

@property
def num_primary_particles(self):
"""Number of primary particles associated with this interaction.
Returns
-------
int
Number of primary particles associated with the interaction instance
"""
return len(self.primary_particle_ids)

@num_primary_particles.setter
def num_primary_particles(self, num_primary_particles):
pass

@property
def particle_counts(self):
"""Number of particles of each PID species in this interaction.
Expand Down Expand Up @@ -153,8 +210,8 @@ def primary_particle_counts(self):
(P) Number of primary particles of each PID
"""
counts = np.zeros(len(PID_LABELS) - 1, dtype=int)
for part in self.particles:
if part.pid > -1 and part.is_primary and part.is_valid:
for part in self.primary_particles:
if part.pid > -1 and part.is_valid:
counts[part.pid] += 1

return counts
Expand Down
2 changes: 1 addition & 1 deletion spine/utils/globals.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@
MUON_MASS = 105.658 # [MeV/c^2]
PION_MASS = 139.570 # [MeV/c^2]
PROT_MASS = 938.272 # [MeV/c^2]
KAON_MASS = 483.677 # [MeV/c^2]
KAON_MASS = 493.677 # [MeV/c^2]

PID_MASSES = {
PHOT_PID: PHOT_MASS,
Expand Down

0 comments on commit 300eeb4

Please sign in to comment.