Skip to content

Commit

Permalink
Merge branch 'cherry-pick-29b4cf3a' into 'release/08-01'
Browse files Browse the repository at this point in the history
Merge branch 'bugfix/filemetadata-output' into 'main'

See merge request belle2/software/basf2!2954
  • Loading branch information
Frankenfrog committed Feb 13, 2024
2 parents b4df8f6 + fb6aa60 commit 1fd9fc1
Show file tree
Hide file tree
Showing 5 changed files with 118 additions and 102 deletions.
6 changes: 3 additions & 3 deletions framework/modules/rootio/include/RootOutputModule.h
Original file line number Diff line number Diff line change
Expand Up @@ -213,9 +213,9 @@ namespace Belle2 {

/** Pointer to the event meta data */
StoreObjPtr<EventMetaData> m_eventMetaData;
/** Pointer to the file meta data */
/** Pointer to the input file meta data */
StoreObjPtr<FileMetaData> m_fileMetaData{"", DataStore::c_Persistent};
/** File meta data finally stored in the output file */
FileMetaData m_outputFileMetaData;
/** File meta data stored in the output file */
FileMetaData* m_outputFileMetaData;
};
} // end namespace Belle2
66 changes: 35 additions & 31 deletions framework/modules/rootio/src/RootOutputModule.cc
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ subsequently processed completely independent.
.. versionadded:: release-03-00-00
)DOC", m_outputSplitSize);

m_outputFileMetaData = new FileMetaData;
}


Expand All @@ -137,9 +139,7 @@ void RootOutputModule::initialize()
//Let's set this to 100PB, that should last a bit longer.
TTree::SetMaxTreeSize(1000 * 1000 * 100000000000LL);

//create a file level metadata object in the data store
m_fileMetaData.registerInDataStore();
//and make sure we have event meta data
//make sure we have event meta data
m_eventMetaData.isRequired();

//check outputSplitSize
Expand Down Expand Up @@ -283,6 +283,14 @@ void RootOutputModule::openFile()
}
}

// set the address of the FileMetaData branch for the output to a separate one from the input
TBranch* fileMetaDataBranch = m_tree[DataStore::c_Persistent]->GetBranch("FileMetaData");
if (fileMetaDataBranch) {
fileMetaDataBranch->SetAddress(&m_outputFileMetaData);
} else {
m_tree[DataStore::c_Persistent]->Branch("FileMetaData", &m_outputFileMetaData, m_basketsize, m_splitLevel);
}

dir->cd();
if (m_outputSplitSize) {
B2INFO(getName() << ": Opened " << (m_fileIndex > 0 ? "new " : "") << "file for writing" << LogVar("filename", out));
Expand Down Expand Up @@ -359,14 +367,14 @@ void RootOutputModule::event()
void RootOutputModule::fillFileMetaData()
{
bool isMC = (m_fileMetaData) ? m_fileMetaData->isMC() : true;
m_fileMetaData.create(true);
if (!isMC) m_fileMetaData->declareRealData();
new(m_outputFileMetaData) FileMetaData;
if (!isMC) m_outputFileMetaData->declareRealData();

if (m_tree[DataStore::c_Event]) {
//create an index for the event tree
TTree* tree = m_tree[DataStore::c_Event];
unsigned long numEntries = tree->GetEntries();
m_fileMetaData->setNFullEvents(m_nFullEvents);
m_outputFileMetaData->setNFullEvents(m_nFullEvents);
if (m_buildIndex && numEntries > 0) {
if (numEntries > 10000000) {
//10M events correspond to about 240MB for the TTreeIndex object. for more than ~45M entries this causes crashes, broken files :(
Expand All @@ -377,31 +385,31 @@ void RootOutputModule::fillFileMetaData()
}
}

m_fileMetaData->setNEvents(numEntries);
m_outputFileMetaData->setNEvents(numEntries);
if (m_experimentLow > m_experimentHigh) {
//starting condition so apparently no events at all
m_fileMetaData->setLow(-1, -1, 0);
m_fileMetaData->setHigh(-1, -1, 0);
m_outputFileMetaData->setLow(-1, -1, 0);
m_outputFileMetaData->setHigh(-1, -1, 0);
} else {
m_fileMetaData->setLow(m_experimentLow, m_runLow, m_eventLow);
m_fileMetaData->setHigh(m_experimentHigh, m_runHigh, m_eventHigh);
m_outputFileMetaData->setLow(m_experimentLow, m_runLow, m_eventLow);
m_outputFileMetaData->setHigh(m_experimentHigh, m_runHigh, m_eventHigh);
}
}

//fill more file level metadata
m_fileMetaData->setParents(m_parentLfns);
RootIOUtilities::setCreationData(*m_fileMetaData);
m_fileMetaData->setRandomSeed(RandomNumbers::getSeed());
m_fileMetaData->setSteering(Environment::Instance().getSteering());
m_outputFileMetaData->setParents(m_parentLfns);
RootIOUtilities::setCreationData(*m_outputFileMetaData);
m_outputFileMetaData->setRandomSeed(RandomNumbers::getSeed());
m_outputFileMetaData->setSteering(Environment::Instance().getSteering());
auto mcEvents = Environment::Instance().getNumberOfMCEvents();
if(m_outputSplitSize and mcEvents > 0) {
if(m_fileIndex == 0) B2WARNING("Number of MC Events cannot be saved when splitting output files by size, setting to 0");
mcEvents = 0;
}
m_fileMetaData->setMcEvents(mcEvents);
m_fileMetaData->setDatabaseGlobalTag(Database::Instance().getGlobalTags());
m_outputFileMetaData->setMcEvents(mcEvents);
m_outputFileMetaData->setDatabaseGlobalTag(Database::Instance().getGlobalTags());
for (const auto& item : m_additionalDataDescription) {
m_fileMetaData->setDataDescription(item.first, item.second);
m_outputFileMetaData->setDataDescription(item.first, item.second);
}
// Set the LFN to the filename: if it's a URL, use it directly; otherwise make sure it's absolute
std::string lfn = m_file->GetName();
Expand All @@ -412,14 +420,13 @@ void RootOutputModule::fillFileMetaData()
std::string format = EnvironmentVariables::get("BELLE2_LFN_FORMATSTRING", "");
if (!format.empty()) {
auto format_filename = boost::python::import("B2Tools.format").attr("format_filename");
lfn = boost::python::extract<std::string>(format_filename(format, m_outputFileName, m_fileMetaData->getJsonStr()));
lfn = boost::python::extract<std::string>(format_filename(format, m_outputFileName, m_outputFileMetaData->getJsonStr()));
}
m_fileMetaData->setLfn(lfn);
m_outputFileMetaData->setLfn(lfn);
//register the file in the catalog
if (m_updateFileCatalog) {
FileCatalog::Instance().registerFile(m_file->GetName(), *m_fileMetaData);
FileCatalog::Instance().registerFile(m_file->GetName(), *m_outputFileMetaData);
}
m_outputFileMetaData = *m_fileMetaData;
}


Expand All @@ -431,19 +438,12 @@ void RootOutputModule::terminate()
void RootOutputModule::closeFile()
{
if(!m_file) return;
//get pointer to file level metadata
std::unique_ptr<FileMetaData> old;
if (m_fileMetaData) old = std::make_unique<FileMetaData>(*m_fileMetaData);

fillFileMetaData();

//fill Persistent data
fillTree(DataStore::c_Persistent);

// restore old file meta data if it existed
if (old) *m_fileMetaData = *old;
old.reset();

//write the trees
TDirectory* dir = gDirectory;
m_file->cd();
Expand All @@ -465,7 +465,7 @@ void RootOutputModule::closeFile()
m_file = nullptr;

// and now add it to the metadata service as it's fully written
MetadataService::Instance().addRootOutputFile(filename, &m_outputFileMetaData);
MetadataService::Instance().addRootOutputFile(filename, m_outputFileMetaData);

// reset some variables
for (auto & entry : m_entries) {
Expand Down Expand Up @@ -495,7 +495,11 @@ void RootOutputModule::fillTree(DataStore::EDurability durability)
entry->object->SetBit(kInvalidObject);
}
//FIXME: Do we need this? in theory no but it crashes in parallel processing otherwise ¯\_(ツ)_/¯
tree.SetBranchAddress(entry->name.c_str(), &entry->object);
if (entry->name == "FileMetaData") {
tree.SetBranchAddress(entry->name.c_str(), &m_outputFileMetaData);
} else {
tree.SetBranchAddress(entry->name.c_str(), &entry->object);
}
}
tree.Fill();
for (auto* entry: m_entries[durability]) {
Expand Down
4 changes: 3 additions & 1 deletion framework/tests/filemetadata.out
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
[INFO] Steering file: framework/tests/filemetadata.py
[INFO] Steering file: filemetadata.py
[INFO] The random number seed is set to "something important"
[INFO] Starting event processing, random seed is set to 'something important'
[INFO] Starting event processing, random seed is set to 'something important'
[INFO] Added file b2filemetadata.root
=== FileMetaData ===
LFN: /logical/file/name
nEvents: 10
Expand Down
139 changes: 72 additions & 67 deletions framework/tests/filemetadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,77 +10,82 @@

from ROOT import Belle2
import os
import tempfile
import subprocess
import json
import basf2
from basf2.version import get_version
from b2test_utils import safe_process, clean_working_directory

basf2.set_random_seed("something important")

testFile = tempfile.NamedTemporaryFile(prefix='b2filemetadata_')

# Generate a small test file
main = basf2.Path()
main.add_module('EventInfoSetter', evtNumList=[9, 1], runList=[1, 15], expList=[7, 7])
main.add_module('RootOutput', outputFileName=testFile.name, updateFileCatalog=False)
basf2.process(main)

# Check the file meta data (via DataStore)
metadata = Belle2.PyStoreObj('FileMetaData', 1)

# print (metadata.getLfn()) #?
assert 10 == metadata.getNEvents()
assert 10 == metadata.getNFullEvents()

assert 7 == metadata.getExperimentLow()
assert 1 == metadata.getRunLow()
assert 1 == metadata.getEventLow()
assert 7 == metadata.getExperimentHigh()
assert 15 == metadata.getRunHigh()
assert 1 == metadata.getEventHigh() # highest evt. no in highest run
assert metadata.containsEvent(7, 1, 1)
assert metadata.containsEvent(7, 1, 9)
assert metadata.containsEvent(7, 15, 1)
assert metadata.containsEvent(7, 1, 10) # does not exist, but would be plausible
assert metadata.containsEvent(7, 3, 6) # same
assert not metadata.containsEvent(7, 0, 10) # does not exist, but would be plausible
assert not metadata.containsEvent(7, 16, 6) # same
assert not metadata.containsEvent(0, 15, 6)

assert 0 == metadata.getNParents()

# print (metadata.getDate())
# assert socket.gethostname() == metadata.getSite()
# print (metadata.getUser()) #different env variables, not checked
assert "something important" == metadata.getRandomSeed()

assert get_version() == metadata.getRelease()
assert metadata.getSteering().startswith('#!/usr/bin/env python3')
assert metadata.getSteering().strip().endswith('dummystring')
assert 10 == metadata.getMcEvents()


os.system('touch Belle2FileCatalog.xml')
assert 0 == os.system('b2file-metadata-add --lfn /logical/file/name ' + testFile.name)

assert 0 == os.system('b2file-metadata-show ' + testFile.name)

# Check JSON output (contains steering file, so we cannotuse .out)
metadata_output = subprocess.check_output(['b2file-metadata-show', '--json', testFile.name])
m = json.loads(metadata_output.decode('utf-8'))
assert 7 == m['experimentLow']
assert 1 == m['runLow']
assert 1 == m['eventLow']
assert 7 == m['experimentHigh']
assert 15 == m['runHigh']
assert 1 == m['eventHigh']
assert 'something important' == m['randomSeed']
assert 10 == m['nEvents']
assert isinstance(m['nEvents'], int)
assert 10 == m['nFullEvents']
assert isinstance(m['nFullEvents'], int)
assert '/logical/file/name' == m['LFN']

# steering file is in metadata, so we check for existence of this string:
# dummystring
testFile = 'b2filemetadata.root'

with clean_working_directory():

# Generate a small test file
main = basf2.Path()
main.add_module('EventInfoSetter', evtNumList=[9, 1], runList=[1, 15], expList=[7, 7])
main.add_module('RootOutput', outputFileName=testFile, updateFileCatalog=False)
safe_process(main)

# Check the file meta data (via DataStore)
read = basf2.Path()
read.add_module('RootInput', inputFileName=testFile)
basf2.process(read)

metadata = Belle2.PyStoreObj('FileMetaData', 1)

# print (metadata.getLfn()) #?
assert 10 == metadata.getNEvents()
assert 10 == metadata.getNFullEvents()

assert 7 == metadata.getExperimentLow()
assert 1 == metadata.getRunLow()
assert 1 == metadata.getEventLow()
assert 7 == metadata.getExperimentHigh()
assert 15 == metadata.getRunHigh()
assert 1 == metadata.getEventHigh() # highest evt. no in highest run
assert metadata.containsEvent(7, 1, 1)
assert metadata.containsEvent(7, 1, 9)
assert metadata.containsEvent(7, 15, 1)
assert metadata.containsEvent(7, 1, 10) # does not exist, but would be plausible
assert metadata.containsEvent(7, 3, 6) # same
assert not metadata.containsEvent(7, 0, 10) # does not exist, but would be plausible
assert not metadata.containsEvent(7, 16, 6) # same
assert not metadata.containsEvent(0, 15, 6)

assert 0 == metadata.getNParents()

# print (metadata.getDate())
# assert socket.gethostname() == metadata.getSite()
# print (metadata.getUser()) #different env variables, not checked
assert "something important" == metadata.getRandomSeed()

assert get_version() == metadata.getRelease()
assert metadata.getSteering().startswith('#!/usr/bin/env python3')
assert metadata.getSteering().strip().endswith('dummystring')
assert 10 == metadata.getMcEvents()

os.system('touch Belle2FileCatalog.xml')
assert 0 == os.system('b2file-metadata-add --lfn /logical/file/name ' + testFile)

assert 0 == os.system('b2file-metadata-show ' + testFile)

# Check JSON output (contains steering file, so we cannot use .out)
metadata_output = subprocess.check_output(['b2file-metadata-show', '--json', testFile])
m = json.loads(metadata_output.decode('utf-8'))
assert 7 == m['experimentLow']
assert 1 == m['runLow']
assert 1 == m['eventLow']
assert 7 == m['experimentHigh']
assert 15 == m['runHigh']
assert 1 == m['eventHigh']
assert 'something important' == m['randomSeed']
assert 10 == m['nEvents']
assert isinstance(m['nEvents'], int)
assert 10 == m['nFullEvents']
assert isinstance(m['nFullEvents'], int)
assert '/logical/file/name' == m['LFN']

# steering file is in metadata, so we check for existence of this string:
# dummystring
5 changes: 5 additions & 0 deletions framework/tests/root_output_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,11 @@ def check_jobfile(jobfile, expected_files):
assert safe_process(path) == 0, "RootOutput failed"
check_jobfile("job-info1.json", {f"test_split.f0000{i}.root" for i in range(2)})

# check metadata
meta = [get_metadata(e) for e in ["test_split.f00000.root", "test_split.f00001.root"]]
assert meta[0]["parents"] == [], "There should be no parents"
assert meta[1]["parents"] == [], "There should be no parents"

# check files and set a well known lfn
for i in range(2):
subprocess.check_call(["b2file-metadata-add", "-l", f"parent{i}", f"test_split.f0000{i}.root"])
Expand Down

0 comments on commit 1fd9fc1

Please sign in to comment.