Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix on TRestMetadata and TRestDataSet add-ons #427

Merged
merged 15 commits into from
May 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions macros/REST_GenerateDataSets.C
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#include "TRestDataSet.h"
#include "TRestTask.h"

#ifndef RestTask_GenerateDataSets
#define RestTask_GenerateDataSets

//*******************************************************************************************************
//*** Description: This macro will launch the generation of datasets defined
//*** inside a particular RML file `datasets.rml` that contains the dataset
//*** definitions. The second argument specifies which of the datasets
//*** defined inside `datasets.rml` will be generated.
//***
//*** --------------
//*** Usage: restManager GenerateDataSets datasets.rml set1,set2,set3
//***
//*******************************************************************************************************

/// Generates and exports every dataset named in `datasets`.
///
/// \param inputRML Path to the RML file passed to each TRestDataSet constructor.
/// \param datasets Comma-separated list of dataset names to be processed.
/// \return Always 0.
///
/// Each dataset is exported to a file named `Dataset_<name>.root`.
Int_t REST_GenerateDataSets(const std::string& inputRML, const std::string& datasets) {
    const std::vector<std::string> names = REST_StringHelper::Split(datasets, ",");

    for (size_t n = 0; n < names.size(); n++) {
        const std::string& name = names[n];
        std::cout << "Set : " << name << std::endl;

        TRestDataSet dataSet(inputRML.c_str(), name.c_str());
        dataSet.GenerateDataSet();

        const std::string outputFile = "Dataset_" + name + ".root";
        dataSet.Export(outputFile);
    }

    return 0;
}
#endif
26 changes: 20 additions & 6 deletions source/framework/core/inc/TRestDataSet.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ class TRestDataSet : public TRestMetadata {
/// A list of metadata members where filters will be applied
std::vector<std::string> fFilterMetadata; //<

/// If not empty it will check if the metadata member contains the value
/// If not empty it will check if the metadata member contains the string
std::vector<std::string> fFilterContains; //<

/// If the corresponding element is not empty it will check if the metadata member is greater
Expand All @@ -74,6 +74,9 @@ class TRestDataSet : public TRestMetadata {
/// If the corresponding element is not empty it will check if the metadata member is lower
std::vector<Double_t> fFilterLowerThan; //<

/// If the corresponding element is not empty it will check if the metadata member is equal
std::vector<Double_t> fFilterEqualsTo; //<

/// The properties of a relevant quantity that we want to store together with the dataset
std::map<std::string, RelevantQuantity> fQuantity; //<

Expand All @@ -87,16 +90,22 @@ class TRestDataSet : public TRestMetadata {
std::vector<std::string> fFileSelection; //<

/// TimeStamp for the start time of the first file
Double_t fStartTime = REST_StringHelper::StringToTimeStamp(fFilterEndTime);
Double_t fStartTime = REST_StringHelper::StringToTimeStamp(fFilterEndTime); //<

/// TimeStamp for the end time of the last file
Double_t fEndTime = REST_StringHelper::StringToTimeStamp(fFilterStartTime);
Double_t fEndTime = REST_StringHelper::StringToTimeStamp(fFilterStartTime); //<

/// It keeps track if the generated dataset is a pure dataset or a merged one
Bool_t fMergedDataset = false; //<

/// The list of dataset files imported
std::vector<std::string> fImportedFiles; //<

/// The resulting RDF::RNode object after initialization
ROOT::RDF::RNode fDataSet = ROOT::RDataFrame(0); //!

/// A pointer to the generated tree
TTree* fTree = nullptr; //!
TChain* fTree = nullptr; //!

void InitFromConfigFile() override;

Expand All @@ -116,7 +125,7 @@ class TRestDataSet : public TRestMetadata {
TTree* GetTree() const {
if (fTree == nullptr) {
RESTError << "Tree has not been yet initialized" << RESTendl;
RESTError << "You should invoke TRestDataSet::Initialize() before trying to access the tree"
RESTError << "You should invoke TRestDataSet::GenerateDataSet() before trying to access the tree"
<< RESTendl;
}
return fTree;
Expand Down Expand Up @@ -145,17 +154,22 @@ class TRestDataSet : public TRestMetadata {
inline auto GetFilterContains() const { return fFilterContains; }
inline auto GetFilterGreaterThan() const { return fFilterGreaterThan; }
inline auto GetFilterLowerThan() const { return fFilterLowerThan; }
inline auto GetFilterEqualsTo() const { return fFilterEqualsTo; }
inline auto GetQuantity() const { return fQuantity; }
inline auto GetCut() const { return fCut; }
inline auto IsMergedDataSet() const { return fMergedDataset; }

inline void SetFilePattern(const std::string& pattern) { fFilePattern = pattern; }

TRestDataSet& operator=(TRestDataSet& dS);
void Import(const std::string& fileName);
void Import(std::vector<std::string> fileNames);
void Export(const std::string& filename);

ROOT::RDF::RNode MakeCut(const TRestCut* cut);

ROOT::RDF::RNode Define(const std::string& columnName, const std::string& formula);

void PrintMetadata() override;
void Initialize() override;

Expand All @@ -165,6 +179,6 @@ class TRestDataSet : public TRestMetadata {
TRestDataSet(const char* cfgFileName, const std::string& name = "");
~TRestDataSet();

ClassDefOverride(TRestDataSet, 2);
ClassDefOverride(TRestDataSet, 3);
};
#endif
117 changes: 114 additions & 3 deletions source/framework/core/src/TRestDataSet.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,18 @@
/// - **last**: It will simply register the value of the metadata member
/// from the last file in the list of selected files.
///
/// ### Adding a new column based on relevant quantities
///
/// Using the TRestDataSet::Define method we can implement a
/// formula based on column names and relevant quantities. Then, the
/// relevant quantities will be substituted by their dataset value.
///
/// \code
/// dataset.GetColumnNames()
/// dataset.Define("newColumnName", "QuantityName * column1" )
/// dataset.GetColumnNames()
/// dataset.GetDataFrame().Display({"column1", "newColumnName"})->Print();
/// \endcode
///
///----------------------------------------------------------------------
///
Expand Down Expand Up @@ -273,7 +285,8 @@ void TRestDataSet::Initialize() { SetSectionName(this->ClassName()); }
///
void TRestDataSet::GenerateDataSet() {
if (fTree != nullptr) {
RESTWarning << "Tree has already been loaded. Skipping TRestDataSet::Initialize ... " << RESTendl;
RESTWarning << "Tree has already been loaded. Skipping TRestDataSet::GenerateDataSet ... "
<< RESTendl;
return;
}

Expand Down Expand Up @@ -324,7 +337,7 @@ void TRestDataSet::GenerateDataSet() {
fDataSet = ROOT::RDataFrame("AnalysisTree", fOutName);

TFile* f = TFile::Open(fOutName.c_str());
fTree = (TTree*)f->Get("AnalysisTree");
fTree = (TChain*)f->Get("AnalysisTree");

RESTInfo << " - Dataset initialized!" << RESTendl;
}
Expand Down Expand Up @@ -378,6 +391,9 @@ std::vector<std::string> TRestDataSet::FileSelection() {
if (fFilterLowerThan[n] != -1)
if (StringToDouble(mdValue) >= fFilterLowerThan[n]) accept = false;

if (fFilterEqualsTo[n] != -1)
if (StringToDouble(mdValue) != fFilterEqualsTo[n]) accept = false;

n++;
}

Expand Down Expand Up @@ -466,6 +482,29 @@ ROOT::RDF::RNode TRestDataSet::MakeCut(const TRestCut* cut) {
return df;
}

namespace {
/// Returns true if `c` may appear inside a C++ identifier.
inline bool IsIdentifierCharacter(char c) {
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_';
}

/// Replaces the occurrences of `name` inside `formula` that are complete
/// identifiers, i.e. not preceded nor followed by a letter, a digit or an
/// underscore. Occurrences embedded in a longer identifier are left untouched.
std::string ReplaceWholeIdentifier(const std::string& formula, const std::string& name,
                                   const std::string& replacement) {
    if (name.empty()) return formula;

    std::string result;
    size_t cursor = 0;
    while (cursor < formula.size()) {
        const size_t found = formula.find(name, cursor);
        if (found == std::string::npos) break;

        const size_t end = found + name.size();
        const bool freeOnLeft = found == 0 || !IsIdentifierCharacter(formula[found - 1]);
        const bool freeOnRight = end == formula.size() || !IsIdentifierCharacter(formula[end]);

        if (freeOnLeft && freeOnRight) {
            result.append(formula, cursor, found - cursor);
            result += replacement;
            cursor = end;
        } else {
            // Part of a longer identifier: keep the text and continue one character later
            result.append(formula, cursor, found + 1 - cursor);
            cursor = found + 1;
        }
    }
    if (cursor < formula.size()) result.append(formula, cursor, std::string::npos);

    return result;
}
}  // namespace

///////////////////////////////////////////////
/// \brief This function will add a new column to the RDataFrame using
/// the same scheme as the usual RDF::Define method, but it will on top of
/// that evaluate the values of any relevant quantities used.
///
/// For example, the following code line would create a new column named
/// `test` replacing the relevant quantity `Nsim` by its value and using the
/// previously existing column `probability`.
/// \code
/// d.Define("test", "Nsim * probability");
/// \endcode
///
/// Only complete identifiers are substituted: a relevant quantity named `N`
/// does not alter `Nsim`, nor a column whose name contains the quantity name.
/// The substituted value is enclosed in parentheses so that a negative value
/// does not merge with a preceding operator (e.g. `a-Nsim`).
///
/// \param columnName The name of the column to be created.
/// \param formula The expression, written in terms of column names and
/// relevant quantity names.
/// \return The updated dataframe, which is also stored in `fDataSet`.
///
ROOT::RDF::RNode TRestDataSet::Define(const std::string& columnName, const std::string& formula) {
    std::string evalFormula = formula;
    for (auto const& [name, properties] : fQuantity) {
        std::string value = DoubleToString(properties.value, "%12.10e");
        value = "(" + value + ")";
        evalFormula = ReplaceWholeIdentifier(evalFormula, name, value);
    }

    fDataSet = fDataSet.Define(columnName, evalFormula);

    return fDataSet;
}

/////////////////////////////////////////////
/// \brief Prints on screen the information about the metadata members of TRestDataSet
///
Expand Down Expand Up @@ -511,6 +550,7 @@ void TRestDataSet::PrintMetadata() {
if (!fFilterContains[n].empty()) RESTMetadata << " Contains: " << fFilterContains[n];
if (fFilterGreaterThan[n] != -1) RESTMetadata << " Greater than: " << fFilterGreaterThan[n];
if (fFilterLowerThan[n] != -1) RESTMetadata << " Lower than: " << fFilterLowerThan[n];
if (fFilterEqualsTo[n] != -1) RESTMetadata << " Equals to: " << fFilterEqualsTo[n];

RESTMetadata << RESTendl;
n++;
Expand All @@ -535,6 +575,19 @@ void TRestDataSet::PrintMetadata() {
}
}

if (fMergedDataset) {
RESTMetadata << " " << RESTendl;
RESTMetadata << "This is a combined dataset." << RESTendl;
RESTMetadata << " -------------------- " << RESTendl;
RESTMetadata << " - Relevant quantities have been removed!" << RESTendl;
RESTMetadata << " - Dataset metadata properties correspond to the first file in the list."
<< RESTendl;
RESTMetadata << " " << RESTendl;
RESTMetadata << "List of imported files: " << RESTendl;
RESTMetadata << " -------------------- " << RESTendl;
for (const auto& fn : fImportedFiles) RESTMetadata << " - " << fn << RESTendl;
}

RESTMetadata << "----" << RESTendl;
}

Expand All @@ -559,10 +612,12 @@ void TRestDataSet::InitFromConfigFile() {
if (contains == "Not defined") contains = "";
Double_t greaterThan = StringToDouble(GetFieldValue("greaterThan", filterDefinition));
Double_t lowerThan = StringToDouble(GetFieldValue("lowerThan", filterDefinition));
Double_t equalsTo = StringToDouble(GetFieldValue("equalsTo", filterDefinition));

fFilterContains.push_back(contains);
fFilterGreaterThan.push_back(greaterThan);
fFilterLowerThan.push_back(lowerThan);
fFilterEqualsTo.push_back(equalsTo);

filterDefinition = GetNextElement(filterDefinition);
}
Expand Down Expand Up @@ -688,6 +743,7 @@ void TRestDataSet::Export(const std::string& filename) {
if (!fFilterContains[n].empty()) fprintf(f, " Contains: %s.", fFilterContains[n].c_str());
if (fFilterGreaterThan[n] != -1) fprintf(f, " Greater than: %6.3lf.", fFilterGreaterThan[n]);
if (fFilterLowerThan[n] != -1) fprintf(f, " Lower than: %6.3lf.", fFilterLowerThan[n]);
// The "Equals to" entry must report the equality value, not the lower-than bound
if (fFilterEqualsTo[n] != -1) fprintf(f, " Equals to: %6.3lf.", fFilterEqualsTo[n]);
fprintf(f, "\n");
n++;
}
Expand Down Expand Up @@ -763,6 +819,7 @@ TRestDataSet& TRestDataSet::operator=(TRestDataSet& dS) {
fFilterContains = dS.GetFilterContains();
fFilterGreaterThan = dS.GetFilterGreaterThan();
fFilterLowerThan = dS.GetFilterLowerThan();
fFilterEqualsTo = dS.GetFilterEqualsTo();
fQuantity = dS.GetQuantity();
fTotalDuration = dS.GetTotalTimeInSeconds();
fCut = dS.GetCut();
Expand Down Expand Up @@ -803,5 +860,59 @@ void TRestDataSet::Import(const std::string& fileName) {
RESTInfo << "Opening " << fileName << RESTendl;
fDataSet = ROOT::RDataFrame("AnalysisTree", fileName);

fTree = (TTree*)file->Get("AnalysisTree");
fTree = (TChain*)file->Get("AnalysisTree");
}

///////////////////////////////////////////////
/// \brief This function initializes the chained tree and the RDataFrame using
/// as input several root files that should contain TRestDataSet metadata
/// information. The values of the first dataset will be considered to be stored
/// in this new instance.
///
/// The metadata member `fMergedDataset` will be set to true to understand this
/// dataset is the combination of several datasets, and not a pure original one.
///
void TRestDataSet::Import(std::vector<std::string> fileNames) {
for (const auto& fN : fileNames)
if (TRestTools::GetFileNameExtension(fN) != "root") {
RESTError << "Datasets can only be imported from root files" << RESTendl;
return;
}

if (fileNames.size() == 0) return;

TFile* file = TFile::Open(fileNames[0].c_str(), "READ");
if (file != nullptr) {
TIter nextkey(file->GetListOfKeys());
TKey* key;
while ((key = (TKey*)nextkey())) {
std::string kName = key->GetClassName();
if (REST_Reflection::GetClassQuick(kName.c_str()) != nullptr &&
REST_Reflection::GetClassQuick(kName.c_str())->InheritsFrom("TRestDataSet")) {
TRestDataSet* dS = file->Get<TRestDataSet>(key->GetName());
if (GetVerboseLevel() >= TRestStringOutput::REST_Verbose_Level::REST_Info)
dS->PrintMetadata();
*this = *dS;
}
}
} else {
RESTError << "Cannot open " << fileNames[0] << RESTendl;
exit(1);
}

RESTInfo << "Opening list of files. First file: " << fileNames[0] << RESTendl;
fDataSet = ROOT::RDataFrame("AnalysisTree", fileNames);

if (fTree != nullptr) {
delete fTree;
fTree = nullptr;
}
fTree = new TChain("AnalysisTree");

for (const auto& fN : fileNames) fTree->Add((TString)fN);

fMergedDataset = true;
fImportedFiles = fileNames;

fQuantity.clear();
}
2 changes: 2 additions & 0 deletions source/framework/core/src/TRestMetadata.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -2327,6 +2327,8 @@ std::vector<string> TRestMetadata::GetDataMemberValues(string memberName, Int_t

result = Replace(result, "{", "");
result = Replace(result, "}", "");
result = Replace(result, "(", "");
result = Replace(result, ")", "");

std::vector<std::string> results = REST_StringHelper::Split(result, ",");

Expand Down