diff --git a/macros/REST_GenerateDataSets.C b/macros/REST_GenerateDataSets.C new file mode 100644 index 000000000..d6c3f02bc --- /dev/null +++ b/macros/REST_GenerateDataSets.C @@ -0,0 +1,29 @@ +#include "TRestDataSet.h" +#include "TRestTask.h" + +#ifndef RestTask_GenerateDataSets +#define RestTask_GenerateDataSets + +//******************************************************************************************************* +//*** Description: This macro will launch the generation of datasets defined +//*** inside a particular RML file `datasets.rml` that contains the dataset +//*** definitions. The second argument is a comma-separated list naming the +//*** datasets to be generated from those defined inside `datasets.rml`. +//*** +//*** -------------- +//*** Usage: restManager GenerateDataSets datasets.rml set1,set2,set3 +//*** +//******************************************************************************************************* + +Int_t REST_GenerateDataSets(const std::string& inputRML, const std::string& datasets) { + std::vector sets = REST_StringHelper::Split(datasets, ","); + + for (const auto& set : sets) { + std::cout << "Set : " << set << std::endl; + TRestDataSet d(inputRML.c_str(), set.c_str()); + d.GenerateDataSet(); + d.Export("Dataset_" + set + ".root"); + } + return 0; +} +#endif diff --git a/source/framework/core/inc/TRestDataSet.h b/source/framework/core/inc/TRestDataSet.h index d78fd72ce..ac9570ce9 100644 --- a/source/framework/core/inc/TRestDataSet.h +++ b/source/framework/core/inc/TRestDataSet.h @@ -65,7 +65,7 @@ class TRestDataSet : public TRestMetadata { /// A list of metadata members where filters will be applied std::vector fFilterMetadata; //< - /// If not empty it will check if the metadata member contains the value + /// If not empty it will check if the metadata member contains the string std::vector fFilterContains; //< /// If the corresponding element is not empty it will check if the metadata member is greater @@ -74,6 +74,9 @@ class 
TRestDataSet : public TRestMetadata { /// If the corresponding element is not empty it will check if the metadata member is lower std::vector fFilterLowerThan; //< + /// If the corresponding element is not empty it will check if the metadata member is equal + std::vector fFilterEqualsTo; //< + /// The properties of a relevant quantity that we want to store together with the dataset std::map fQuantity; //< @@ -87,16 +90,22 @@ class TRestDataSet : public TRestMetadata { std::vector fFileSelection; //< /// TimeStamp for the start time of the first file - Double_t fStartTime = REST_StringHelper::StringToTimeStamp(fFilterEndTime); + Double_t fStartTime = REST_StringHelper::StringToTimeStamp(fFilterEndTime); //< /// TimeStamp for the end time of the last file - Double_t fEndTime = REST_StringHelper::StringToTimeStamp(fFilterStartTime); + Double_t fEndTime = REST_StringHelper::StringToTimeStamp(fFilterStartTime); //< + + /// It keeps track if the generated dataset is a pure dataset or a merged one + Bool_t fMergedDataset = false; //< + + /// The list of dataset files imported + std::vector fImportedFiles; //< /// The resulting RDF::RNode object after initialization ROOT::RDF::RNode fDataSet = ROOT::RDataFrame(0); //! /// A pointer to the generated tree - TTree* fTree = nullptr; //! + TChain* fTree = nullptr; //! 
void InitFromConfigFile() override; @@ -116,7 +125,7 @@ class TRestDataSet : public TRestMetadata { TTree* GetTree() const { if (fTree == nullptr) { RESTError << "Tree has not been yet initialized" << RESTendl; - RESTError << "You should invoke TRestDataSet::Initialize() before trying to access the tree" + RESTError << "You should invoke TRestDataSet::GenerateDataSet() before trying to access the tree" << RESTendl; } return fTree; @@ -145,17 +154,22 @@ class TRestDataSet : public TRestMetadata { inline auto GetFilterContains() const { return fFilterContains; } inline auto GetFilterGreaterThan() const { return fFilterGreaterThan; } inline auto GetFilterLowerThan() const { return fFilterLowerThan; } + inline auto GetFilterEqualsTo() const { return fFilterEqualsTo; } inline auto GetQuantity() const { return fQuantity; } inline auto GetCut() const { return fCut; } + inline auto IsMergedDataSet() const { return fMergedDataset; } inline void SetFilePattern(const std::string& pattern) { fFilePattern = pattern; } TRestDataSet& operator=(TRestDataSet& dS); void Import(const std::string& fileName); + void Import(std::vector fileNames); void Export(const std::string& filename); ROOT::RDF::RNode MakeCut(const TRestCut* cut); + ROOT::RDF::RNode Define(const std::string& columnName, const std::string& formula); + void PrintMetadata() override; void Initialize() override; @@ -165,6 +179,6 @@ class TRestDataSet : public TRestMetadata { TRestDataSet(const char* cfgFileName, const std::string& name = ""); ~TRestDataSet(); - ClassDefOverride(TRestDataSet, 2); + ClassDefOverride(TRestDataSet, 3); }; #endif diff --git a/source/framework/core/src/TRestDataSet.cxx b/source/framework/core/src/TRestDataSet.cxx index 9acd42e69..babe06b69 100644 --- a/source/framework/core/src/TRestDataSet.cxx +++ b/source/framework/core/src/TRestDataSet.cxx @@ -209,6 +209,18 @@ /// - **last**: It will simply register the value of the metadata member /// from the last file in the list of selected files. 
/// +/// ### Adding a new column based on relevant quantities +/// +/// Using the TRestDataSet::Define method we can implement a +/// formula based on column names and relevant quantities. Then, the +/// relevant quantities will be substituted by their dataset value. +/// +/// \code +/// dataset.GetColumnNames() +/// dataset.Define("newColumnName", "QuantityName * column1" ) +/// dataset.GetColumnNames() +/// dataset.GetDataFrame().Display({"column1", "newColumnName"})->Print(); +/// \endcode /// ///---------------------------------------------------------------------- /// @@ -273,7 +285,8 @@ void TRestDataSet::Initialize() { SetSectionName(this->ClassName()); } /// void TRestDataSet::GenerateDataSet() { if (fTree != nullptr) { - RESTWarning << "Tree has already been loaded. Skipping TRestDataSet::Initialize ... " << RESTendl; + RESTWarning << "Tree has already been loaded. Skipping TRestDataSet::GenerateDataSet ... " + << RESTendl; return; } @@ -324,7 +337,7 @@ void TRestDataSet::GenerateDataSet() { fDataSet = ROOT::RDataFrame("AnalysisTree", fOutName); TFile* f = TFile::Open(fOutName.c_str()); - fTree = (TTree*)f->Get("AnalysisTree"); + fTree = (TChain*)f->Get("AnalysisTree"); RESTInfo << " - Dataset initialized!" << RESTendl; } @@ -378,6 +391,9 @@ std::vector TRestDataSet::FileSelection() { if (fFilterLowerThan[n] != -1) if (StringToDouble(mdValue) >= fFilterLowerThan[n]) accept = false; + if (fFilterEqualsTo[n] != -1) + if (StringToDouble(mdValue) != fFilterEqualsTo[n]) accept = false; + n++; } @@ -466,6 +482,29 @@ ROOT::RDF::RNode TRestDataSet::MakeCut(const TRestCut* cut) { return df; } +/////////////////////////////////////////////// +/// \brief This function will add a new column to the RDataFrame using +/// the same scheme as the usual RDF::Define method, but it will on top of +/// that evaluate the values of any relevant quantities used. 
+/// +/// For example, the following code line would create a new column named +/// `test` computed from the previously existing column `probability`, with +/// the relevant quantity `Nsim` replaced by its numerical value. +/// \code +/// d.Define("test", "Nsim * probability"); +/// \endcode +/// +ROOT::RDF::RNode TRestDataSet::Define(const std::string& columnName, const std::string& formula) { + std::string evalFormula = formula; + for (auto const& [name, properties] : fQuantity) + evalFormula = + REST_StringHelper::Replace(evalFormula, name, DoubleToString(properties.value, "%12.10e")); + + fDataSet = fDataSet.Define(columnName, evalFormula); + + return fDataSet; +} + ///////////////////////////////////////////// /// \brief Prints on screen the information about the metadata members of TRestDataSet /// @@ -511,6 +550,7 @@ void TRestDataSet::PrintMetadata() { if (!fFilterContains[n].empty()) RESTMetadata << " Contains: " << fFilterContains[n]; if (fFilterGreaterThan[n] != -1) RESTMetadata << " Greater than: " << fFilterGreaterThan[n]; if (fFilterLowerThan[n] != -1) RESTMetadata << " Lower than: " << fFilterLowerThan[n]; + if (fFilterEqualsTo[n] != -1) RESTMetadata << " Equals to: " << fFilterEqualsTo[n]; RESTMetadata << RESTendl; n++; @@ -535,6 +575,19 @@ void TRestDataSet::PrintMetadata() { } } + if (fMergedDataset) { + RESTMetadata << " " << RESTendl; + RESTMetadata << "This is a combined dataset." << RESTendl; + RESTMetadata << " -------------------- " << RESTendl; + RESTMetadata << " - Relevant quantities have been removed!" << RESTendl; + RESTMetadata << " - Dataset metadata properties correspond to the first file in the list." 
+ << RESTendl; + RESTMetadata << " " << RESTendl; + RESTMetadata << "List of imported files: " << RESTendl; + RESTMetadata << " -------------------- " << RESTendl; + for (const auto& fn : fImportedFiles) RESTMetadata << " - " << fn << RESTendl; + } + RESTMetadata << "----" << RESTendl; } @@ -559,10 +612,12 @@ void TRestDataSet::InitFromConfigFile() { if (contains == "Not defined") contains = ""; Double_t greaterThan = StringToDouble(GetFieldValue("greaterThan", filterDefinition)); Double_t lowerThan = StringToDouble(GetFieldValue("lowerThan", filterDefinition)); + Double_t equalsTo = StringToDouble(GetFieldValue("equalsTo", filterDefinition)); fFilterContains.push_back(contains); fFilterGreaterThan.push_back(greaterThan); fFilterLowerThan.push_back(lowerThan); + fFilterEqualsTo.push_back(equalsTo); filterDefinition = GetNextElement(filterDefinition); } @@ -688,6 +743,7 @@ void TRestDataSet::Export(const std::string& filename) { if (!fFilterContains[n].empty()) fprintf(f, " Contains: %s.", fFilterContains[n].c_str()); if (fFilterGreaterThan[n] != -1) fprintf(f, " Greater than: %6.3lf.", fFilterGreaterThan[n]); if (fFilterLowerThan[n] != -1) fprintf(f, " Lower than: %6.3lf.", fFilterLowerThan[n]); + if (fFilterEqualsTo[n] != -1) fprintf(f, " Equals to: %6.3lf.", fFilterEqualsTo[n]); fprintf(f, "\n"); n++; } @@ -763,6 +819,7 @@ TRestDataSet& TRestDataSet::operator=(TRestDataSet& dS) { fFilterContains = dS.GetFilterContains(); fFilterGreaterThan = dS.GetFilterGreaterThan(); fFilterLowerThan = dS.GetFilterLowerThan(); + fFilterEqualsTo = dS.GetFilterEqualsTo(); fQuantity = dS.GetQuantity(); fTotalDuration = dS.GetTotalTimeInSeconds(); fCut = dS.GetCut(); @@ -803,5 +860,59 @@ void TRestDataSet::Import(const std::string& fileName) { RESTInfo << "Opening " << fileName << RESTendl; fDataSet = ROOT::RDataFrame("AnalysisTree", fileName); - fTree = (TTree*)file->Get("AnalysisTree"); + fTree = (TChain*)file->Get("AnalysisTree"); +} + 
+/////////////////////////////////////////////// +/// \brief This function initializes the chained tree and the RDataFrame using +/// as input several root files that should contain TRestDataSet metadata +/// information. The values of the first dataset will be considered to be stored +/// in this new instance. +/// +/// The metadata member `fMergedDataset` will be set to true to flag that this +/// dataset is the combination of several datasets, and not a pure original one. +/// +void TRestDataSet::Import(std::vector<std::string> fileNames) { + for (const auto& fN : fileNames) + if (TRestTools::GetFileNameExtension(fN) != "root") { + RESTError << "Datasets can only be imported from root files" << RESTendl; + return; + } + + if (fileNames.size() == 0) return; + + TFile* file = TFile::Open(fileNames[0].c_str(), "READ"); + if (file != nullptr) { + TIter nextkey(file->GetListOfKeys()); + TKey* key; + while ((key = (TKey*)nextkey())) { + std::string kName = key->GetClassName(); + if (REST_Reflection::GetClassQuick(kName.c_str()) != nullptr && + REST_Reflection::GetClassQuick(kName.c_str())->InheritsFrom("TRestDataSet")) { + TRestDataSet* dS = file->Get<TRestDataSet>(key->GetName()); + if (GetVerboseLevel() >= TRestStringOutput::REST_Verbose_Level::REST_Info) + dS->PrintMetadata(); + *this = *dS; + } + } + } else { + RESTError << "Cannot open " << fileNames[0] << RESTendl; + exit(1); + } + + RESTInfo << "Opening list of files. First file: " << fileNames[0] << RESTendl; + fDataSet = ROOT::RDataFrame("AnalysisTree", fileNames); + + if (fTree != nullptr) { + delete fTree; + fTree = nullptr; + } + fTree = new TChain("AnalysisTree"); + + for (const auto& fN : fileNames) fTree->Add(fN.c_str()); + + fMergedDataset = true; + fImportedFiles = fileNames; + + fQuantity.clear(); } diff --git a/source/framework/core/src/TRestMetadata.cxx b/source/framework/core/src/TRestMetadata.cxx index 61a91ae7b..bd8eb1cc2 100644 --- a/source/framework/core/src/TRestMetadata.cxx +++ b/source/framework/core/src/TRestMetadata.cxx @@ -2327,6 +2327,8 @@ std::vector TRestMetadata::GetDataMemberValues(string memberName, Int_t result = Replace(result, "{", ""); result = Replace(result, "}", ""); + result = Replace(result, "(", ""); + result = Replace(result, ")", ""); std::vector results = REST_StringHelper::Split(result, ",");