diff --git a/source/framework/core/inc/TRestCut.h b/source/framework/core/inc/TRestCut.h index 7b7bf743e..f77ef85d9 100644 --- a/source/framework/core/inc/TRestCut.h +++ b/source/framework/core/inc/TRestCut.h @@ -30,8 +30,15 @@ //! A class to help on cuts definitions. To be used with TRestAnalysisTree class TRestCut : public TRestMetadata { private: + /// Vector of TCuts std::vector fCuts; + /// Vector of cut strings e.g. when you use a complex cut + std::vector fCutStrings; + + /// Vector of parameter cuts, first item is parameter and second is the condition + std::vector > fParamCut; + protected: void Initialize() override; void InitFromConfigFile() override; @@ -40,6 +47,12 @@ class TRestCut : public TRestMetadata { void AddCut(TCut cut); TCut GetCut(std::string name); + inline auto GetCutStrings() const { return fCutStrings; } + inline auto GetParamCut() const { return fParamCut; } + inline auto GetCuts() const { return fCuts; } + + TRestCut& operator=(TRestCut& cut); + void PrintMetadata() override; Int_t Write(const char* name, Int_t option, Int_t bufsize) override; @@ -49,7 +62,7 @@ class TRestCut : public TRestMetadata { // Destructor ~TRestCut() {} - ClassDefOverride(TRestCut, 1); // Template for a REST "event process" class inherited from + ClassDefOverride(TRestCut, 2); // Template for a REST "event process" class inherited from // TRestEventProcess }; #endif diff --git a/source/framework/core/inc/TRestDataSet.h b/source/framework/core/inc/TRestDataSet.h index 3704a1089..d78fd72ce 100644 --- a/source/framework/core/inc/TRestDataSet.h +++ b/source/framework/core/inc/TRestDataSet.h @@ -27,6 +27,7 @@ #include +#include "TRestCut.h" #include "TRestMetadata.h" struct RelevantQuantity { @@ -47,10 +48,10 @@ struct RelevantQuantity { class TRestDataSet : public TRestMetadata { private: /// All the selected runs will have a starting date after fStartTime - std::string fStartTime = "2000/01/01"; //< + std::string fFilterStartTime = "2000/01/01"; //< /// All the 
selected runs will have an ending date before fEndTime - std::string fEndTime = "3000/12/31"; //< + std::string fFilterEndTime = "3000/12/31"; //< /// A glob file pattern that must be satisfied by all files std::string fFilePattern = ""; //< @@ -76,18 +77,27 @@ class TRestDataSet : public TRestMetadata { /// The properties of a relevant quantity that we want to store together with the dataset std::map fQuantity; //< + /// Parameter cuts over the selected dataset + TRestCut* fCut = nullptr; + /// The total integrated run time of selected files Double_t fTotalDuration = 0; //< - /// The resulting RDataFrame object after initialization - ROOT::RDataFrame fDataSet = 0; //! + /// A list populated by the FileSelection method using the conditions of the dataset + std::vector fFileSelection; //< + + /// TimeStamp for the start time of the first file + Double_t fStartTime = REST_StringHelper::StringToTimeStamp(fFilterEndTime); + + /// TimeStamp for the end time of the last file + Double_t fEndTime = REST_StringHelper::StringToTimeStamp(fFilterStartTime); + + /// The resulting RDF::RNode object after initialization + ROOT::RDF::RNode fDataSet = ROOT::RDataFrame(0); //! /// A pointer to the generated tree TTree* fTree = nullptr; //! - /// A list populated by the FileSelection method using the conditions of the dataset - std::vector fFileSelection; //! 
- void InitFromConfigFile() override; protected: @@ -95,11 +105,13 @@ class TRestDataSet : public TRestMetadata { public: /// Gives access to the RDataFrame - ROOT::RDataFrame GetDataFrame() const { + ROOT::RDF::RNode GetDataFrame() const { if (fTree == nullptr) RESTWarning << "DataFrame has not been yet initialized" << RESTendl; return fDataSet; } + void SetDataSet(const ROOT::RDF::RNode& dS) { fDataSet = dS; } + /// Gives access to the tree TTree* GetTree() const { if (fTree == nullptr) { @@ -122,14 +134,37 @@ class TRestDataSet : public TRestMetadata { /// It returns the accumulated run time in seconds Double_t GetTotalTimeInSeconds() const { return fTotalDuration; } + inline auto GetFilterStartTime() const { return fFilterStartTime; } + inline auto GetFilterEndTime() const { return fFilterEndTime; } + inline auto GetStartTime() const { return fStartTime; } + inline auto GetEndTime() const { return fEndTime; } + inline auto GetFilePattern() const { return fFilePattern; } + inline auto GetObservablesList() const { return fObservablesList; } + inline auto GetProcessObservablesList() const { return fProcessObservablesList; } + inline auto GetFilterMetadata() const { return fFilterMetadata; } + inline auto GetFilterContains() const { return fFilterContains; } + inline auto GetFilterGreaterThan() const { return fFilterGreaterThan; } + inline auto GetFilterLowerThan() const { return fFilterLowerThan; } + inline auto GetQuantity() const { return fQuantity; } + inline auto GetCut() const { return fCut; } + + inline void SetFilePattern(const std::string& pattern) { fFilePattern = pattern; } + + TRestDataSet& operator=(TRestDataSet& dS); + void Import(const std::string& fileName); void Export(const std::string& filename); + ROOT::RDF::RNode MakeCut(const TRestCut* cut); + void PrintMetadata() override; void Initialize() override; + + void GenerateDataSet(); + TRestDataSet(); TRestDataSet(const char* cfgFileName, const std::string& name = ""); ~TRestDataSet(); - 
ClassDefOverride(TRestDataSet, 1); + ClassDefOverride(TRestDataSet, 2); }; #endif diff --git a/source/framework/core/src/TRestCut.cxx b/source/framework/core/src/TRestCut.cxx index 7fc4a8d4a..fde9db0d0 100644 --- a/source/framework/core/src/TRestCut.cxx +++ b/source/framework/core/src/TRestCut.cxx @@ -26,6 +26,7 @@ /// /// /// +/// /// /// /// Note that the notations " AND " and " OR " will be replaced by " && " and " || " @@ -44,6 +45,9 @@ /// 2021-dec: First concept. /// Ni Kaixiang /// +/// 2023-March: Updating metadata structures +/// JuanAn García +/// /// \class TRestCut /// ///
@@ -68,14 +72,44 @@ void TRestCut::InitFromConfigFile() { auto ele = GetElement("cut"); while (ele != nullptr) { string name = GetParameter("name", ele, ""); + if (name.empty() || name == "Not defined") { + RESTError << "< cut does not contain a name!" << RESTendl; + exit(1); + } + string cutStr = GetParameter("value", ele, ""); - cutStr = Replace(cutStr, " AND ", " && "); - cutStr = Replace(cutStr, " OR ", " || "); - AddCut(TCut(name.c_str(), cutStr.c_str())); + string variable = GetParameter("variable", ele, ""); + string condition = GetParameter("condition", ele, ""); + + if (!cutStr.empty()) { + cutStr = Replace(cutStr, " AND ", " && "); + cutStr = Replace(cutStr, " OR ", " || "); + fCutStrings.push_back(cutStr); + AddCut(TCut(name.c_str(), cutStr.c_str())); + } else if (!variable.empty() && !condition.empty()) { + fParamCut.push_back(std::make_pair(variable, condition)); + string cutVar = variable + condition; + AddCut(TCut(name.c_str(), cutVar.c_str())); + } else { + RESTError << "TRestCut does not contain a valid parameter/condition or cut string!" << RESTendl; + RESTError << "" << RESTendl; + RESTError << " /// +/// // Will apply a cut to the observables +/// +/// +/// +/// /// // Will add all the observables from the process `rawAna` /// /// @@ -119,7 +124,7 @@ /// \code /// restRoot /// [0] TRestDataSet d("dataset"); -/// [1] d.Initialize(); +/// [1] d.GenerateDataSet(); /// [2] d.GetTree()->GetEntries() /// [3] d.GetDataFrame().GetColumnNames() /// \endcode @@ -147,15 +152,23 @@ /// instance. 
/// /// -/// Example: +/// Example 1 Generate DataSet from config file: /// \code /// restRoot -/// [0] TRestDataSet d("dataset"); -/// [1] d.Initialize(); +/// [0] TRestDataSet d("dataset", "dataSetName"); +/// [1] d.GenerateDataSet(); /// [2] d.Export("mydataset.csv"); /// [3] d.Export("mydataset.root"); /// \endcode /// +/// Example 2 Import existing DataSet: +/// \code +/// restRoot +/// [0] TRestDataSet d(); +/// [1] d.Import("myDataSet.root"); +/// [2] d.GetTree()->GetEntries() +/// \endcode +/// /// ### Relevant quantities /// /// Sometimes we will be willing that our dataset contains few variables @@ -249,12 +262,16 @@ TRestDataSet::TRestDataSet(const char* cfgFileName, const std::string& name) : T TRestDataSet::~TRestDataSet() {} /////////////////////////////////////////////// -/// \brief It will initialize the data frame with the filelist and column names -/// (or observables) that have been defined by the user. +/// \brief This function initialize different parameters +/// from the TRestDataSet /// -void TRestDataSet::Initialize() { - SetSectionName(this->ClassName()); +void TRestDataSet::Initialize() { SetSectionName(this->ClassName()); } +/////////////////////////////////////////////// +/// \brief This function generates the data frame with the filelist and column names +/// (or observables) that have been defined by the user. +/// +void TRestDataSet::GenerateDataSet() { if (fTree != nullptr) { RESTWarning << "Tree has already been loaded. Skipping TRestDataSet::Initialize ... 
" << RESTendl; return; @@ -263,29 +280,43 @@ void TRestDataSet::Initialize() { if (fFileSelection.empty()) FileSelection(); // We are not ready yet - if (fFileSelection.empty()) return; + if (fFileSelection.empty()) { + RESTError << "File selection is empty " << RESTendl; + return; + } ///// Disentangling process observables --> producing finalList - TRestRun run(fFileSelection[0]); + TRestRun run(fFileSelection.front()); std::vector finalList; finalList.push_back("runOrigin"); finalList.push_back("eventID"); finalList.push_back("timeStamp"); - for (const auto& obs : fObservablesList) finalList.push_back(obs); + auto obsNames = run.GetAnalysisTree()->GetObservableNames(); + for (const auto& obs : fObservablesList) { + if (std::find(obsNames.begin(), obsNames.end(), obs) != obsNames.end()) { + finalList.push_back(obs); + } else { + RESTWarning << " Observable " << obs << " not found in observable list, skipping..." << RESTendl; + } + } - std::vector obsNames = run.GetAnalysisTree()->GetObservableNames(); for (const auto& name : obsNames) { for (const auto& pcs : fProcessObservablesList) { if (name.find(pcs) == 0) finalList.push_back(name); } } - /////// + + // Remove duplicated observables if any + std::sort(finalList.begin(), finalList.end()); + finalList.erase(std::unique(finalList.begin(), finalList.end()), finalList.end()); ROOT::EnableImplicitMT(); fDataSet = ROOT::RDataFrame("AnalysisTree", fFileSelection); + fDataSet = MakeCut(fCut); + std::string user = getenv("USER"); std::string fOutName = "/tmp/rest_output_" + user + ".root"; fDataSet.Snapshot("AnalysisTree", fOutName, finalList); @@ -295,17 +326,6 @@ void TRestDataSet::Initialize() { TFile* f = TFile::Open(fOutName.c_str()); fTree = (TTree*)f->Get("AnalysisTree"); - int cont = 0; - std::string obsListStr; - for (const auto& l : finalList) { - if (cont > 0) obsListStr += ":"; - obsListStr += l; - cont++; - } - - // We do this so that later we can recover the values using TTree::GetVal - 
fTree->Draw((TString)obsListStr, "", "goff"); - RESTInfo << " - Dataset initialized!" << RESTendl; } @@ -315,8 +335,8 @@ void TRestDataSet::Initialize() { std::vector TRestDataSet::FileSelection() { fFileSelection.clear(); - std::time_t time_stamp_start = REST_StringHelper::StringToTimeStamp(fStartTime); - std::time_t time_stamp_end = REST_StringHelper::StringToTimeStamp(fEndTime); + std::time_t time_stamp_start = REST_StringHelper::StringToTimeStamp(fFilterStartTime); + std::time_t time_stamp_end = REST_StringHelper::StringToTimeStamp(fFilterEndTime); if (!time_stamp_end || !time_stamp_start) { RESTError << "TRestDataSet::FileSelect. Start or end dates not properly formed. Please, check " @@ -390,6 +410,10 @@ std::vector TRestDataSet::FileSelection() { if (properties.strategy == "last") properties.value = value; } + if (run.GetStartTimestamp() < fStartTime) fStartTime = run.GetStartTimestamp(); + + if (run.GetEndTimestamp() > fEndTime) fEndTime = run.GetEndTimestamp(); + fTotalDuration += run.GetEndTimestamp() - run.GetStartTimestamp(); fFileSelection.push_back(file); } @@ -398,14 +422,58 @@ std::vector TRestDataSet::FileSelection() { return fFileSelection; } +/////////////////////////////////////////////// +/// \brief This function apply a TRestCut to the dataframe +/// and returns a dataframe with the applied cuts. 
Note that +/// the cuts are not applied directly to the dataframe on +/// TRestDataSet, to do so you should do fDataSet = MakeCut(fCut); +/// +ROOT::RDF::RNode TRestDataSet::MakeCut(const TRestCut* cut) { + auto df = fDataSet; + + if (cut == nullptr) return df; + + auto paramCut = cut->GetParamCut(); + auto obsList = df.GetColumnNames(); + for (const auto& [param, condition] : paramCut) { + if (std::find(obsList.begin(), obsList.end(), param) != obsList.end()) { + std::string pCut = param + condition; + RESTDebug << "Applying cut " << pCut << RESTendl; + df = df.Filter(pCut); + } else { + RESTWarning << " Cut observable " << param << " not found in observable list, skipping..." + << RESTendl; + } + } + + auto cutString = cut->GetCutStrings(); + for (const auto& pCut : cutString) { + bool added = false; + for (const auto& obs : obsList) { + if (pCut.find(obs) != std::string::npos) { + RESTDebug << "Applying cut " << pCut << RESTendl; + df = df.Filter(pCut); + added = true; + break; + } + } + + if (!added) { + RESTWarning << " Cut string " << pCut << " not found in observable list, skipping..." 
<< RESTendl; + } + } + + return df; +} + ///////////////////////////////////////////// -/// \brief Prints on screen the information about the metadata members of TRestAxionSolarFlux +/// \brief Prints on screen the information about the metadata members of TRestDataSet /// void TRestDataSet::PrintMetadata() { TRestMetadata::PrintMetadata(); - RESTMetadata << " - StartTime : " << fStartTime << RESTendl; - RESTMetadata << " - EndTime : " << fEndTime << RESTendl; + RESTMetadata << " - StartTime : " << REST_StringHelper::ToDateTimeString(fStartTime) << RESTendl; + RESTMetadata << " - EndTime : " << REST_StringHelper::ToDateTimeString(fEndTime) << RESTendl; RESTMetadata << " - Path : " << TRestTools::SeparatePathAndName(fFilePattern).first << RESTendl; RESTMetadata << " - File pattern : " << TRestTools::SeparatePathAndName(fFilePattern).second << RESTendl; RESTMetadata << " " << RESTendl; @@ -434,7 +502,8 @@ void TRestDataSet::PrintMetadata() { if (!fFilterMetadata.empty()) { RESTMetadata << " Metadata filters: " << RESTendl; RESTMetadata << " ----------------- " << RESTendl; - + RESTMetadata << " - StartTime : " << fFilterStartTime << RESTendl; + RESTMetadata << " - EndTime : " << fFilterEndTime << RESTendl; int n = 0; for (const auto& mdFilter : fFilterMetadata) { RESTMetadata << " - " << mdFilter << "."; @@ -509,7 +578,7 @@ void TRestDataSet::InitFromConfigFile() { std::vector obsList = REST_StringHelper::Split(observables, ","); - for (const auto& l : obsList) fObservablesList.push_back(l); + fObservablesList.insert(fObservablesList.end(), obsList.begin(), obsList.end()); observablesDefinition = GetNextElement(observablesDefinition); } @@ -562,6 +631,8 @@ void TRestDataSet::InitFromConfigFile() { quantityDefinition = GetNextElement(quantityDefinition); } + + fCut = (TRestCut*)InstantiateChildMetadata("TRestCut"); } /////////////////////////////////////////////// @@ -599,8 +670,8 @@ void TRestDataSet::Export(const std::string& filename) { ///// Writing header 
fprintf(f, "### TRestDataSet generated file\n"); fprintf(f, "### \n"); - fprintf(f, "### StartTime : %s\n", fStartTime.c_str()); - fprintf(f, "### EndTime : %s\n", fEndTime.c_str()); + fprintf(f, "### StartTime : %s\n", fFilterStartTime.c_str()); + fprintf(f, "### EndTime : %s\n", fFilterEndTime.c_str()); fprintf(f, "###\n"); fprintf(f, "### Accumulated run time (seconds) : %lf\n", fTotalDuration); fprintf(f, "### Accumulated run time (hours) : %lf\n", fTotalDuration / 3600.); @@ -637,6 +708,16 @@ void TRestDataSet::Export(const std::string& filename) { fprintf(f, "###\n"); fprintf(f, "### Data starts here\n"); + auto obsNames = fDataSet.GetColumnNames(); + std::string obsListStr = ""; + for (const auto& l : obsNames) { + if (!obsListStr.empty()) obsListStr += ":"; + obsListStr += l; + } + + // We do this so that later we can recover the values using TTree::GetVal + fTree->Draw((TString)obsListStr, "", "goff"); + for (unsigned int n = 0; n < fTree->GetEntries(); n++) { for (unsigned int m = 0; m < GetNumberOfBranches(); m++) { std::string bName = fTree->GetListOfBranches()->At(m)->GetName(); @@ -665,3 +746,62 @@ void TRestDataSet::Export(const std::string& filename) { << " not recognized" << RESTendl; } } + +/////////////////////////////////////////////// +/// \brief Operator to copy TRestDataSet metadata +/// +TRestDataSet& TRestDataSet::operator=(TRestDataSet& dS) { + SetName(dS.GetName()); + fFilterStartTime = dS.GetFilterStartTime(); + fFilterEndTime = dS.GetFilterEndTime(); + fStartTime = dS.GetStartTime(); + fEndTime = dS.GetEndTime(); + fFilePattern = dS.GetFilePattern(); + fObservablesList = dS.GetObservablesList(); + fProcessObservablesList = dS.GetProcessObservablesList(); + fFilterMetadata = dS.GetFilterMetadata(); + fFilterContains = dS.GetFilterContains(); + fFilterGreaterThan = dS.GetFilterGreaterThan(); + fFilterLowerThan = dS.GetFilterLowerThan(); + fQuantity = dS.GetQuantity(); + fTotalDuration = dS.GetTotalTimeInSeconds(); + fCut = dS.GetCut(); + 
+ return *this; +} + +/////////////////////////////////////////////// +/// \brief This function imports metadata from a root file +/// it import metadata info from the previous dataSet +/// while it opens the analysis tree +/// +void TRestDataSet::Import(const std::string& fileName) { + if (TRestTools::GetFileNameExtension(fileName) != "root") { + RESTError << "Datasets can only be imported from root files" << RESTendl; + return; + } + + TFile* file = TFile::Open(fileName.c_str(), "READ"); + if (file != nullptr) { + TIter nextkey(file->GetListOfKeys()); + TKey* key; + while ((key = (TKey*)nextkey())) { + std::string kName = key->GetClassName(); + if (REST_Reflection::GetClassQuick(kName.c_str()) != nullptr && + REST_Reflection::GetClassQuick(kName.c_str())->InheritsFrom("TRestDataSet")) { + TRestDataSet* dS = file->Get(key->GetName()); + if (GetVerboseLevel() >= TRestStringOutput::REST_Verbose_Level::REST_Info) + dS->PrintMetadata(); + *this = *dS; + } + } + } else { + RESTError << "Cannot open " << fileName << RESTendl; + exit(1); + } + + RESTInfo << "Opening " << fileName << RESTendl; + fDataSet = ROOT::RDataFrame("AnalysisTree", fileName); + + fTree = (TTree*)file->Get("AnalysisTree"); +}