From 34e81e6fd2c3bebf2893ea696a7f2b6d01cdfb43 Mon Sep 17 00:00:00 2001 From: Javier Galan Date: Wed, 3 May 2023 09:47:32 +0200 Subject: [PATCH] Revert "Merge pull request #386 from rest-for-physics/datasetCut" This reverts commit b830b9ac723a8786f03c674f1693d393ee90cc1f, reversing changes made to a6e0b61348a8956005b27e9a9963cf0ebdd4de91. --- source/framework/core/inc/TRestCut.h | 15 +- source/framework/core/inc/TRestDataSet.h | 53 +----- source/framework/core/src/TRestCut.cxx | 46 +---- source/framework/core/src/TRestDataSet.cxx | 206 ++++----------------- 4 files changed, 48 insertions(+), 272 deletions(-) diff --git a/source/framework/core/inc/TRestCut.h b/source/framework/core/inc/TRestCut.h index f77ef85d9..7b7bf743e 100644 --- a/source/framework/core/inc/TRestCut.h +++ b/source/framework/core/inc/TRestCut.h @@ -30,15 +30,8 @@ //! A class to help on cuts definitions. To be used with TRestAnalysisTree class TRestCut : public TRestMetadata { private: - /// Vector of TCuts std::vector fCuts; - /// Vector of cut strings e.g. when you use a complex cut - std::vector fCutStrings; - - /// Vector of parameter cuts, first item is parameter and second is the condition - std::vector > fParamCut; - protected: void Initialize() override; void InitFromConfigFile() override; @@ -47,12 +40,6 @@ class TRestCut : public TRestMetadata { void AddCut(TCut cut); TCut GetCut(std::string name); - inline auto GetCutStrings() const { return fCutStrings; } - inline auto GetParamCut() const { return fParamCut; } - inline auto GetCuts() const { return fCuts; } - - TRestCut& operator=(TRestCut& cut); - void PrintMetadata() override; Int_t Write(const char* name, Int_t option, Int_t bufsize) override; @@ -62,7 +49,7 @@ class TRestCut : public TRestMetadata { // Destructor ~TRestCut() {} - ClassDefOverride(TRestCut, 2); // Template for a REST "event process" class inherited from + ClassDefOverride(TRestCut, 1); // Template for a REST "event process" class inherited from // TRestEventProcess }; #endif diff --git a/source/framework/core/inc/TRestDataSet.h b/source/framework/core/inc/TRestDataSet.h index d78fd72ce..3704a1089 100644 --- a/source/framework/core/inc/TRestDataSet.h +++ b/source/framework/core/inc/TRestDataSet.h @@ -27,7 +27,6 @@ #include -#include "TRestCut.h" #include "TRestMetadata.h" struct RelevantQuantity { @@ -48,10 +47,10 @@ struct RelevantQuantity { class TRestDataSet : public TRestMetadata { private: /// All the selected runs will have a starting date after fStartTime - std::string fFilterStartTime = "2000/01/01"; //< + std::string fStartTime = "2000/01/01"; //< /// All the selected runs will have an ending date before fEndTime - std::string fFilterEndTime = "3000/12/31"; //< + std::string fEndTime = "3000/12/31"; //< /// A glob file pattern that must be satisfied by all files std::string fFilePattern = ""; //< @@ -77,27 +76,18 @@ class TRestDataSet : public TRestMetadata { /// The properties of a relevant quantity that we want to store together with the dataset std::map fQuantity; //< - /// Parameter cuts over the selected dataset - TRestCut* fCut = nullptr; - /// The total integrated run time of selected files Double_t fTotalDuration = 0; //< - /// A list populated by the FileSelection method using the conditions of the dataset - std::vector fFileSelection; //< - - /// TimeStamp for the start time of the first file - Double_t fStartTime = REST_StringHelper::StringToTimeStamp(fFilterEndTime); - - /// TimeStamp for the end time of the last file - Double_t fEndTime = REST_StringHelper::StringToTimeStamp(fFilterStartTime); - - /// The resulting RDF::RNode object after initialization - ROOT::RDF::RNode fDataSet = ROOT::RDataFrame(0); //! + /// The resulting RDataFrame object after initialization + ROOT::RDataFrame fDataSet = 0; //! /// A pointer to the generated tree TTree* fTree = nullptr; //! + /// A list populated by the FileSelection method using the conditions of the dataset + std::vector fFileSelection; //! + void InitFromConfigFile() override; protected: @@ -105,13 +95,11 @@ class TRestDataSet : public TRestMetadata { public: /// Gives access to the RDataFrame - ROOT::RDF::RNode GetDataFrame() const { + ROOT::RDataFrame GetDataFrame() const { if (fTree == nullptr) RESTWarning << "DataFrame has not been yet initialized" << RESTendl; return fDataSet; } - void SetDataSet(const ROOT::RDF::RNode& dS) { fDataSet = dS; } - /// Gives access to the tree TTree* GetTree() const { if (fTree == nullptr) { @@ -134,37 +122,14 @@ class TRestDataSet : public TRestMetadata { /// It returns the accumulated run time in seconds Double_t GetTotalTimeInSeconds() const { return fTotalDuration; } - inline auto GetFilterStartTime() const { return fFilterStartTime; } - inline auto GetFilterEndTime() const { return fFilterEndTime; } - inline auto GetStartTime() const { return fStartTime; } - inline auto GetEndTime() const { return fEndTime; } - inline auto GetFilePattern() const { return fFilePattern; } - inline auto GetObservablesList() const { return fObservablesList; } - inline auto GetProcessObservablesList() const { return fProcessObservablesList; } - inline auto GetFilterMetadata() const { return fFilterMetadata; } - inline auto GetFilterContains() const { return fFilterContains; } - inline auto GetFilterGreaterThan() const { return fFilterGreaterThan; } - inline auto GetFilterLowerThan() const { return fFilterLowerThan; } - inline auto GetQuantity() const { return fQuantity; } - inline auto GetCut() const { return fCut; } - - inline void SetFilePattern(const std::string& pattern) { fFilePattern = pattern; } - - TRestDataSet& operator=(TRestDataSet& dS); - void Import(const std::string& fileName); void Export(const std::string& filename); - ROOT::RDF::RNode MakeCut(const TRestCut* cut); - void PrintMetadata() override; void Initialize() override; - - void GenerateDataSet(); - TRestDataSet(); TRestDataSet(const char* cfgFileName, const std::string& name = ""); ~TRestDataSet(); - ClassDefOverride(TRestDataSet, 2); + ClassDefOverride(TRestDataSet, 1); }; #endif diff --git a/source/framework/core/src/TRestCut.cxx b/source/framework/core/src/TRestCut.cxx index fde9db0d0..7fc4a8d4a 100644 --- a/source/framework/core/src/TRestCut.cxx +++ b/source/framework/core/src/TRestCut.cxx @@ -26,7 +26,6 @@ /// /// /// -/// /// /// /// Note that the notations " AND " and " OR " will be replaced by " && " and " || " @@ -45,9 +44,6 @@ /// 2021-dec: First concept. /// Ni Kaixiang /// -/// 2023-March: Updating metadata structures -/// JuanAn GarcĂ­a -/// /// \class TRestCut /// ///
@@ -72,44 +68,14 @@ void TRestCut::InitFromConfigFile() { auto ele = GetElement("cut"); while (ele != nullptr) { string name = GetParameter("name", ele, ""); - if (name.empty() || name == "Not defined") { - RESTError << "< cut does not contain a name!" << RESTendl; - exit(1); - } - string cutStr = GetParameter("value", ele, ""); - string variable = GetParameter("variable", ele, ""); - string condition = GetParameter("condition", ele, ""); - - if (!cutStr.empty()) { - cutStr = Replace(cutStr, " AND ", " && "); - cutStr = Replace(cutStr, " OR ", " || "); - fCutStrings.push_back(cutStr); - AddCut(TCut(name.c_str(), cutStr.c_str())); - } else if (!variable.empty() && !condition.empty()) { - fParamCut.push_back(std::make_pair(variable, condition)); - string cutVar = variable + condition; - AddCut(TCut(name.c_str(), cutVar.c_str())); - } else { - RESTError << "TRestCut does not contain a valid parameter/condition or cut string!" << RESTendl; - RESTError << "" << RESTendl; - RESTError << " /// -/// // Will apply a cut to the observables -/// -/// -/// -/// /// // Will add all the observables from the process `rawAna` /// /// @@ -124,7 +119,7 @@ /// \code /// restRoot /// [0] TRestDataSet d("dataset"); -/// [1] d.GenerateDataSet(); +/// [1] d.Initialize(); /// [2] d.GetTree()->GetEntries() /// [3] d.GetDataFrame().GetColumnNames() /// \endcode @@ -152,23 +147,15 @@ /// instance. /// /// -/// Example 1 Generate DataSet from config file: +/// Example: /// \code /// restRoot -/// [0] TRestDataSet d("dataset", "dataSetName"); -/// [1] d.GenerateDataSet(); +/// [0] TRestDataSet d("dataset"); +/// [1] d.Initialize(); /// [2] d.Export("mydataset.csv"); /// [3] d.Export("mydataset.root"); /// \endcode /// -/// Example 2 Import existing DataSet: -/// \code -/// restRoot -/// [0] TRestDataSet d(); -/// [1] d.Import("myDataSet.root"); -/// [2] d.GetTree()->GetEntries() -/// \endcode -/// /// ### Relevant quantities /// /// Sometimes we will be willing that our dataset contains few variables @@ -262,16 +249,12 @@ TRestDataSet::TRestDataSet(const char* cfgFileName, const std::string& name) : T TRestDataSet::~TRestDataSet() {} /////////////////////////////////////////////// -/// \brief This function initialize different parameters -/// from the TRestDataSet -/// -void TRestDataSet::Initialize() { SetSectionName(this->ClassName()); } - -/////////////////////////////////////////////// -/// \brief This function generates the data frame with the filelist and column names +/// \brief It will initialize the data frame with the filelist and column names /// (or observables) that have been defined by the user. /// -void TRestDataSet::GenerateDataSet() { +void TRestDataSet::Initialize() { + SetSectionName(this->ClassName()); + if (fTree != nullptr) { RESTWarning << "Tree has already been loaded. Skipping TRestDataSet::Initialize ... " << RESTendl; return; @@ -280,43 +263,29 @@ void TRestDataSet::GenerateDataSet() { if (fFileSelection.empty()) FileSelection(); // We are not ready yet - if (fFileSelection.empty()) { - RESTError << "File selection is empty " << RESTendl; - return; - } + if (fFileSelection.empty()) return; ///// Disentangling process observables --> producing finalList - TRestRun run(fFileSelection.front()); + TRestRun run(fFileSelection[0]); std::vector finalList; finalList.push_back("runOrigin"); finalList.push_back("eventID"); finalList.push_back("timeStamp"); - auto obsNames = run.GetAnalysisTree()->GetObservableNames(); - for (const auto& obs : fObservablesList) { - if (std::find(obsNames.begin(), obsNames.end(), obs) != obsNames.end()) { - finalList.push_back(obs); - } else { - RESTWarning << " Observable " << obs << " not found in observable list, skipping..." << RESTendl; - } - } + for (const auto& obs : fObservablesList) finalList.push_back(obs); + std::vector obsNames = run.GetAnalysisTree()->GetObservableNames(); for (const auto& name : obsNames) { for (const auto& pcs : fProcessObservablesList) { if (name.find(pcs) == 0) finalList.push_back(name); } } - - // Remove duplicated observables if any - std::sort(finalList.begin(), finalList.end()); - finalList.erase(std::unique(finalList.begin(), finalList.end()), finalList.end()); + /////// ROOT::EnableImplicitMT(); fDataSet = ROOT::RDataFrame("AnalysisTree", fFileSelection); - fDataSet = MakeCut(fCut); - std::string user = getenv("USER"); std::string fOutName = "/tmp/rest_output_" + user + ".root"; fDataSet.Snapshot("AnalysisTree", fOutName, finalList); @@ -326,6 +295,17 @@ void TRestDataSet::GenerateDataSet() { TFile* f = TFile::Open(fOutName.c_str()); fTree = (TTree*)f->Get("AnalysisTree"); + int cont = 0; + std::string obsListStr; + for (const auto& l : finalList) { + if (cont > 0) obsListStr += ":"; + obsListStr += l; + cont++; + } + + // We do this so that later we can recover the values using TTree::GetVal + fTree->Draw((TString)obsListStr, "", "goff"); + RESTInfo << " - Dataset initialized!" << RESTendl; } @@ -335,8 +315,8 @@ void TRestDataSet::GenerateDataSet() { std::vector TRestDataSet::FileSelection() { fFileSelection.clear(); - std::time_t time_stamp_start = REST_StringHelper::StringToTimeStamp(fFilterStartTime); - std::time_t time_stamp_end = REST_StringHelper::StringToTimeStamp(fFilterEndTime); + std::time_t time_stamp_start = REST_StringHelper::StringToTimeStamp(fStartTime); + std::time_t time_stamp_end = REST_StringHelper::StringToTimeStamp(fEndTime); if (!time_stamp_end || !time_stamp_start) { RESTError << "TRestDataSet::FileSelect. Start or end dates not properly formed. Please, check " @@ -410,10 +390,6 @@ std::vector TRestDataSet::FileSelection() { if (properties.strategy == "last") properties.value = value; } - if (run.GetStartTimestamp() < fStartTime) fStartTime = run.GetStartTimestamp(); - - if (run.GetEndTimestamp() > fEndTime) fEndTime = run.GetEndTimestamp(); - fTotalDuration += run.GetEndTimestamp() - run.GetStartTimestamp(); fFileSelection.push_back(file); } @@ -422,58 +398,14 @@ std::vector TRestDataSet::FileSelection() { return fFileSelection; } -/////////////////////////////////////////////// -/// \brief This function apply a TRestCut to the dataframe -/// and returns a dataframe with the applied cuts. Note that -/// the cuts are not applied directly to the dataframe on -/// TRestDataSet, to do so you should do fDataSet = MakeCut(fCut); -/// -ROOT::RDF::RNode TRestDataSet::MakeCut(const TRestCut* cut) { - auto df = fDataSet; - - if (cut == nullptr) return df; - - auto paramCut = cut->GetParamCut(); - auto obsList = df.GetColumnNames(); - for (const auto& [param, condition] : paramCut) { - if (std::find(obsList.begin(), obsList.end(), param) != obsList.end()) { - std::string pCut = param + condition; - RESTDebug << "Applying cut " << pCut << RESTendl; - df = df.Filter(pCut); - } else { - RESTWarning << " Cut observable " << param << " not found in observable list, skipping..." - << RESTendl; - } - } - - auto cutString = cut->GetCutStrings(); - for (const auto& pCut : cutString) { - bool added = false; - for (const auto& obs : obsList) { - if (pCut.find(obs) != std::string::npos) { - RESTDebug << "Applying cut " << pCut << RESTendl; - df = df.Filter(pCut); - added = true; - break; - } - } - - if (!added) { - RESTWarning << " Cut string " << pCut << " not found in observable list, skipping..." << RESTendl; - } - } - - return df; -} - ///////////////////////////////////////////// -/// \brief Prints on screen the information about the metadata members of TRestDataSet +/// \brief Prints on screen the information about the metadata members of TRestAxionSolarFlux /// void TRestDataSet::PrintMetadata() { TRestMetadata::PrintMetadata(); - RESTMetadata << " - StartTime : " << REST_StringHelper::ToDateTimeString(fStartTime) << RESTendl; - RESTMetadata << " - EndTime : " << REST_StringHelper::ToDateTimeString(fEndTime) << RESTendl; + RESTMetadata << " - StartTime : " << fStartTime << RESTendl; + RESTMetadata << " - EndTime : " << fEndTime << RESTendl; RESTMetadata << " - Path : " << TRestTools::SeparatePathAndName(fFilePattern).first << RESTendl; RESTMetadata << " - File pattern : " << TRestTools::SeparatePathAndName(fFilePattern).second << RESTendl; RESTMetadata << " " << RESTendl; @@ -502,8 +434,7 @@ void TRestDataSet::PrintMetadata() { if (!fFilterMetadata.empty()) { RESTMetadata << " Metadata filters: " << RESTendl; RESTMetadata << " ----------------- " << RESTendl; - RESTMetadata << " - StartTime : " << fFilterStartTime << RESTendl; - RESTMetadata << " - EndTime : " << fFilterEndTime << RESTendl; + int n = 0; for (const auto& mdFilter : fFilterMetadata) { RESTMetadata << " - " << mdFilter << "."; @@ -578,7 +509,7 @@ void TRestDataSet::InitFromConfigFile() { std::vector obsList = REST_StringHelper::Split(observables, ","); - fObservablesList.insert(fObservablesList.end(), obsList.begin(), obsList.end()); + for (const auto& l : obsList) fObservablesList.push_back(l); observablesDefinition = GetNextElement(observablesDefinition); } @@ -631,8 +562,6 @@ void TRestDataSet::InitFromConfigFile() { quantityDefinition = GetNextElement(quantityDefinition); } - - fCut = (TRestCut*)InstantiateChildMetadata("TRestCut"); } /////////////////////////////////////////////// @@ -670,8 +599,8 @@ void TRestDataSet::Export(const std::string& filename) { ///// Writing header fprintf(f, "### TRestDataSet generated file\n"); fprintf(f, "### \n"); - fprintf(f, "### StartTime : %s\n", fFilterStartTime.c_str()); - fprintf(f, "### EndTime : %s\n", fFilterEndTime.c_str()); + fprintf(f, "### StartTime : %s\n", fStartTime.c_str()); + fprintf(f, "### EndTime : %s\n", fEndTime.c_str()); fprintf(f, "###\n"); fprintf(f, "### Accumulated run time (seconds) : %lf\n", fTotalDuration); fprintf(f, "### Accumulated run time (hours) : %lf\n", fTotalDuration / 3600.); @@ -708,16 +637,6 @@ void TRestDataSet::Export(const std::string& filename) { fprintf(f, "###\n"); fprintf(f, "### Data starts here\n"); - auto obsNames = fDataSet.GetColumnNames(); - std::string obsListStr = ""; - for (const auto& l : obsNames) { - if (!obsListStr.empty()) obsListStr += ":"; - obsListStr += l; - } - - // We do this so that later we can recover the values using TTree::GetVal - fTree->Draw((TString)obsListStr, "", "goff"); - for (unsigned int n = 0; n < fTree->GetEntries(); n++) { for (unsigned int m = 0; m < GetNumberOfBranches(); m++) { std::string bName = fTree->GetListOfBranches()->At(m)->GetName(); @@ -746,62 +665,3 @@ void TRestDataSet::Export(const std::string& filename) { << " not recognized" << RESTendl; } } - -/////////////////////////////////////////////// -/// \brief Operator to copy TRestDataSet metadata -/// -TRestDataSet& TRestDataSet::operator=(TRestDataSet& dS) { - SetName(dS.GetName()); - fFilterStartTime = dS.GetFilterStartTime(); - fFilterEndTime = dS.GetFilterEndTime(); - fStartTime = dS.GetStartTime(); - fEndTime = dS.GetEndTime(); - fFilePattern = dS.GetFilePattern(); - fObservablesList = dS.GetObservablesList(); - fProcessObservablesList = dS.GetProcessObservablesList(); - fFilterMetadata = dS.GetFilterMetadata(); - fFilterContains = dS.GetFilterContains(); - fFilterGreaterThan = dS.GetFilterGreaterThan(); - fFilterLowerThan = dS.GetFilterLowerThan(); - fQuantity = dS.GetQuantity(); - fTotalDuration = dS.GetTotalTimeInSeconds(); - fCut = dS.GetCut(); - - return *this; -} - -/////////////////////////////////////////////// -/// \brief This function imports metadata from a root file -/// it import metadata info from the previous dataSet -/// while it opens the analysis tree -/// -void TRestDataSet::Import(const std::string& fileName) { - if (TRestTools::GetFileNameExtension(fileName) != "root") { - RESTError << "Datasets can only be imported from root files" << RESTendl; - return; - } - - TFile* file = TFile::Open(fileName.c_str(), "READ"); - if (file != nullptr) { - TIter nextkey(file->GetListOfKeys()); - TKey* key; - while ((key = (TKey*)nextkey())) { - std::string kName = key->GetClassName(); - if (REST_Reflection::GetClassQuick(kName.c_str()) != nullptr && - REST_Reflection::GetClassQuick(kName.c_str())->InheritsFrom("TRestDataSet")) { - TRestDataSet* dS = file->Get(key->GetName()); - if (GetVerboseLevel() >= TRestStringOutput::REST_Verbose_Level::REST_Info) - dS->PrintMetadata(); - *this = *dS; - } - } - } else { - RESTError << "Cannot open " << fileName << RESTendl; - exit(1); - } - - RESTInfo << "Opening " << fileName << RESTendl; - fDataSet = ROOT::RDataFrame("AnalysisTree", fileName); - - fTree = (TTree*)file->Get("AnalysisTree"); -}