Skip to content

Commit

Permalink
Merge pull request #345 from rest-for-physics/jgalan_dataset
Browse files Browse the repository at this point in the history
Implementation of TRestDataSet
  • Loading branch information
jgalan authored Dec 13, 2022
2 parents cadb987 + 17229cc commit 7877c37
Show file tree
Hide file tree
Showing 5 changed files with 819 additions and 8 deletions.
13 changes: 13 additions & 0 deletions examples/dataset.rml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<?xml version="1.0"?>
<!-- Example TRestDataSet definition: it selects a group of run files and declares which
     observables and summary quantities should be kept with the resulting dataset. -->
<TRestDataSet name="BabyIAXO">
<!-- Time window for the run selection: runs are expected to start after startTime and
     end before endTime. -->
<parameter name="startTime" value="2022/04/28 00:00"/>
<parameter name="endTime" value="2022/11/25 13:20"/>
<!-- Glob pattern that candidate files must match. NOTE(review): the relative path is
     presumably resolved from the directory where the example is executed; confirm. -->
<parameter name="filePattern" value="../../framework-data/pipeline/dataset/BabyIAXO_TrueWolterMicromegasTest*.root"/>
<!-- Metadata filters. The first one bounds a numeric metadata member (greaterThan/lowerThan),
     the second one requires a metadata member to contain a given text. -->
<filter metadata="TRestRun::fRunNumber" greaterThan="370" lowerThan="375"/>
<filter metadata="TRestRun::fRunTag" contains="Test"/>
<!-- Comma separated list of individual observables to be included in the dataset. -->
<observables list="window_transmission,optics_efficiency,axionPhoton_probability,axionPhoton_transmission,boreExitGate_transmission"/>
<!-- Processes for which all their observables should be included. -->
<processObservables list="final"/>
<!-- Relevant quantities stored together with the dataset. Each one is built from a metadata
     expression written between square brackets, and "strategy" selects how the quantity is
     produced. Note that "-&gt;" is the XML escape of the "->" accessor. -->
<quantity name="Nsim" metadata="[TRestProcessRunner::fEventsToProcess]" strategy="accumulate" description="The total number of simulated events."/>
<quantity name="SolarFlux" metadata="[axionGen-&gt;fTotalFlux]" strategy="unique" description="The integrated solar flux at earth (in cm-2 s-1) at the given energy range."/>
<!-- NOTE(review): the two divisions by 10 presumably convert the radius from mm to cm so that
     the area comes out in cm2, as stated in the description; confirm the radius units. -->
<quantity name="GeneratorArea" metadata="3.1416 * [axionGen::fTargetRadius]/10 * [axionGen::fTargetRadius]/10" strategy="unique" description="The area where the generator launched events (in cm2)."/>
</TRestDataSet>
2 changes: 1 addition & 1 deletion source/framework/core/inc/TRestAnalysisTree.h
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ class TRestAnalysisTree : public TTree {
Bool_t EvaluateCuts(const std::string& expression);
Bool_t EvaluateCut(const std::string& expression);

TString GetStringWithObservableNames();
std::vector<std::string> GetObservableNames();

std::vector<std::string> GetCutObservables(const std::string& cut_str);

Expand Down
131 changes: 131 additions & 0 deletions source/framework/core/inc/TRestDataSet.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
/*************************************************************************
* This file is part of the REST software framework. *
* *
* Copyright (C) 2016 GIFNA/TREX (University of Zaragoza) *
* For more information see https://gifna.unizar.es/trex *
* *
* REST is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* REST is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have a copy of the GNU General Public License along with *
* REST in $REST_PATH/LICENSE. *
* If not, see https://www.gnu.org/licenses/. *
* For the list of contributors see $REST_PATH/CREDITS. *
*************************************************************************/

#ifndef REST_TRestDataSet
#define REST_TRestDataSet

#include <TTimeStamp.h>

#include <ROOT/RDataFrame.hxx>

#include "TRestMetadata.h"

/// Plain aggregate describing one "relevant quantity" that is stored together with a dataset.
struct RelevantQuantity {
    /// The associated metadata member used to register the relevant quantity
    std::string metadata;

    /// It determines how to produce the relevant quantity (accumulate/unique/last/max/min)
    std::string strategy;

    /// A user given description that can be used to define the relevant quantity
    std::string description;

    /// The quantity value. It is zero-initialized so that a default-initialized instance
    /// (e.g. a local `RelevantQuantity q;`) never carries an indeterminate value.
    Double_t value = 0;
};

/// It allows to group a number of runs that satisfy given metadata conditions
/// It allows to group a number of runs that satisfy given metadata conditions
///
/// Members tagged with `//<` are regular data members, while members tagged with `//!`
/// are marked as transient for the ROOT dictionary (they are not written to disk).
/// The order of the data members is kept on purpose.
class TRestDataSet : public TRestMetadata {
   private:
    /// All the selected runs will have a starting date after fStartTime
    std::string fStartTime = "2000/01/01";  //<

    /// All the selected runs will have an ending date before fEndTime
    std::string fEndTime = "3000/12/31";  //<

    /// A glob file pattern that must be satisfied by all files
    std::string fFilePattern = "";  //<

    /// It contains a list of the observables that will be added to the final tree or exported file
    std::vector<std::string> fObservablesList;  //<

    /// It contains a list of the process where all observables should be added
    std::vector<std::string> fProcessObservablesList;  //<

    /// A list of metadata members where filters will be applied
    std::vector<std::string> fFilterMetadata;  //<

    /// If not empty it will check if the metadata member contains the value
    std::vector<std::string> fFilterContains;  //<

    /// Lower bound applied to the corresponding metadata member (the member must be greater).
    /// NOTE(review): the value used to mark "no lower bound" is not visible in this header.
    std::vector<Double_t> fFilterGreaterThan;  //<

    /// Upper bound applied to the corresponding metadata member (the member must be lower).
    /// NOTE(review): the value used to mark "no upper bound" is not visible in this header.
    std::vector<Double_t> fFilterLowerThan;  //<

    /// The properties of a relevant quantity that we want to store together with the dataset
    std::map<std::string, RelevantQuantity> fQuantity;  //<

    /// The total integrated run time of selected files
    Double_t fTotalDuration = 0;  //<

    /// The resulting RDataFrame object after initialization
    ROOT::RDataFrame fDataSet = 0;  //!

    /// A pointer to the generated tree
    TTree* fTree = nullptr;  //!

    /// A list populated by the FileSelection method using the conditions of the dataset
    std::vector<std::string> fFileSelection;  //!

    void InitFromConfigFile() override;

   protected:
    /// It produces the list of files selected by the dataset conditions. It is virtual so
    /// that derived classes may provide their own selection.
    virtual std::vector<std::string> FileSelection();

   public:
    /// Gives access to the RDataFrame (returned by value). A warning is printed when the
    /// tree pointer is still null, which is used here as the "not initialized" indicator.
    ROOT::RDataFrame GetDataFrame() const {
        if (fTree == nullptr) RESTWarning << "DataFrame has not been yet initialized" << RESTendl;
        return fDataSet;
    }

    /// Gives access to the tree. A warning is printed, and nullptr returned, when the tree
    /// has not been generated yet.
    TTree* GetTree() const {
        if (fTree == nullptr) RESTWarning << "Tree has not been yet initialized" << RESTendl;
        return fTree;
    }

    /// Number of variables (or observables), i.e. the number of columns in the RDataFrame.
    /// NOTE(review): left non-const because it calls fDataSet.GetColumnNames(), whose
    /// const-qualification is not visible from this header.
    size_t GetNumberOfColumns() { return fDataSet.GetColumnNames().size(); }

    /// Alias of GetNumberOfColumns() using the tree (branch) terminology
    size_t GetNumberOfBranches() { return GetNumberOfColumns(); }

    /// It returns a copy of the list of the files that have been finally selected.
    /// It is const-qualified, as the other read-only getters, so that it can be used
    /// through const references.
    std::vector<std::string> GetFileSelection() const { return fFileSelection; }

    /// It returns the accumulated run time in seconds
    Double_t GetTotalTimeInSeconds() const { return fTotalDuration; }

    /// It exports the dataset to the given filename
    void Export(const std::string& filename);

    void PrintMetadata() override;
    void Initialize() override;
    TRestDataSet();
    TRestDataSet(const char* cfgFileName, const std::string& name = "");
    ~TRestDataSet();

    ClassDefOverride(TRestDataSet, 1);
};
#endif
15 changes: 8 additions & 7 deletions source/framework/core/src/TRestAnalysisTree.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -1022,18 +1022,19 @@ Double_t TRestAnalysisTree::GetObservableMinimum(const TString& obsName, Double_
}

///////////////////////////////////////////////
/// \brief It returns a string containing all the observables that exist in the analysis tree.
/// \brief It returns a vector with strings containing all the observables that exist in
/// the analysis tree.
///
TString TRestAnalysisTree::GetStringWithObservableNames() {
Int_t nEntries = GetEntries();
std::vector<std::string> TRestAnalysisTree::GetObservableNames() {
std::vector<std::string> names;

// Int_t nEntries = GetEntries();
auto branches = GetListOfBranches();
std::string branchNames = "";
for (int i = 0; i < branches->GetEntries(); i++) {
if (i > 0) branchNames += " ";
branchNames += (string)branches->At(i)->GetName();
names.push_back((string)branches->At(i)->GetName());
}

return (TString)branchNames;
return names;
}

///////////////////////////////////////////////
Expand Down
Loading

0 comments on commit 7877c37

Please sign in to comment.