Skip to content

Commit

Permalink
Run grid tasks in singularity.
Browse files Browse the repository at this point in the history
  • Loading branch information
yrath committed Jul 16, 2020
1 parent 18e0964 commit bd2b4e0
Show file tree
Hide file tree
Showing 12 changed files with 195 additions and 119 deletions.
3 changes: 2 additions & 1 deletion analysis/config/jet_tagging_sf.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,8 @@

# add auxiliary info to base config
cfg.set_aux("sandboxes", {
"slc6": "singularity::/cvmfs/singularity.opensciencegrid.org/bbockelm/cms:rhel6",
"slc6": "singularity::/cvmfs/singularity.opensciencegrid.org/cmssw/cms:rhel6-m20200612",
"slc7": "singularity::/cvmfs/singularity.opensciencegrid.org/cmssw/cms:rhel7-m20200612",
"NO_SANDBOX": "NO_STR",
})

Expand Down
44 changes: 27 additions & 17 deletions analysis/tasks/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ class GridWorkflow(AnalysisTask, law.glite.GLiteWorkflow, law.arc.ARCWorkflow):
}

sl_distribution_map = collections.defaultdict(lambda: "slc7", {"RWTH": "slc6"})
req_sandbox = "NO_SANDBOX"
req_sandbox = "slc7" # sandbox to run in on the grid

grid_ce = law.CSVParameter(default=["RWTH"], significant=False, description="target computing "
"element(s)")
Expand All @@ -267,28 +267,25 @@ def _setup_workflow_requires(self, reqs):

if not len(set([self.sl_distribution_map[ce] for ce in self.grid_ce])) == 1:
raise Exception("Cannot submit to multiple CEs running different distributions.")

if self.sl_distribution_map[self.grid_ce[0]] != self.sl_dist_version:
self.req_sandbox = self.sl_distribution_map[self.grid_ce[0]]
self.remote_sl_dist_version = self.sl_distribution_map[self.grid_ce[0]]

reqs["cmssw"] = UploadCMSSW.req(self, replicas=10, _prefer_cli=["replicas"],
sandbox=self.config_inst.get_aux("sandboxes")[self.req_sandbox])
reqs["software"] = UploadSoftware.req(self, replicas=10, _prefer_cli=["replicas"],
sandbox=self.config_inst.get_aux("sandboxes")[self.remote_sl_dist_version])
reqs["sandbox_software"] = UploadSoftware.req(self, replicas=10, _prefer_cli=["replicas"],
sandbox=self.config_inst.get_aux("sandboxes")[self.req_sandbox])
reqs["repo"] = UploadRepo.req(self, replicas=10, _prefer_cli=["replicas"])

def _setup_render_variables(self, config, reqs):
config.render_variables["jtsf_grid_user"] = os.getenv("JTSF_GRID_USER")
config.render_variables["jtsf_cmssw_setup"] = os.getenv("JTSF_CMSSW_SETUP")
config.render_variables["sandbox_jtsf_dist_version"] = self.req_sandbox
config.render_variables["cmssw_base_url"] = reqs["cmssw"].output().dir.uri()

scram_arch = os.getenv("SCRAM_ARCH")
if self.req_sandbox != "NO_SANDBOX":
scram_arch = scram_arch.replace(self.sl_dist_version, self.req_sandbox)
config.render_variables["scram_arch"] = scram_arch

config.render_variables["cmssw_version"] = os.getenv("CMSSW_VERSION")
config.render_variables["sandbox_cmssw_version"] = os.getenv("CMSSW_VERSION")
config.render_variables["software_base_url"] = reqs["software"].output().dir.uri()
config.render_variables["sandbox_software_base_url"] = reqs["sandbox_software"].output().dir.uri()
config.render_variables["repo_checksum"] = reqs["repo"].checksum
config.render_variables["repo_base"] = reqs["repo"].output().dir.uri()

Expand Down Expand Up @@ -410,22 +407,35 @@ class AnalysisSandboxTask(law.SandboxTask):

allow_empty_sandbox = True

# Initializer: sets the two singularity_* attributes to callables that return
# False, then delegates to the parent class initializer with the same arguments.
def __init__(self, *args, **kwargs):
# NOTE(review): presumably law consults these two callables to decide whether
# to forward law and to allow bind mounts inside the container - confirm
# against law.SandboxTask before relying on this.
self.singularity_forward_law = lambda: False
self.singularity_allow_binds = lambda: False
# the attributes are assigned before the parent initializer runs
super(AnalysisSandboxTask, self).__init__(*args, **kwargs)

# Returns ["--bind", "/cvmfs"] when the JTSF_ON_GRID environment variable is
# the string "1", and an empty list otherwise.
# NOTE(review): presumably law appends this list to the singularity command
# line - confirm against law.SandboxTask.
def singularity_args(self):
# the fallback 0 is an int and never equals "1", so an unset variable
# selects the else branch
if os.environ.get("JTSF_ON_GRID", 0) == "1":
return ["--bind", "/cvmfs"]
else:
return []

def sandbox_setup_cmds(self):
cmds = super(AnalysisSandboxTask, self).sandbox_setup_cmds()

if os.environ.get("JTSF_ON_GRID") == "1":
for var in ["JTSF_DATA", "JTSF_STORE", "JTSF_LOCAL_CACHE",
"JTSF_GRID_USER", "JTSF_ON_GRID", "TMP", "LAW_JOB_HOME"]:
cmds.append('export {}="{}"'.format(var, os.environ[var]))
# environment variables that may differ between sandbox and outer layer
for var in ["JTSF_SOFTWARE", "CMSSW_VERSION", "CMSSW_BASE", "X509_USER_PROXY"]:
cmds.append('export {}="{}"'.format(var, os.environ["SANDBOX_" + var]))

cmds.append('export JTSF_CMSSW_SETUP="{}"'.format(os.environ["JTSF_CMSSW_SETUP"]))
cmds.append("source {}".format(os.path.join(os.environ["JTSF_BASE"], "setup.sh")))
cmds.append("source {}".format(os.path.join(
os.environ["JTSF_BASE"], "singularity", "setup_$JTSF_DIST_VERSION.sh"))
os.environ["JTSF_BASE"], "singularity", "setup_slc7.sh"))
)

return cmds

# Initializer: sets the two singularity_* attributes to callables that return
# False, then delegates to the parent class initializer with the same arguments.
def __init__(self, *args, **kwargs):
# NOTE(review): presumably law consults these two callables to decide whether
# to forward law and to allow bind mounts inside the container - confirm
# against law.SandboxTask before relying on this.
self.singularity_forward_law = lambda: False
self.singularity_allow_binds = lambda: False
# the attributes are assigned before the parent initializer runs
super(AnalysisSandboxTask, self).__init__(*args, **kwargs)


class UploadCMSSW(AnalysisTask, law.tasks.TransferLocalFile, AnalysisSandboxTask,
law.cms.BundleCMSSW):
Expand Down
41 changes: 22 additions & 19 deletions analysis/tasks/files/grid_bootstrap.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,17 @@ action() {
export GFAL_PLUGIN_DIR_ORIG="$GFAL_PLUGIN_DIR"

export JTSF_DATA="$TMP/jtsf_data"
export JTSF_SOFTWARE="$JTSF_DATA/software"
export JTSF_SOFTWARE="$JTSF_DATA/$JTSF_DIST_VERSION/software"
export SANDBOX_JTSF_SOFTWARE="$JTSF_DATA/{{sandbox_jtsf_dist_version}}/software"

export JTSF_STORE="$JTSF_DATA/store"
export JTSF_LOCAL_CACHE="$JTSF_DATA/cache"

export JTSF_GRID_USER="{{jtsf_grid_user}}"
export JTSF_CMSSW_SETUP="{{jtsf_cmssw_setup}}"

export SCRAM_ARCH="{{scram_arch}}"
export CMSSW_VERSION="{{cmssw_version}}"
export CMSSW_BASE="$JTSF_DATA/cmssw/$CMSSW_VERSION"
export JTSF_CMSSW_SETUP="{{jtsf_cmssw_setup}}"
export SANDBOX_CMSSW_VERSION="{{sandbox_cmssw_version}}"
export SANDBOX_CMSSW_BASE="$JTSF_DATA/cmssw/{{sandbox_scram_arch}}/$SANDBOX_CMSSW_VERSION"

export JTSF_ON_GRID="1"

Expand All @@ -58,14 +60,9 @@ action() {
# load CMSSW
#

source "/cvmfs/cms.cern.ch/cmsset_default.sh"
mkdir -p "$( dirname "$CMSSW_BASE" )"
cd "$( dirname "$CMSSW_BASE" )"
scramv1 project CMSSW "$CMSSW_VERSION"
cd "$CMSSW_VERSION"
load_replica "{{cmssw_base_url}}" "$CMSSW_VERSION\.\d+\.tgz" "cmssw.tgz"
tar -xzf "cmssw.tgz"
rm "cmssw.tgz"
mkdir -p "$( dirname "$SANDBOX_CMSSW_BASE" )"
cd "$( dirname "$SANDBOX_CMSSW_BASE" )"
load_replica "{{cmssw_base_url}}" "$SANDBOX_CMSSW_VERSION\.\d+\.tgz" "cmssw.tgz"
cd "$TMP"

#
Expand All @@ -79,6 +76,15 @@ action() {
rm "software.tgz"
cd "$TMP"

if [[ "$JTSF_SOFTWARE" != "$SANDBOX_JTSF_SOFTWARE" ]]; then
mkdir -p "$SANDBOX_JTSF_SOFTWARE"
cd "$SANDBOX_JTSF_SOFTWARE"
load_replica "{{sandbox_software_base_url}}" "software\.\d+\.tgz" "software.tgz"
tar -xzf "software.tgz"
rm "software.tgz"
cd "$TMP"
fi

#
# load the repo bundle
#
Expand All @@ -87,12 +93,9 @@ action() {
tar -xzf "repo.tgz"
rm "repo.tgz"

# setup CMSSW

cd "$CMSSW_BASE/src"
eval `scramv1 runtime -sh`
scram build
cd "$TMP"
# copy user proxy
cp $X509_USER_PROXY "grid.proxy"
export SANDBOX_X509_USER_PROXY="$TMP/grid.proxy"

# source the repo setup
source "jet-tagging-sf/setup.sh"
Expand Down
19 changes: 11 additions & 8 deletions analysis/tasks/trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,21 @@
import luigi
import six

from analysis.tasks.base import AnalysisTask, DatasetTask, WrapperTask, GridWorkflow, HTCondorWorkflow
from analysis.tasks.base import AnalysisTask, DatasetTask, WrapperTask, GridWorkflow, HTCondorWorkflow, AnalysisSandboxTask
from analysis.tasks.external import GetDatasetLFNs, DownloadSetupFiles
from analysis.util import wget, determine_xrd_redirector
from analysis.config.jet_tagging_sf import xrd_redirectors

class WriteTrees(DatasetTask, GridWorkflow, law.LocalWorkflow, HTCondorWorkflow):

max_events = luigi.IntParameter(default=law.NO_INT)
class WriteTrees(DatasetTask, AnalysisSandboxTask, GridWorkflow, law.LocalWorkflow):

max_events = luigi.IntParameter(default=law.NO_INT)
workflow_run_decorators = [law.decorator.notify]

stream_input_file = False

sandbox = "singularity::/cvmfs/singularity.opensciencegrid.org/cmssw/cms:rhel7-m20200612"

def workflow_requires(self):
if self.cancel_jobs or self.cleanup_jobs:
return {}
Expand Down Expand Up @@ -69,18 +71,19 @@ def run(self):
jes_unc_src_file = setup_files["jes_unc_src_file"] if self.dataset_inst.is_mc else ""

# determine the xrd redirector and download the file
redirector = determine_xrd_redirector(lfn)
redirector = xrd_redirectors[0] #determine_xrd_redirector(lfn)
xrd_url = "root://{}/{}".format(redirector, lfn)

if self.stream_input_file:
input_file = xrd_url
else:
input_file = "file://" + tmp_dir.child("input_file.root", type="f").path
cmd = "xrdcp-old {} {}".format(xrd_url, input_file)
input_file = "input_file.root"
cmd = "xrdcp {} {}".format(xrd_url, input_file)
with self.publish_step("download input file from {} ...".format(xrd_url)):
code = law.util.interruptable_popen(cmd, shell=True, executable="/bin/bash")[0]
code = law.util.interruptable_popen(cmd, shell=True, cwd=tmp_dir.path, executable="/bin/bash")[0]
if code != 0:
raise Exception("xrdcp failed")
input_file = "file://" + os.path.join(tmp_dir.path, input_file)

# cmsRun argument helper
def cmsRunArg(key, value):
Expand Down Expand Up @@ -159,7 +162,7 @@ class WriteTreesWrapper(WrapperTask):
wrapped_task = WriteTrees


class MergeTrees(DatasetTask, law.tasks.CascadeMerge, GridWorkflow, HTCondorWorkflow):
class MergeTrees(DatasetTask, AnalysisSandboxTask, law.tasks.CascadeMerge, GridWorkflow):

merge_factor = 25

Expand Down
43 changes: 26 additions & 17 deletions cmssw/setup_Legacy16.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,29 +19,38 @@ action() {
eval `scramv1 runtime -sh`
scram b


#
# custom topics
#

# ECAL scale and resolution corrections
# https://twiki.cern.ch/twiki/bin/viewauth/CMS/Egamma2017DataRecommendations
git cms-merge-topic cms-egamma:EgammaPostRecoTools_940
# Electron VID
git cms-merge-topic cms-egamma:EgammaID_949

# new DeepJet Training
git cms-addpkg RecoBTag/TensorFlow
git cherry-pick 94ceae257f846998c357fcad408986cc8a039152

# deterministic seeds
git cms-merge-topic yrath:deterministicSeeds
if [ "$JTSF_ON_GRID" == "1" ]; then # unpack custom installation from .tgz
cd "$( dirname "$CMSSW_BASE" )"
if [ -f "cmssw.tgz" ]; then
tar -xzvf "cmssw.tgz" --directory $CMSSW_BASE
rm "cmssw.tgz"
fi
cd "$CMSSW_BASE/src"
else
#
# custom topics
#

# ECAL scale and resolution corrections
# https://twiki.cern.ch/twiki/bin/viewauth/CMS/Egamma2017DataRecommendations
git cms-merge-topic cms-egamma:EgammaPostRecoTools_940
# Electron VID
git cms-merge-topic cms-egamma:EgammaID_949

# new DeepJet Training
git cms-addpkg RecoBTag/TensorFlow
git cherry-pick 94ceae257f846998c357fcad408986cc8a039152

# deterministic seeds
git cms-merge-topic yrath:deterministicSeeds
fi

scram b -j "$scram_cores"

else
cd "$CMSSW_BASE/src"
eval `scramv1 runtime -sh`
scram build
fi

cd "$origin"
Expand Down
47 changes: 28 additions & 19 deletions cmssw/setup_Legacy17.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,36 +19,45 @@ action() {
eval `scramv1 runtime -sh`
scram b

if [ "$JTSF_ON_GRID" == "1" ]; then # unpack custom installation from .tgz
cd "$( dirname "$CMSSW_BASE" )"
if [ -f "cmssw.tgz" ]; then
tar -xzvf "cmssw.tgz" --directory $CMSSW_BASE
rm "cmssw.tgz"
fi
cd "$CMSSW_BASE/src"
else
#
# custom topics
#

#
# custom topics
#
# ECAL scale and resolution corrections
# https://twiki.cern.ch/twiki/bin/viewauth/CMS/Egamma2017DataRecommendations
git cms-merge-topic cms-egamma:EgammaPostRecoTools_940
# Electron VID
git cms-merge-topic cms-egamma:EgammaID_949

# ECAL scale and resolution corrections
# https://twiki.cern.ch/twiki/bin/viewauth/CMS/Egamma2017DataRecommendations
git cms-merge-topic cms-egamma:EgammaPostRecoTools_940
# Electron VID
git cms-merge-topic cms-egamma:EgammaID_949
# new DeepJet Training
git cms-addpkg RecoBTag/TensorFlow
git cherry-pick 94ceae257f846998c357fcad408986cc8a039152

# new DeepJet Training
git cms-addpkg RecoBTag/TensorFlow
git cherry-pick 94ceae257f846998c357fcad408986cc8a039152
# Updated MET filter
# https://twiki.cern.ch/twiki/bin/viewauth/CMS/MissingETOptionalFiltersRun2#Moriond%202018
git cms-addpkg RecoMET/METFilters

# Updated MET filter
# https://twiki.cern.ch/twiki/bin/viewauth/CMS/MissingETOptionalFiltersRun2#Moriond%202018
git cms-addpkg RecoMET/METFilters
# MET EE fix
git cms-merge-topic cms-met:METFixEE2017_949_v2

# MET EE fix
git cms-merge-topic cms-met:METFixEE2017_949_v2

# deterministic seeds
git cms-merge-topic yrath:deterministicSeeds
# deterministic seeds
git cms-merge-topic yrath:deterministicSeeds
fi

scram b -j "$scram_cores"

else
cd "$CMSSW_BASE/src"
eval `scramv1 runtime -sh`
scram build
fi

cd "$origin"
Expand Down
Loading

0 comments on commit bd2b4e0

Please sign in to comment.