Skip to content

Commit

Permalink
fix: refactor config objects
Browse files Browse the repository at this point in the history
  • Loading branch information
tcjennings committed Jan 29, 2025
1 parent a101ef4 commit b32f994
Show file tree
Hide file tree
Showing 8 changed files with 59 additions and 40 deletions.
22 changes: 15 additions & 7 deletions src/lsst/cmservice/common/butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,11 @@
from lsst.utils.db_auth import DbAuth

from ..common import errors
from ..common.logging import LOGGER
from ..config import config

logger = LOGGER.bind(module=__name__)


# TODO this should be cached for the lifetime of the service
def parse_butler_repos_from_environment() -> Mapping[str, str]:
Expand Down Expand Up @@ -55,22 +58,27 @@ async def get_butler_config(repo: str, *, without_datastore: bool = False) -> Bu
repo_uri = parse_butler_repos_from_environment()[repo]
except KeyError:
# No such repo known to the service
# TODO log an error, otherwise let butler try to sort it out
logger.warning("Butler repo %s not known to environment.", repo)
repo_uri = repo

bc = ButlerConfig(
other=repo_uri,
without_datastore=without_datastore,
)
db_url = url.make_url(bc["registry"]["db"])
try:
bc = ButlerConfig(
other=repo_uri,
without_datastore=without_datastore,
)
except FileNotFoundError:
# No such repo known to Butler
logger.error("Butler repo %s not known.", repo)
raise RuntimeError("Unknown Butler Repo %s", repo)

try:
db_auth_info = yaml.safe_load(await Path(config.butler.authentication_file).read_text())
except FileNotFoundError:
# TODO Log error or warning
logger.error("No Butler Registry authentication secrets found.")
# delegate db auth info discovery to normal toolchain
return bc

db_url = url.make_url(bc["registry"]["db"])
db_auth = DbAuth(authList=db_auth_info).getAuth(
dialectname=db_url.drivername,
username=db_url.username,
Expand Down
1 change: 1 addition & 0 deletions src/lsst/cmservice/common/daemon.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

async def daemon_iteration(session: async_scoped_session) -> None:
iteration_start = datetime.now()
# TODO limit query to queues that do not have a "time_finished"
queue_entries = await session.execute(select(Queue).where(Queue.time_next_check < iteration_start))
logger.debug("Daemon Iteration: %s", iteration_start)

Expand Down
20 changes: 10 additions & 10 deletions src/lsst/cmservice/common/htcondor.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,17 +192,17 @@ def build_htcondor_submit_environment() -> Mapping[str, str]:
_CONDOR_COLLECTOR_HOST=config.htcondor.collector_host,
_CONDOR_SCHEDD_HOST=config.htcondor.schedd_host,
_CONDOR_SEC_CLIENT_AUTHENTICATION_METHODS=config.htcondor.authn_methods,
DAF_BUTLER_REPOSITORY_INDEX="/sdf/group/rubin/shared/data-repos.yaml",
_CONDOR_DAGMAN_MANAGER_JOB_APPEND_GETENV=str(config.htcondor.dagman_job_append_get_env),
DAF_BUTLER_REPOSITORY_INDEX=config.butler.repository_index,
FS_REMOTE_DIR=config.htcondor.fs_remote_dir,
HOME="/sdf/home/l/lsstsvc1",
LSST_VERSION="w_latest",
LSST_DISTRIB_DIR="/sdf/group/rubin/sw",
# LSST_DISTRIB_DIR="/cvmfs/sw.lsst.eu/linux-x86_64/lsst_distrib",
PGPASSFILE="/sdf/home/l/lsstsvc1/.lsst/postgres-credentials.txt",
PGUSER="rubin",
HOME=config.htcondor.user_home,
LSST_VERSION=config.bps.lsst_version,
LSST_DISTRIB_DIR=config.bps.lsst_distrib_dir,
# FIX: because there is no db-auth.yaml in lsstsvc1's home directory
PGPASSFILE=f"{config.htcondor.user_home}/.lsst/postgres-credentials.txt",
PGUSER=config.butler.default_username,
PATH=(
"/sdf/home/l/lsstsvc1/.local/bin:/sdf/home/l/lsstsvc1/bin:"
"/opt/slurm/slurm-curr/bin:/usr/local/bin:/usr/bin:"
"/usr/local/sbin:/usr/sbin"
f"{config.htcondor.user_home}/.local/bin:{config.htcondor.user_home}/bin:{config.slurm.home}:"
f"/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin"
),
)
4 changes: 2 additions & 2 deletions src/lsst/cmservice/common/slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ async def submit_slurm_job(
try:
async with await open_process(
[
config.slurm.sbatch_bin,
f"{config.slurm.home}/sbatch",
"-o",
log_url,
"--mem",
Expand Down Expand Up @@ -119,7 +119,7 @@ async def check_slurm_job(
return StatusEnum.running
try:
async with await open_process(
[config.slurm.sacct_bin, "--parsable", "-b", "-j", slurm_id]
[f"{config.slurm.home}/sacct", "--parsable", "-b", "-j", slurm_id]
) as slurm_check: # pragma: no cover
await slurm_check.wait()
if slurm_check.returncode != 0:
Expand Down
37 changes: 26 additions & 11 deletions src/lsst/cmservice/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,16 @@ class BpsConfiguration(BaseModel):
FIXME: rename to LsstConfiguration and consolidate multiple models?
"""

lsst_version: str = Field(
description="Default LSST version",
default="w_latest",
)

lsst_distrib_dir: str = Field(
description="Default distribution directory from which to setup lsst",
default="/sdf/group/rubin/sw",
)

bps_bin: str = Field(
description="Name of a bps client binary",
default="bps",
Expand Down Expand Up @@ -61,6 +71,11 @@ class ButlerConfiguration(BaseModel):
default="~/.lsst/db-auth.yaml",
)

default_username: str = Field(
description="Default username to use for Butler registry authentication",
default="rubin",
)

access_token: str | None = Field(
description=("Gafaelfawr access token used to authenticate to a Butler server."),
default=None,
Expand Down Expand Up @@ -99,6 +114,11 @@ class HTCondorConfiguration(BaseModel):
their serialization alias.
"""

user_home: str = Field(
description=("Path to the user's home directory, as resolvable from an htcondor access node."),
default="/sdf/home/l/lsstsvc1",
)

condor_home: str = Field(
description=("Path to the Condor home directory. Equivalent to the condor ``RELEASE_DIR`` macro."),
default="/opt/htcondor",
Expand Down Expand Up @@ -193,6 +213,8 @@ class HTCondorConfiguration(BaseModel):
)


# TODO deprecate and remove "slurm"-specific logic from cm-service; it is
# unlikely that interfacing with slurm directly from k8s will be possible.
class SlurmConfiguration(BaseModel):
"""Configuration settings for slurm client operations.
Expand All @@ -202,21 +224,14 @@ class SlurmConfiguration(BaseModel):
----
Default SBATCH_* variables could work just as well, but it is useful to
have this as a document of what settings are actually used.
These settings should also apply to htcondor resource allocation jobs
that are equivalent to "allocateNodes"
"""

home: str = Field(
description="Location of the installed slurm client binaries",
default="",
)

sacct_bin: str = Field(
description="Name of sacct slurm client binary",
default="sacct",
)

sbatch_bin: str = Field(
description="Name of sbatch slurm client binary",
default="sbatch",
default="/opt/slurm/slurm-curr/bin",
)

memory: str = Field(
Expand Down
3 changes: 0 additions & 3 deletions tests/common/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,18 @@ def test_config_nested_partial_update(monkeypatch: Any) -> None:
config_a = Configuration()
assert not config_a.db.echo
assert config_a.htcondor.condor_q_bin == "condor_q"
assert config_a.slurm.sacct_bin == "sacct"
assert config_a.asgi.port == 8080
assert config_a.logging.profile == "development"

monkeypatch.setenv("LOGGING__PROFILE", "production")
monkeypatch.setenv("ASGI__PORT", "5000")
monkeypatch.setenv("HTCONDOR__CONDOR_Q_BIN", "/usr/local/bin/condor_q")
monkeypatch.setenv("SLURM__SACCT_BIN", "/opt/slurm/bin/sacct")
monkeypatch.setenv("DB__ECHO", "1")

# Partial updates in nested configuration models
config_b = Configuration()
assert config_b.db.echo
assert config_b.htcondor.condor_q_bin == "/usr/local/bin/condor_q"
assert config_b.slurm.sacct_bin == "/opt/slurm/bin/sacct"
assert config_b.asgi.port == 5000
assert config_b.logging.profile == "production"

Expand Down
6 changes: 2 additions & 4 deletions tests/db/test_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,9 +246,8 @@ async def test_handlers_group_level_db(
dict(
lsst_distrib_dir="lsst_distrib_dir",
bps_core_yaml_template="bps_core_yaml_template",
bps_core_script_template="bps_core_script_template",
bps_core_script_template="stack_script_template",
bps_panda_script_template="bps_panda_script_template",
bps_htcondor_script_template="bps_htcondor_script_template",
manifest_script_template="stack_script_template",
)

Expand Down Expand Up @@ -304,9 +303,8 @@ async def test_handlers_job_level_db(
data = dict(
lsst_distrib_dir="lsst_distrib_dir",
bps_core_yaml_template="bps_core_yaml_template",
bps_core_script_template="bps_core_script_template",
bps_core_script_template="stack_script_template",
bps_panda_script_template="bps_panda_script_template",
bps_htcondor_script_template="bps_htcondor_script_template",
manifest_script_template="stack_script_template",
)

Expand Down
6 changes: 3 additions & 3 deletions tests/db/test_micro.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,13 @@ async def test_micro_db(
with pytest.raises(errors.CMSpecificationError):
await specification.get_script_template(session, "bad")

script_template = await specification.get_script_template(session, "bps_core_script_template")
assert script_template.name == "bps_core_script_template", "Script template name mismatch"
script_template = await specification.get_script_template(session, "stack_script_template")
assert script_template.name == "stack_script_template", "Script template name mismatch"

await script_template.update_from_file(
session,
script_template.name,
"examples/templates/example_bps_core_script_template.yaml",
"examples/templates/example_stack_script_template.yaml",
)

campaign = await interface.load_and_create_campaign(
Expand Down

0 comments on commit b32f994

Please sign in to comment.