Add task scheduler to backend services (cron jobs) #2147

Merged · 10 commits · Feb 20, 2025
37 changes: 36 additions & 1 deletion compose.yaml
@@ -77,7 +77,7 @@ services:
volumes:
- fmtm_logs:/opt/logs
- ./src/backend/pyproject.toml:/opt/pyproject.toml:ro
- ./src/backend/app:/opt/app
- ./src/backend/app:/opt/app:ro
- ./src/backend/tests:/opt/tests:ro
- ./src/backend/stats:/opt/stats:ro
# - ../osm-fieldwork/osm_fieldwork:/opt/python/lib/python3.12/site-packages/osm_fieldwork:ro
@@ -352,3 +352,38 @@ services:
restart: "on-failure:2"
healthcheck:
test: ["NONE"] # Set the health check test to NONE to disable it

scheduler:
image: "ghcr.io/hotosm/fmtm/backend:${TAG_OVERRIDE:-debug}"
depends_on:
fmtm-db:
condition: service_healthy
env_file:
- .env
environment:
DEBUG: false
volumes:
- ./src/backend/app:/opt/app:ro
- ./src/backend/scheduler:/opt/scheduler:ro
networks:
- fmtm-net
entrypoint: ["/bin/sh", "-c"]
# The approach below allows us to easily switch to Kubernetes CronJob if needed
command: |
"
# Task unlocking every 3hrs
echo '0 */3 * * * /opt/scheduler/unlock_tasks.py' > ./crontab

# Check inactive users every Sunday 00:00
echo '0 0 * * 0 /opt/scheduler/inactive_users.py' >> ./crontab

exec /usr/local/bin/supercronic ./crontab
"
restart: "unless-stopped"
# Check that the 'supercronic' process is still running
healthcheck:
test: ["CMD", "pgrep", "supercronic"]
interval: 5m
timeout: 10s
retries: 3
start_period: 10s
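
The two crontab entries above run standalone scripts under `/opt/scheduler` that are not shown in this diff. A minimal sketch of what `unlock_tasks.py` might look like, assuming it builds its own psycopg connection from the `FMTM_DB_*` environment variables and reuses the existing `task_crud.trigger_unlock_tasks` coroutine (the DSN construction and connection handling here are assumptions, not the merged script):

```python
#!/usr/bin/env python
"""Hypothetical sketch of /opt/scheduler/unlock_tasks.py (not the merged script)."""

import asyncio
import os

from psycopg import AsyncConnection

from app.tasks.task_crud import trigger_unlock_tasks


async def main() -> None:
    # Build a DSN from the same env vars the compose stack already provides.
    dsn = (
        f"postgresql://{os.environ['FMTM_DB_USER']}:{os.environ['FMTM_DB_PASSWORD']}"
        f"@{os.getenv('FMTM_DB_HOST', 'fmtm-db')}/{os.getenv('FMTM_DB_NAME', 'fmtm')}"
    )
    # trigger_unlock_tasks is the same helper the /tasks/unlock-tasks endpoint calls.
    async with await AsyncConnection.connect(dsn) as db:
        await trigger_unlock_tasks(db)


if __name__ == "__main__":
    asyncio.run(main())
```

Note that for supercronic to execute the file directly as written in the crontab, the script also needs a shebang and its executable bit set; alternatively the crontab line could invoke `python /opt/scheduler/unlock_tasks.py`.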
8 changes: 7 additions & 1 deletion deploy/compose.development.yaml
@@ -204,7 +204,6 @@ services:
MINIO_ROOT_PASSWORD: ${S3_SECRET_KEY}
MINIO_VOLUMES: "/mnt/data"
MINIO_BROWSER: "off"
# MINIO_CONSOLE_ADDRESS: ":9090"
volumes:
- fmtm_data:/mnt/data
networks:
@@ -289,6 +288,13 @@ services:
entrypoint: ["/migrate-entrypoint.sh"]
restart: "on-failure:2"

scheduler:
extends:
file: ../compose.yaml
service: scheduler
image: "ghcr.io/hotosm/fmtm/backend:${GIT_BRANCH}"
volumes: []

certbot:
image: "ghcr.io/hotosm/fmtm/proxy:certs-init-development"
volumes:
148 changes: 27 additions & 121 deletions deploy/compose.main.yaml
@@ -70,141 +70,47 @@ services:
restart: "unless-stopped"

api:
image: "ghcr.io/hotosm/fmtm/backend:main"
volumes:
- fmtm_logs:/opt/logs
depends_on:
fmtm-db:
condition: service_healthy
migrations:
condition: service_completed_successfully
s3:
condition: service_healthy
env_file:
- .env
networks:
- fmtm-net
restart: "unless-stopped"
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8000/__lbheartbeat__"]
start_period: 60s
interval: 10s
timeout: 5s
retries: 10
deploy:
replicas: ${API_REPLICAS:-4}
resources:
limits:
cpus: "0.9"
memory: 1500M
reservations:
cpus: "0.1"
memory: 100M
extends:
file: compose.staging.yaml
service: api

ui:
# This service simply builds the frontend to a volume
# accessible to the proxy, then shuts down
image: "ghcr.io/hotosm/fmtm/frontend:main"
build:
context: src
dockerfile: Dockerfile.ui.prod
args:
APP_VERSION: main
VITE_API_URL: https://${FMTM_API_DOMAIN:-api.${FMTM_DOMAIN}}
VITE_SYNC_URL: https://${FMTM_SYNC_DOMAIN:-sync.${FMTM_DOMAIN}}
volumes:
- fmtm_frontend:/frontend
network_mode: none
restart: "on-failure:2"
extends:
file: compose.staging.yaml
service: ui

s3:
image: "docker.io/minio/minio:${MINIO_TAG:-RELEASE.2025-01-20T14-49-07Z}"
environment:
MINIO_ROOT_USER: ${S3_ACCESS_KEY}
MINIO_ROOT_PASSWORD: ${S3_SECRET_KEY}
MINIO_VOLUMES: "/mnt/data"
MINIO_BROWSER: "off"
volumes:
- fmtm_data:/mnt/data
networks:
- fmtm-net
command: minio server
restart: "unless-stopped"
healthcheck:
test: timeout 5s bash -c ':> /dev/tcp/127.0.0.1/9000' || exit 1
interval: 5s
retries: 3
start_period: 5s
timeout: 5s
extends:
file: compose.staging.yaml
service: s3

fmtm-db:
image: "postgis/postgis:${POSTGIS_TAG:-14-3.5-alpine}"
command: -c 'max_connections=300' -c 'wal_level=logical'
volumes:
- fmtm_db_data:/var/lib/postgresql/data/
environment:
- POSTGRES_USER=${FMTM_DB_USER}
- POSTGRES_PASSWORD=${FMTM_DB_PASSWORD}
- POSTGRES_DB=${FMTM_DB_NAME}
ports:
- "5433:5432"
networks:
- fmtm-net
restart: "unless-stopped"
healthcheck:
test: pg_isready -U ${FMTM_DB_USER} -d ${FMTM_DB_NAME}
start_period: 5s
interval: 10s
timeout: 5s
retries: 3
extends:
file: compose.staging.yaml
service: fmtm-db

electric:
image: "electricsql/electric:${ELECTRIC_TAG:-1.0.0-beta.10}"
depends_on:
fmtm-db:
condition: service_healthy
migrations:
condition: service_completed_successfully
environment:
DATABASE_URL: postgresql://${FMTM_DB_USER}:${FMTM_DB_PASSWORD}@${FMTM_DB_HOST:-fmtm-db}/${FMTM_DB_NAME:-fmtm}?sslmode=disable
# OTEL_EXPORT: otlp
# OTLP_ENDPOINT: https://...
# ELECTRIC_WRITE_TO_PG_MODE: direct_writes
networks:
- fmtm-net
restart: "unless-stopped"
extends:
file: compose.staging.yaml
service: electric

migrations:
image: "ghcr.io/hotosm/fmtm/backend:main"
depends_on:
fmtm-db:
condition: service_healthy
s3:
condition: service_healthy
env_file:
- .env
networks:
- fmtm-net
entrypoint: ["/migrate-entrypoint.sh"]
restart: "on-failure:2"
extends:
file: compose.staging.yaml
service: migrations

scheduler:
extends:
file: compose.staging.yaml
service: scheduler

backups:
image: "ghcr.io/hotosm/fmtm/backend:main"
backup:
extends:
file: compose.staging.yaml
service: backup
depends_on:
fmtm-db:
condition: service_healthy
env_file:
- .env
networks:
- fmtm-net
entrypoint: ["/backup-entrypoint.sh"]
restart: "on-failure:2"
healthcheck:
test: pg_isready -U ${FMTM_DB_USER} -d ${FMTM_DB_NAME}
start_period: 5s
interval: 10s
timeout: 5s
retries: 3

certbot:
image: "ghcr.io/hotosm/fmtm/proxy:certs-init-main"
4 changes: 4 additions & 0 deletions deploy/compose.staging.yaml
@@ -88,6 +88,10 @@ services:
extends:
file: compose.development.yaml
service: migrations
scheduler:
extends:
file: compose.development.yaml
service: scheduler
backups:
image: "ghcr.io/hotosm/fmtm/backend:${GIT_BRANCH}"
depends_on:
1 change: 1 addition & 0 deletions src/backend/.dockerignore
@@ -11,3 +11,4 @@
!pyproject.toml
!uv.lock
!migrations
!scheduler
19 changes: 19 additions & 0 deletions src/backend/Dockerfile
@@ -60,6 +60,17 @@ ENV LANG=en_US.UTF-8 \
STOPSIGNAL SIGINT


# Get supercronic binary for userland scheduling (cron daemon needs root)
FROM base AS supercronic
ENV SUPERCRONIC_URL=https://github.com/aptible/supercronic/releases/download/v0.2.33/supercronic-linux-amd64 \
SUPERCRONIC_SHA1SUM=71b0d58cc53f6bd72cf2f293e09e294b79c666d8 \
SUPERCRONIC=supercronic-linux-amd64
RUN curl -fsSLO "$SUPERCRONIC_URL" \
&& echo "${SUPERCRONIC_SHA1SUM} ${SUPERCRONIC}" | sha1sum -c - \
&& chmod +x "$SUPERCRONIC" \
&& mv "$SUPERCRONIC" /supercronic


# Build stage with all dependencies required to build Python wheels
FROM base AS build
# NOTE the MONITORING argument is specified during production build on Github workflow
@@ -106,11 +117,17 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt \
REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt \
CURL_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt
# Packages:
# - procps: pgrep util for healthchecks
# - gettext-base: envsubst util for dotenv substitution
# - postgresql-client: pg_isready util for healthchecks
# - mime-support: web mimetype support
RUN apt-get update --quiet \
&& DEBIAN_FRONTEND=noninteractive \
apt-get install -y --quiet --no-install-recommends \
"nano" \
"curl" \
"procps" \
"gettext-base" \
"libpcre3" \
"mime-support" \
@@ -125,6 +142,7 @@ COPY --from=minio /usr/bin/mc /usr/local/bin/
# Copy basemap generation binaries
COPY --from=basemap-bins /usr/bin/tilepack /usr/local/bin/
COPY --from=basemap-bins /usr/bin/pmtiles /usr/local/bin/
COPY --from=supercronic /supercronic /usr/local/bin/
COPY *-entrypoint.sh /
ENTRYPOINT ["/app-entrypoint.sh"]
# Copy Python deps from build to runtime
@@ -134,6 +152,7 @@ WORKDIR /opt
COPY app/ /opt/app/
COPY migrations/ /opt/migrations/
COPY stats/ /opt/stats/
COPY scheduler/ /opt/scheduler/
# Add non-root user, permissions
RUN useradd -u 1001 -m -c "fmtm account" -d /home/appuser -s /bin/false appuser \
&& mkdir -p /opt/logs \
9 changes: 6 additions & 3 deletions src/backend/app/tasks/task_routes.py
@@ -24,10 +24,10 @@
from psycopg import Connection

from app.auth.auth_schemas import ProjectUserDict
from app.auth.roles import mapper
from app.auth.roles import mapper, super_admin
from app.db.database import db_conn
from app.db.enums import HTTPStatus
from app.db.models import DbTask, DbTaskEvent
from app.db.models import DbTask, DbTaskEvent, DbUser
from app.tasks import task_crud, task_schemas

router = APIRouter(
@@ -118,7 +118,10 @@ async def get_task_event_history(


@router.post("/unlock-tasks")
async def unlock_tasks(db: Annotated[Connection, Depends(db_conn)]):
async def unlock_tasks(
db: Annotated[Connection, Depends(db_conn)],
current_user: Annotated[DbUser, Depends(super_admin)],
):
"""Endpoint to trigger unlock_old_locked_tasks manually."""
log.info("Start processing inactive tasks")
await task_crud.trigger_unlock_tasks(db)
21 changes: 13 additions & 8 deletions src/backend/app/users/user_crud.py
@@ -17,28 +17,26 @@
#
"""Logic for user routes."""

import os
from datetime import datetime, timedelta, timezone
from textwrap import dedent
from typing import Optional

from fastapi import Request
from loguru import logger as log
from osm_login_python.core import Auth
from psycopg import Connection
from psycopg.rows import class_row

from app.auth.providers.osm import get_osm_token, send_osm_message
from app.auth.providers.osm import send_osm_message
from app.db.models import DbUser
from app.projects.project_crud import get_pagination

SVC_OSM_TOKEN = os.getenv("SVC_OSM_TOKEN", None)
WARNING_INTERVALS = [21, 14, 7] # Days before deletion
INACTIVITY_THRESHOLD = 2 * 365 # 2 years approx


async def process_inactive_users(
db: Connection,
request: Request,
osm_auth: Auth,
):
"""Identify inactive users, send warnings, and delete accounts."""
now = datetime.now(timezone.utc)
@@ -48,8 +46,6 @@ async def process_inactive_users(
]
deletion_threshold = now - timedelta(days=INACTIVITY_THRESHOLD)

osm_token = get_osm_token(request, osm_auth)

async with db.cursor() as cur:
# Users eligible for warnings
for days, warning_date in zip(
@@ -71,7 +67,16 @@ async def process_inactive_users(
users_to_warn = await cur.fetchall()

for user in users_to_warn:
await send_warning_email_or_osm(user.id, user.username, days, osm_token)
if SVC_OSM_TOKEN:
await send_warning_email_or_osm(
user.id, user.username, days, SVC_OSM_TOKEN
)
else:
log.warning(
f"The SVC_OSM_TOKEN is not set on this server. "
f"Cannot send emails to inactive users: "
f"{', '.join(user.username for user in users_to_warn)}"
)

# Users eligible for deletion
async with db.cursor(row_factory=class_row(DbUser)) as cur:
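
Since `process_inactive_users` no longer takes a request or `osm_auth`, it can be driven entirely from a cron script, provided `SVC_OSM_TOKEN` is set in the scheduler container's environment. A minimal sketch of what `/opt/scheduler/inactive_users.py` might look like, under the same assumptions as the `unlock_tasks.py` sketch above (hypothetical, not the merged file):

```python
#!/usr/bin/env python
"""Hypothetical sketch of /opt/scheduler/inactive_users.py (not the merged script)."""

import asyncio
import os

from psycopg import AsyncConnection

from app.users.user_crud import process_inactive_users


async def main() -> None:
    # SVC_OSM_TOKEN is read inside user_crud; without it, warnings are only logged.
    if not os.getenv("SVC_OSM_TOKEN"):
        print("SVC_OSM_TOKEN is not set; inactivity warnings cannot be sent")

    dsn = (
        f"postgresql://{os.environ['FMTM_DB_USER']}:{os.environ['FMTM_DB_PASSWORD']}"
        f"@{os.getenv('FMTM_DB_HOST', 'fmtm-db')}/{os.getenv('FMTM_DB_NAME', 'fmtm')}"
    )
    async with await AsyncConnection.connect(dsn) as db:
        await process_inactive_users(db)


if __name__ == "__main__":
    asyncio.run(main())
```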