From 062140df013f02ccb40fe0fe1832e181b286f8a1 Mon Sep 17 00:00:00 2001 From: Dan Fuchs Date: Wed, 22 Jan 2025 11:27:54 -0600 Subject: [PATCH] DM-48476: Sentry - the most basic integration possible Let's use Sentry in the most basic possible way and see what happens.Don't trace the SSE endpoint because Sentry holds spans in memory aslong as the connection is open. It seems like the uninstrumented SSEendopoint also holds on to memory for some reason that I (Dan) don'tunderstand, but tracing it definitely makes it worse. --- docs/user-guide/environment-variables.rst | 10 +++++++ pyproject.toml | 3 +++ src/timessquare/config.py | 32 ++++++++++++++++++++++ src/timessquare/handlers/internal.py | 7 ++++- src/timessquare/main.py | 10 +++++++ src/timessquare/sentry.py | 33 +++++++++++++++++++++++ src/timessquare/worker/main.py | 9 +++++++ tests/handlers/internal_test.py | 16 ++++++++--- tox.ini | 6 +++++ 9 files changed, 121 insertions(+), 5 deletions(-) create mode 100644 src/timessquare/sentry.py diff --git a/docs/user-guide/environment-variables.rst b/docs/user-guide/environment-variables.rst index bf87d7f5..42f44ad0 100644 --- a/docs/user-guide/environment-variables.rst +++ b/docs/user-guide/environment-variables.rst @@ -30,6 +30,11 @@ See the `Phalanx documentation for Times Square str: diff --git a/src/timessquare/handlers/internal.py b/src/timessquare/handlers/internal.py index c4d754c5..b9d6c940 100644 --- a/src/timessquare/handlers/internal.py +++ b/src/timessquare/handlers/internal.py @@ -19,12 +19,15 @@ """FastAPI router for all internal handlers.""" +@internal_router.get("/healthcheck") @internal_router.get( "/", description=( "Return metadata about the running application. Can also be used as" " a health check. This route is not exposed outside the cluster and" - " therefore cannot be used by external clients." + " therefore cannot be used by external clients. This route is also" + " exposed at /healthcheck so that Sentry tracing will ignore it:" + " https://docs.sentry.io/concepts/data-management/filtering/#transactions-coming-from-health-check" ), response_model_exclude_none=True, summary="Application metadata", @@ -34,6 +37,8 @@ async def get_index() -> Metadata: """GET ``/`` (the app's internal root). By convention, this endpoint returns only the application's metadata. + It is also exposed at ``/healthcheck`` so that Sentry tracing will ignore + it: https://docs.sentry.io/concepts/data-management/filtering/#transactions-coming-from-health-check """ return get_metadata( package_name="times-square", diff --git a/src/timessquare/main.py b/src/timessquare/main.py index 07a29745..c68d863a 100644 --- a/src/timessquare/main.py +++ b/src/timessquare/main.py @@ -15,6 +15,7 @@ from importlib.metadata import version from pathlib import Path +import sentry_sdk from fastapi import FastAPI from fastapi.openapi.utils import get_openapi from safir.database import create_database_engine, is_database_current @@ -24,6 +25,7 @@ from safir.fastapi import ClientRequestError, client_request_error_handler from safir.logging import configure_logging, configure_uvicorn_logging from safir.middleware.x_forwarded import XForwardedMiddleware +from safir.sentry import before_send_handler from safir.slack.webhook import SlackRouteErrorHandler from structlog import get_logger @@ -32,9 +34,17 @@ from .handlers.external import external_router from .handlers.internal import internal_router from .handlers.v1 import v1_router +from .sentry import make_traces_sampler __all__ = ["app", "config"] +sentry_sdk.init( + dsn=config.sentry_dsn, + environment=config.environment_name, + before_send=before_send_handler, + traces_sampler=make_traces_sampler(config.sentry_traces_sample_rate), +) + @asynccontextmanager async def lifespan(app: FastAPI) -> AsyncIterator: diff --git a/src/timessquare/sentry.py b/src/timessquare/sentry.py new file mode 100644 index 00000000..d4ac0b6c --- /dev/null +++ b/src/timessquare/sentry.py @@ -0,0 +1,33 @@ +"""Sentry integration helpers.""" + +import re +from collections.abc import Callable +from typing import Any + +__all__ = ["make_traces_sampler"] + +EVENTS_REGEX = re.compile("/pages/.*/events$") + + +def make_traces_sampler( + original_rate: float, +) -> Callable[[dict[str, Any]], float]: + """Don't instrument events SSE endpoint to avoid leaking memory. + + Sample every other trace at the configured rate. + + When an SSE endpoint is instrumented, Sentry accumlates spans for every + sent event in memory until the initial connection is closed. Without Sentry + tracing instrumentation, SSE endpoints don't leak memory. + """ + + def traces_sampler(context: dict[str, Any]) -> float: + try: + path = context["asgi_scope"]["path"] + if EVENTS_REGEX.search(path): + return 0 + except IndexError: + pass + return original_rate + + return traces_sampler diff --git a/src/timessquare/worker/main.py b/src/timessquare/worker/main.py index 49a91a74..2aba0e3e 100644 --- a/src/timessquare/worker/main.py +++ b/src/timessquare/worker/main.py @@ -8,10 +8,12 @@ import arq import httpx +import sentry_sdk import structlog from safir.database import create_database_engine, is_database_current from safir.dependencies.db_session import db_session_dependency from safir.logging import configure_logging +from safir.sentry import before_send_handler from safir.slack.blockkit import SlackMessage, SlackTextField from safir.slack.webhook import SlackWebhookClient @@ -31,6 +33,13 @@ repo_removed, ) +sentry_sdk.init( + dsn=config.sentry_dsn, + environment=config.environment_name, + before_send=before_send_handler, + traces_sample_rate=config.sentry_traces_sample_rate, +) + async def startup(ctx: dict[Any, Any]) -> None: """Set up the worker context.""" diff --git a/tests/handlers/internal_test.py b/tests/handlers/internal_test.py index 672ecb97..a28b8ffa 100644 --- a/tests/handlers/internal_test.py +++ b/tests/handlers/internal_test.py @@ -5,6 +5,7 @@ from typing import TYPE_CHECKING import pytest +from httpx import Response from timessquare.config import config @@ -12,10 +13,7 @@ from httpx import AsyncClient -@pytest.mark.asyncio -async def test_get_index(client: AsyncClient) -> None: - """Test ``GET /``.""" - response = await client.get("/") +def assert_response(response: Response) -> None: assert response.status_code == 200 data = response.json() assert data["name"] == config.name @@ -23,3 +21,13 @@ async def test_get_index(client: AsyncClient) -> None: assert isinstance(data["description"], str) assert isinstance(data["repository_url"], str) assert isinstance(data["documentation_url"], str) + + +@pytest.mark.asyncio +async def test_get_index(client: AsyncClient) -> None: + """Test ``GET /``.""" + response = await client.get("/") + assert_response(response) + + response = await client.get("/healthcheck") + assert_response(response) diff --git a/tox.ini b/tox.ini index faea6771..7562e52f 100644 --- a/tox.ini +++ b/tox.ini @@ -47,6 +47,7 @@ setenv = SAFIR_PROFILE = development TS_ALEMBIC_CONFIG_PATH = {toxinidir}/alembic.ini TS_ENVIRONMENT_URL = https://test.example.com + TS_ENVIRONMENT_NAME = testing TS_DATABASE_URL = postgresql://timessquare@localhost:5433/timessquare TS_DATABASE_PASSWORD = INSECURE-PASSWORD TS_REDIS_URL = redis://localhost:6379/0 @@ -86,6 +87,7 @@ commands = pre-commit run --all-files description = Run Alembic against a test database setenv = TS_ENVIRONMENT_URL = https://test.example.com + TS_ENVIRONMENT_NAME = testing TS_PATH_PREFIX = /times-square/api TS_ALEMBIC_CONFIG_PATH = {toxinidir}/alembic.ini TS_DATABASE_URL = postgresql://timessquare@127.0.0.1:5432/timessquare @@ -107,6 +109,7 @@ commands = times-square {posargs} setenv = TS_ENVIRONMENT_URL = https://test.example.com + TS_ENVIRONMENT_NAME = testing TS_PATH_PREFIX = /times-square/api TS_ALEMBIC_CONFIG_PATH = {toxinidir}/alembic.ini TS_DATABASE_URL = postgresql://timessquare@127.0.0.1:5432/timessquare @@ -126,6 +129,7 @@ whitelist_externals = setenv = SAFIR_PROFILE = development TS_ENVIRONMENT_URL = https://test.example.com + TS_ENVIRONMENT_NAME = testing TS_PATH_PREFIX = /times-square/api TS_ALEMBIC_CONFIG_PATH = {toxinidir}/alembic.ini TS_DATABASE_URL = postgresql://timessquare@127.0.0.1:5432/timessquare @@ -148,6 +152,7 @@ commands_pre = description = Build documentation (HTML) with Sphinx. setenv = TS_ENVIRONMENT_URL = https://test.example.com + TS_ENVIRONMENT_NAME = testing TS_DATABASE_URL = postgresql://timessquare@localhost:5433/timessquare TS_DATABASE_PASSWORD = INSECURE-PASSWORD TS_GAFAELFAWR_TOKEN = gt-eOfLolxU8FJ1xr08U7RTbg.Jr-KHSeISXwR5GXHiLemhw @@ -165,6 +170,7 @@ commands = description = Check links in documentation. setenv = TS_ENVIRONMENT_URL = https://test.example.com + TS_ENVIRONMENT_NAME = testing TS_DATABASE_URL = postgresql://timessquare@localhost:5433/timessquare TS_DATABASE_PASSWORD = INSECURE-PASSWORD TS_GAFAELFAWR_TOKEN = gt-eOfLolxU8FJ1xr08U7RTbg.Jr-KHSeISXwR5GXHiLemhw