From e46b5b30948bfa273daefb327f72b2b2e5c29d11 Mon Sep 17 00:00:00 2001 From: Jonathan Sick Date: Thu, 14 Mar 2024 13:19:14 -0400 Subject: [PATCH 01/12] Move noteburst interface to storage layer Since it's an interface to an external resource, it belongs with storage. --- src/timessquare/domain/githubcheckrun.py | 2 +- src/timessquare/domain/nbhtml.py | 2 +- src/timessquare/domain/page.py | 2 +- src/timessquare/services/githubrepo.py | 4 +-- src/timessquare/services/page.py | 26 +++++++++---------- .../{domain => storage}/noteburst.py | 4 +-- src/timessquare/storage/noteburstjobstore.py | 5 ++-- 7 files changed, 22 insertions(+), 23 deletions(-) rename src/timessquare/{domain => storage}/noteburst.py (97%) diff --git a/src/timessquare/domain/githubcheckrun.py b/src/timessquare/domain/githubcheckrun.py index 5552f2e2..ace4bcec 100644 --- a/src/timessquare/domain/githubcheckrun.py +++ b/src/timessquare/domain/githubcheckrun.py @@ -21,13 +21,13 @@ from timessquare.config import config +from ..storage.noteburst import NoteburstJobResponseModel, NoteburstJobStatus from .githubcheckout import ( GitHubRepositoryCheckout, NotebookSidecarFile, RecursiveGitTreeModel, RepositoryNotebookTreeRef, ) -from .noteburst import NoteburstJobResponseModel, NoteburstJobStatus from .page import PageExecutionInfo diff --git a/src/timessquare/domain/nbhtml.py b/src/timessquare/domain/nbhtml.py index 3d7bd388..4994590e 100644 --- a/src/timessquare/domain/nbhtml.py +++ b/src/timessquare/domain/nbhtml.py @@ -13,7 +13,7 @@ from pydantic import BaseModel, Field from traitlets.config import Config -from .noteburst import NoteburstJobResponseModel +from ..storage.noteburst import NoteburstJobResponseModel from .page import PageInstanceIdModel, PageInstanceModel diff --git a/src/timessquare/domain/page.py b/src/timessquare/domain/page.py index 126c5d09..4a0c0c72 100644 --- a/src/timessquare/domain/page.py +++ b/src/timessquare/domain/page.py @@ -28,7 +28,7 @@ ParameterSchemaError, ) -from 
.noteburst import NoteburstJobModel +from ..storage.noteburst import NoteburstJobModel NB_VERSION = 4 """The notebook format version used for reading and writing notebooks. diff --git a/src/timessquare/services/githubrepo.py b/src/timessquare/services/githubrepo.py index 31543a7a..95a29dc6 100644 --- a/src/timessquare/services/githubrepo.py +++ b/src/timessquare/services/githubrepo.py @@ -39,9 +39,9 @@ GitHubConfigsCheck, NotebookExecutionsCheck, ) -from timessquare.domain.noteburst import NoteburstJobStatus -from timessquare.domain.page import PageExecutionInfo, PageModel +from ..domain.page import PageExecutionInfo, PageModel +from ..storage.noteburst import NoteburstJobStatus from .page import PageService diff --git a/src/timessquare/services/page.py b/src/timessquare/services/page.py index 0319c216..d4254021 100644 --- a/src/timessquare/services/page.py +++ b/src/timessquare/services/page.py @@ -10,25 +10,25 @@ from httpx import AsyncClient from structlog.stdlib import BoundLogger -from timessquare.config import config -from timessquare.domain.githubtree import GitHubNode -from timessquare.domain.nbhtml import NbDisplaySettings, NbHtmlKey, NbHtmlModel -from timessquare.domain.noteburst import ( - NoteburstApi, - NoteburstJobResponseModel, - NoteburstJobStatus, -) -from timessquare.domain.page import ( +from ..config import config +from ..domain.githubtree import GitHubNode +from ..domain.nbhtml import NbDisplaySettings, NbHtmlKey, NbHtmlModel +from ..domain.page import ( PageExecutionInfo, PageInstanceModel, PageModel, PageSummaryModel, PersonModel, ) -from timessquare.exceptions import PageNotFoundError -from timessquare.storage.nbhtmlcache import NbHtmlCacheStore -from timessquare.storage.noteburstjobstore import NoteburstJobStore -from timessquare.storage.page import PageStore +from ..exceptions import PageNotFoundError +from ..storage.nbhtmlcache import NbHtmlCacheStore +from ..storage.noteburst import ( + NoteburstApi, + NoteburstJobResponseModel, + 
NoteburstJobStatus, +) +from ..storage.noteburstjobstore import NoteburstJobStore +from ..storage.page import PageStore class PageService: diff --git a/src/timessquare/domain/noteburst.py b/src/timessquare/storage/noteburst.py similarity index 97% rename from src/timessquare/domain/noteburst.py rename to src/timessquare/storage/noteburst.py index ef3e436e..6ad4d4da 100644 --- a/src/timessquare/domain/noteburst.py +++ b/src/timessquare/storage/noteburst.py @@ -1,4 +1,4 @@ -"""Domain model for the noteburst service integration.""" +"""Interface to the noteburst service.""" from __future__ import annotations @@ -10,7 +10,7 @@ from httpx import AsyncClient from pydantic import AnyHttpUrl, BaseModel, Field -from timessquare.config import config +from ..config import config class NoteburstJobModel(BaseModel): diff --git a/src/timessquare/storage/noteburstjobstore.py b/src/timessquare/storage/noteburstjobstore.py index 0c3b3389..8ea1f238 100644 --- a/src/timessquare/storage/noteburstjobstore.py +++ b/src/timessquare/storage/noteburstjobstore.py @@ -4,9 +4,8 @@ from redis.asyncio import Redis -from timessquare.domain.noteburst import NoteburstJobModel -from timessquare.domain.page import PageInstanceIdModel - +from ..domain.page import PageInstanceIdModel +from .noteburst import NoteburstJobModel from .redisbase import RedisPageInstanceStore From a9b96a85154b612c9205e0cfdfe1941ff13e3960 Mon Sep 17 00:00:00 2001 From: Jonathan Sick Date: Mon, 18 Mar 2024 12:31:31 -0400 Subject: [PATCH 02/12] Add arq queue client to PageService This will allow the page service to submit jobs to the arq queue. 
--- src/timessquare/dependencies/requestcontext.py | 8 ++++++++ src/timessquare/services/page.py | 3 +++ src/timessquare/worker/servicefactory.py | 17 +++++++++++++++++ tests/handlers/v1/github_test.py | 2 ++ 4 files changed, 30 insertions(+) diff --git a/src/timessquare/dependencies/requestcontext.py b/src/timessquare/dependencies/requestcontext.py index 3f5ae83e..b0b5b8f7 100644 --- a/src/timessquare/dependencies/requestcontext.py +++ b/src/timessquare/dependencies/requestcontext.py @@ -6,6 +6,8 @@ from fastapi import Depends, Request, Response from httpx import AsyncClient from redis.asyncio import Redis +from safir.arq import ArqQueue +from safir.dependencies.arq import arq_dependency from safir.dependencies.db_session import db_session_dependency from safir.dependencies.http_client import http_client_dependency from safir.dependencies.logger import logger_dependency @@ -52,6 +54,9 @@ class RequestContext: redis: Redis """Redis connection pool.""" + arq_queue: ArqQueue + """Client to the arq task queue.""" + http_client: AsyncClient """Shared HTTP client.""" @@ -64,6 +69,7 @@ def page_service(self) -> PageService: job_store=NoteburstJobStore(self.redis), http_client=self.http_client, logger=self.logger, + arq_queue=self.arq_queue, ) async def create_github_repo_service( @@ -125,6 +131,7 @@ async def context_dependency( session: Annotated[async_scoped_session, Depends(db_session_dependency)], redis: Annotated[Redis, Depends(redis_dependency)], http_client: Annotated[AsyncClient, Depends(http_client_dependency)], + arq_queue: Annotated[ArqQueue, Depends(arq_dependency)], ) -> RequestContext: """Provide a RequestContext as a dependency.""" return RequestContext( @@ -135,4 +142,5 @@ async def context_dependency( session=session, redis=redis, http_client=http_client, + arq_queue=arq_queue, ) diff --git a/src/timessquare/services/page.py b/src/timessquare/services/page.py index d4254021..110cfc2f 100644 --- a/src/timessquare/services/page.py +++ 
b/src/timessquare/services/page.py @@ -8,6 +8,7 @@ from typing import Any from httpx import AsyncClient +from safir.arq import ArqQueue from structlog.stdlib import BoundLogger from ..config import config @@ -49,12 +50,14 @@ def __init__( job_store: NoteburstJobStore, http_client: AsyncClient, logger: BoundLogger, + arq_queue: ArqQueue, ) -> None: self._page_store = page_store self._html_store = html_cache self._job_store = job_store self._http_client = http_client self._logger = logger + self._arq_queue = arq_queue self.noteburst_api = NoteburstApi(http_client=http_client) async def create_page_with_notebook_from_upload( diff --git a/src/timessquare/worker/servicefactory.py b/src/timessquare/worker/servicefactory.py index 83e1219f..01d266f1 100644 --- a/src/timessquare/worker/servicefactory.py +++ b/src/timessquare/worker/servicefactory.py @@ -3,6 +3,7 @@ from __future__ import annotations import httpx +from safir.arq import ArqMode, ArqQueue, MockArqQueue, RedisArqQueue from safir.github import GitHubAppClientFactory from safir.slack.blockkit import SlackException from sqlalchemy.ext.asyncio import async_scoped_session @@ -59,6 +60,7 @@ async def create_page_service( ) -> PageService: """Create a PageService for arq tasks.""" redis = await redis_dependency() + arq_queue = await create_arq_queue() return PageService( page_store=PageStore(db_session), @@ -66,4 +68,19 @@ async def create_page_service( job_store=NoteburstJobStore(redis), http_client=http_client, logger=logger, + arq_queue=arq_queue, ) + + +async def create_arq_queue() -> ArqQueue: + """Create an ArqQueue for arq tasks.""" + mode = config.arq_mode + if mode == ArqMode.production: + if not config.arq_redis_settings: + raise RuntimeError( + "The redis_settings argument must be set for arq in " + "production." 
+ ) + return await RedisArqQueue.initialize(config.arq_redis_settings) + else: + return MockArqQueue() diff --git a/tests/handlers/v1/github_test.py b/tests/handlers/v1/github_test.py index 144d5764..ff857ae1 100644 --- a/tests/handlers/v1/github_test.py +++ b/tests/handlers/v1/github_test.py @@ -6,6 +6,7 @@ import pytest from httpx import AsyncClient from redis.asyncio import Redis +from safir.arq import MockArqQueue from safir.database import create_async_session, create_database_engine from structlog import get_logger @@ -38,6 +39,7 @@ async def test_github(client: AsyncClient) -> None: job_store=NoteburstJobStore(redis), http_client=client, logger=get_logger(), + arq_queue=MockArqQueue(), ) await page_service.add_page_to_store( From 93f610e001a642574027a5d5897ac117335b14b4 Mon Sep 17 00:00:00 2001 From: Jonathan Sick Date: Mon, 18 Mar 2024 12:45:03 -0400 Subject: [PATCH 03/12] Add DELETE /pages/:page/html This endpoint allows for soft-deletion of the cached HTML for a page instance. The service launches a noteburst execution of the notebook and launches a replace_nbhtml background task to monitor that job and ultimately replace the cached HTML for the page. This is different from regular HTML creation triggered by a GET request in that the existing HTML remains available to clients while the new page is being rendered. 
--- pyproject.toml | 1 + src/timessquare/handlers/v1/endpoints.py | 42 +++++++ src/timessquare/services/backgroundpage.py | 50 ++++++++ src/timessquare/services/page.py | 69 ++++++---- src/timessquare/worker/functions/__init__.py | 2 + .../worker/functions/replace_nbhtml.py | 118 ++++++++++++++++++ src/timessquare/worker/main.py | 2 + src/timessquare/worker/servicefactory.py | 6 +- 8 files changed, 265 insertions(+), 25 deletions(-) create mode 100644 src/timessquare/services/backgroundpage.py create mode 100644 src/timessquare/worker/functions/replace_nbhtml.py diff --git a/pyproject.toml b/pyproject.toml index 295adb28..d4890677 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -147,6 +147,7 @@ ignore = [ "TID252", # if we're going to use relative imports, use them always "TRY003", # good general advice but lint is way too aggressive "TRY301", # sometimes raising exceptions inside try is the best flow + "TD003", # don't need issue link for todos # The following settings should be disabled when using ruff format # per https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules diff --git a/src/timessquare/handlers/v1/endpoints.py b/src/timessquare/handlers/v1/endpoints.py index e78d5a19..5d0ed731 100644 --- a/src/timessquare/handlers/v1/endpoints.py +++ b/src/timessquare/handlers/v1/endpoints.py @@ -368,6 +368,48 @@ async def get_page_html( return HTMLResponse(html.html) +@v1_router.delete( + "/pages/{page}/html", + summary="Delete the cached HTML of a notebook.", + name="delete_page_html", + tags=[ApiTags.pages], + responses={ + 404: {"description": "Cached HTML not found", "model": ErrorModel}, + 422: {"description": "Invalid parameter", "model": ErrorModel}, + }, +) +async def delete_page_html( + page: Annotated[str, page_path_parameter], + context: Annotated[RequestContext, Depends(context_dependency)], +) -> PlainTextResponse: + """Delete the cached HTML of a notebook execution, causing it to be + recomputed in the background. 
+ + By default, the HTML is soft-deleted so that it remains available to + existing clients until the new HTML replaces it in the cache. This endpoint + triggers a background task that recomputes the notebook and replaces the + cached HTML. + """ + page_service = context.page_service + async with context.session.begin(): + try: + await page_service.soft_delete_html( + name=page, query_params=context.request.query_params + ) + except PageNotFoundError as e: + e.location = ErrorLocation.path + e.field_path = ["page"] + raise + except ParameterSchemaValidationError as e: + e.location = ErrorLocation.query + e.field_path = [e.parameter] + raise + + # Ultimately create a resource that describes the background task; + # or subscribe the client to a SSE stream that reports the task's progress. + return PlainTextResponse(status_code=202) + + @v1_router.get( "/pages/{page}/htmlstatus", summary="Get the status of a page's HTML rendering", diff --git a/src/timessquare/services/backgroundpage.py b/src/timessquare/services/backgroundpage.py new file mode 100644 index 00000000..6cdb0394 --- /dev/null +++ b/src/timessquare/services/backgroundpage.py @@ -0,0 +1,50 @@ +"""A Page service specifically for use in Arq workers.""" + +from __future__ import annotations + +from collections.abc import Mapping +from typing import Any + +from timessquare.domain.page import PageInstanceModel +from timessquare.storage.noteburst import NoteburstJobResponseModel + +from .page import PageService + + +class BackgroundPageService(PageService): + """A Page service specifically for use in Arq workers. + + This is a subclass of PageService that is specifically designed to be + used in Arq workers. This service includes additional methods that are + only suitable for use in the background. 
+ """ + + async def update_nbhtml( + self, + page_name: str, + parameter_values: Mapping[str, Any], + noteburst_response: NoteburstJobResponseModel, + ) -> None: + """Recompute a page instance with noteburst and update the HTML cache. + + This method is used with the ``replace_nbhtml`` task, which is + triggered by a soft delete of a page instance. In a soft-delete, the + page instance is recomputed in the background while current users + see the stale version. + + Parameters + ---------- + page_name + The name of the page to recompute. + parameter_values + The parameter values to use when recomputing the page instance. + """ + page = await self.get_page(page_name) + resolved_values = page.resolve_and_validate_values(parameter_values) + page_instance = PageInstanceModel( + name=page.name, values=resolved_values, page=page + ) + # Create HTML for each display setting and store it in the cache + await self.render_nbhtml_matrix_from_noteburst_response( + page_instance=page_instance, noteburst_response=noteburst_response + ) diff --git a/src/timessquare/services/page.py b/src/timessquare/services/page.py index 110cfc2f..e59374cc 100644 --- a/src/timessquare/services/page.py +++ b/src/timessquare/services/page.py @@ -323,7 +323,8 @@ async def _get_html_from_noteburst_job( display_settings: NbDisplaySettings, ) -> NbHtmlModel | None: """Convert a noteburst job for a given page and parameter values into - HTML (caching that HTML as well), and triggering a new noteburst. + HTML (caching that HTML as well), and triggering a new noteburst job if + the job was not found. 
Parameters ---------- @@ -361,15 +362,11 @@ async def _get_html_from_noteburst_job( status=str(noteburst_response.status), ) if noteburst_response.status == NoteburstJobStatus.complete: - ipynb = noteburst_response.ipynb - if ipynb is None: - raise RuntimeError( - "Noteburst job is complete but has no ipynb" + html_renders = ( + await self.render_nbhtml_matrix_from_noteburst_response( + page_instance=page_instance, + noteburst_response=noteburst_response, ) - html_renders = await self._create_html_matrix( - page_instance=page_instance, - ipynb=ipynb, - noteburst_response=noteburst_response, ) # return the specific HTML render that the client asked for return html_renders[display_settings] @@ -395,6 +392,24 @@ async def _get_html_from_noteburst_job( ) return None + async def soft_delete_html( + self, *, name: str, query_params: Mapping[str, Any] + ) -> None: + """Soft delete the HTML for a page given the query parameters.""" + page = await self.get_page(name) + resolved_values = page.resolve_and_validate_values(query_params) + page_instance = PageInstanceModel( + name=page.name, values=resolved_values, page=page + ) + exec_info = await self.request_noteburst_execution(page_instance) + await self._arq_queue.enqueue( # provides an arq job metadata + "replace_nbhtml", + page_name=page.name, + parameter_values=resolved_values, + noteburst_job=exec_info.noteburst_job, + ) + # Format the job for a response + async def request_noteburst_execution( self, page_instance: PageInstanceModel, *, enable_retry: bool = True ) -> PageExecutionInfo: @@ -441,24 +456,25 @@ async def request_noteburst_execution( noteburst_error_message=r.error, ) - async def _create_html_matrix( + async def render_nbhtml_matrix_from_noteburst_response( self, *, page_instance: PageInstanceModel, - ipynb: str, noteburst_response: NoteburstJobResponseModel, - ) -> dict[Any, NbHtmlModel]: - # These keys correspond to display arguments in - # NbHtml.create_from_notebook_result - matrix_keys = [ - 
NbDisplaySettings(hide_code=True), - NbDisplaySettings(hide_code=False), - ] + ) -> dict[NbDisplaySettings, NbHtmlModel]: + """Render the HTML matrix from a noteburst response. + + The Noteburst Job in the NoteburstJobStore is deleted after rendering. + If the noteburst job did not appear in the store (because the HTML + was being re-rendered in the background), this method still succeeds. + """ html_matrix: dict[NbDisplaySettings, NbHtmlModel] = {} - for matrix_key in matrix_keys: + if noteburst_response.ipynb is None: + raise RuntimeError("Noteburst job is complete but has no ipynb") + for matrix_key in self.html_display_settings_matrix: nbhtml = NbHtmlModel.create_from_noteburst_result( page_instance=page_instance, - ipynb=ipynb, + ipynb=noteburst_response.ipynb, noteburst_result=noteburst_response, display_settings=matrix_key, ) @@ -468,11 +484,20 @@ async def _create_html_matrix( "Stored new HTML", display_settings=asdict(matrix_key) ) - await self._job_store.delete_instance(page_instance) - self._logger.debug("Deleted old job record") + deleted_job = await self._job_store.delete_instance(page_instance) + if deleted_job: + self._logger.debug("Deleted old job record") return html_matrix + @property + def html_display_settings_matrix(self) -> list[NbDisplaySettings]: + """The matrix of all display settings for HTML rendering.""" + return [ + NbDisplaySettings(hide_code=True), + NbDisplaySettings(hide_code=False), + ] + @property def _noteburst_auth_header(self) -> dict[str, str]: return { diff --git a/src/timessquare/worker/functions/__init__.py b/src/timessquare/worker/functions/__init__.py index d5f23f31..ca632652 100644 --- a/src/timessquare/worker/functions/__init__.py +++ b/src/timessquare/worker/functions/__init__.py @@ -3,6 +3,7 @@ from .create_rerequested_check_run import create_rerequested_check_run from .ping import ping from .pull_request_sync import pull_request_sync +from .replace_nbhtml import replace_nbhtml from .repo_added import repo_added from 
.repo_push import repo_push from .repo_removed import repo_removed @@ -16,4 +17,5 @@ "compute_check_run", "create_check_run", "create_rerequested_check_run", + "replace_nbhtml", ] diff --git a/src/timessquare/worker/functions/replace_nbhtml.py b/src/timessquare/worker/functions/replace_nbhtml.py new file mode 100644 index 00000000..e66547a7 --- /dev/null +++ b/src/timessquare/worker/functions/replace_nbhtml.py @@ -0,0 +1,118 @@ +"""A worker task that replaces a page instance's HTML in the cache +if a Noteburst job is complete. +""" + +from __future__ import annotations + +import json +from collections.abc import Mapping +from typing import Any + +from safir.dependencies.db_session import db_session_dependency +from safir.slack.blockkit import SlackCodeBlock, SlackMessage, SlackTextField + +from timessquare.storage.noteburst import ( + NoteburstApi, + NoteburstJobModel, + NoteburstJobStatus, +) +from timessquare.worker.servicefactory import ( + create_arq_queue, + create_page_service, +) + + +async def replace_nbhtml( + ctx: dict[Any, Any], + *, + page_name: str, + parameter_values: Mapping[str, Any], + noteburst_job: NoteburstJobModel, +) -> str: + """Recompute a page instance with noteburst and update the HTML cache. + + This function is triggered when a page instance (HTML rendering) is soft + deleted so that it's recomputed in the background while current users + see the stale version. 
+ """ + logger = ctx["logger"].bind( + task="replace_nbhtml", + page=page_name, + parameter_values=parameter_values, + ) + logger.info("Running replace_nbhtml") + + try: + noteburst_client = NoteburstApi( + http_client=ctx["http_client"], + ) + updated_job_result = await noteburst_client.get_job( + str(noteburst_job.job_url) + ) + if not updated_job_result.data: + raise RuntimeError( + f"Failed to get noteburst job at {noteburst_job.job_url}" + ) + + async for db_session in db_session_dependency(): + page_service = await create_page_service( + http_client=ctx["http_client"], + logger=logger, + db_session=db_session, + ) + + if updated_job_result.data.status == NoteburstJobStatus.complete: + # Job finished, so render the HTML and update the cache + await page_service.update_nbhtml( + page_name=page_name, + parameter_values=parameter_values, + noteburst_response=updated_job_result.data, + ) + elif ( + updated_job_result.data.status == NoteburstJobStatus.not_found + ): + # Job was lost, so re-send the request + await page_service.soft_delete_html( + name=page_name, query_params=parameter_values + ) + else: + # Job is still queued or running, so schedule another task + # TODO(jonathansick): add a start time and a timeout to the + # job's parameters so we can abort if it takes too long. 
+ arq_queue = await create_arq_queue() + await arq_queue.enqueue( + "replace_nbhtml", + page_name=page_name, + parameter_values=parameter_values, + noteburst_job=updated_job_result.data.to_job_model(), + _defer_by=1, # look again in 1 second + ) + + except Exception as e: + if "slack" in ctx: + await ctx["slack"].post( + SlackMessage( + message="Times Square worker exception.", + fields=[ + SlackTextField( + heading="Task", text="recompute_page_instance" + ), + SlackTextField(heading="Page", text=page_name), + ], + blocks=[ + SlackCodeBlock( + heading="Parameters", + code=json.dumps( + parameter_values, indent=2, sort_keys=True + ), + ), + SlackCodeBlock( + heading="Exception", + code=str(e), + ), + ], + ) + ) + raise + + return "Done" diff --git a/src/timessquare/worker/main.py b/src/timessquare/worker/main.py index 0f6ea7fe..ed69103b 100644 --- a/src/timessquare/worker/main.py +++ b/src/timessquare/worker/main.py @@ -23,6 +23,7 @@ create_rerequested_check_run, ping, pull_request_sync, + replace_nbhtml, repo_added, repo_push, repo_removed, @@ -126,6 +127,7 @@ class WorkerSettings: compute_check_run, create_check_run, create_rerequested_check_run, + replace_nbhtml, ] redis_settings = config.arq_redis_settings diff --git a/src/timessquare/worker/servicefactory.py b/src/timessquare/worker/servicefactory.py index 01d266f1..c1d3a3a5 100644 --- a/src/timessquare/worker/servicefactory.py +++ b/src/timessquare/worker/servicefactory.py @@ -11,8 +11,8 @@ from timessquare.config import config from timessquare.dependencies.redis import redis_dependency +from timessquare.services.backgroundpage import BackgroundPageService from timessquare.services.githubrepo import GitHubRepoService -from timessquare.services.page import PageService from timessquare.storage.nbhtmlcache import NbHtmlCacheStore from timessquare.storage.noteburstjobstore import NoteburstJobStore from timessquare.storage.page import PageStore @@ -57,12 +57,12 @@ async def create_page_service( http_client: 
httpx.AsyncClient, logger: BoundLogger, db_session: async_scoped_session, -) -> PageService: +) -> BackgroundPageService: """Create a PageService for arq tasks.""" redis = await redis_dependency() arq_queue = await create_arq_queue() - return PageService( + return BackgroundPageService( page_store=PageStore(db_session), html_cache=NbHtmlCacheStore(redis), job_store=NoteburstJobStore(redis), From d1b019e58deb78025e5ed15e275da55b2cd81868 Mon Sep 17 00:00:00 2001 From: Jonathan Sick Date: Mon, 18 Mar 2024 14:51:16 -0400 Subject: [PATCH 04/12] Finish migrating to NoteburstApi class All code was supposed to have migrated to using the common NoteburstApi class for communicating with noteburst, but one method was still making a direct call. Restructured the request_noteburst_execution method to take advantage of the result being pre-parsed. --- src/timessquare/services/page.py | 64 ++++++++++++++------------------ 1 file changed, 27 insertions(+), 37 deletions(-) diff --git a/src/timessquare/services/page.py b/src/timessquare/services/page.py index e59374cc..720500ad 100644 --- a/src/timessquare/services/page.py +++ b/src/timessquare/services/page.py @@ -11,7 +11,6 @@ from safir.arq import ArqQueue from structlog.stdlib import BoundLogger -from ..config import config from ..domain.githubtree import GitHubNode from ..domain.nbhtml import NbDisplaySettings, NbHtmlKey, NbHtmlModel from ..domain.page import ( @@ -350,47 +349,46 @@ async def _get_html_from_noteburst_job( await self.request_noteburst_execution(page_instance) return None - r = await self._http_client.get( - str(job.job_url), headers=self._noteburst_auth_header - ) - if r.status_code == 200: - noteburst_response = NoteburstJobResponseModel.model_validate( - r.json() - ) - self._logger.debug( - "Got noteburst job metadata", - status=str(noteburst_response.status), - ) - if noteburst_response.status == NoteburstJobStatus.complete: - html_renders = ( - await self.render_nbhtml_matrix_from_noteburst_response( - 
page_instance=page_instance, - noteburst_response=noteburst_response, - ) - ) - # return the specific HTML render that the client asked for - return html_renders[display_settings] - - else: - # Noteburst job isn't complete - return None + r = await self.noteburst_api.get_job(str(job.job_url)) - elif r.status_code != 404: + if r.status_code == 404: # Noteburst lost the job; delete our record and try again self._logger.warning( "Got a 404 from a noteburst job", job_url=job.job_url ) await self._job_store.delete_instance(page_instance) await self.request_noteburst_execution(page_instance) - else: + return None + + elif r.status_code >= 500: # server error from noteburst self._logger.warning( "Got unknown response from noteburst job", job_url=job.job_url, noteburst_status=r.status_code, - noteburst_body=r.text, ) - return None + return None + elif r.data is None: + self._logger.warning( + "Got empty response from noteburst job", + job_url=job.job_url, + noteburst_status=r.status_code, + ) + return None + + if r.data.status == NoteburstJobStatus.complete: + html_renders = ( + await self.render_nbhtml_matrix_from_noteburst_response( + page_instance=page_instance, + noteburst_response=r.data, + ) + ) + # return the specific HTML render that the client asked for + return html_renders[display_settings] + + else: + # Noteburst job isn't complete + return None async def soft_delete_html( self, *, name: str, query_params: Mapping[str, Any] @@ -497,11 +495,3 @@ def html_display_settings_matrix(self) -> list[NbDisplaySettings]: NbDisplaySettings(hide_code=True), NbDisplaySettings(hide_code=False), ] - - @property - def _noteburst_auth_header(self) -> dict[str, str]: - return { - "Authorization": ( - f"Bearer {config.gafaelfawr_token.get_secret_value()}" - ) - } From 6479dd1edef260468bc8650647866c17eec75804 Mon Sep 17 00:00:00 2001 From: Jonathan Sick Date: Mon, 18 Mar 2024 15:18:50 -0400 Subject: [PATCH 05/12] Declare the content type for HTML Previously the API spec was showing 
this as the default application/json. --- src/timessquare/handlers/v1/endpoints.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/timessquare/handlers/v1/endpoints.py b/src/timessquare/handlers/v1/endpoints.py index 5d0ed731..3a961aac 100644 --- a/src/timessquare/handlers/v1/endpoints.py +++ b/src/timessquare/handlers/v1/endpoints.py @@ -336,6 +336,10 @@ async def get_rendered_notebook( name="get_page_html", tags=[ApiTags.pages], responses={ + 200: { + "description": "HTML of the notebook", + "content": {"text/html": {}}, + }, 404: {"description": "Page not found", "model": ErrorModel}, 422: {"description": "Invalid parameter", "model": ErrorModel}, }, From 454927e18b3d0e719ea7595fffe84ba6a9c0a99b Mon Sep 17 00:00:00 2001 From: Jonathan Sick Date: Mon, 18 Mar 2024 17:18:04 -0400 Subject: [PATCH 06/12] Add starlette-sse dependency This will let us add a server-sent events endpoint --- requirements/dev.txt | 124 +++++++++++++++++++++--------------------- requirements/main.in | 1 + requirements/main.txt | 58 +++++++++++--------- 3 files changed, 96 insertions(+), 87 deletions(-) diff --git a/requirements/dev.txt b/requirements/dev.txt index b0798c57..123e86c6 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -208,59 +208,59 @@ click==8.1.7 \ # -c requirements/main.txt # documenteer # uvicorn -coverage[toml]==7.4.3 \ - --hash=sha256:0209a6369ccce576b43bb227dc8322d8ef9e323d089c6f3f26a597b09cb4d2aa \ - --hash=sha256:062b0a75d9261e2f9c6d071753f7eef0fc9caf3a2c82d36d76667ba7b6470003 \ - --hash=sha256:0842571634f39016a6c03e9d4aba502be652a6e4455fadb73cd3a3a49173e38f \ - --hash=sha256:16bae383a9cc5abab9bb05c10a3e5a52e0a788325dc9ba8499e821885928968c \ - --hash=sha256:18c7320695c949de11a351742ee001849912fd57e62a706d83dfc1581897fa2e \ - --hash=sha256:18d90523ce7553dd0b7e23cbb28865db23cddfd683a38fb224115f7826de78d0 \ - --hash=sha256:1bf25fbca0c8d121a3e92a2a0555c7e5bc981aee5c3fdaf4bb7809f410f696b9 \ - 
--hash=sha256:276f6077a5c61447a48d133ed13e759c09e62aff0dc84274a68dc18660104d52 \ - --hash=sha256:280459f0a03cecbe8800786cdc23067a8fc64c0bd51dc614008d9c36e1659d7e \ - --hash=sha256:28ca2098939eabab044ad68850aac8f8db6bf0b29bc7f2887d05889b17346454 \ - --hash=sha256:2c854ce44e1ee31bda4e318af1dbcfc929026d12c5ed030095ad98197eeeaed0 \ - --hash=sha256:35eb581efdacf7b7422af677b92170da4ef34500467381e805944a3201df2079 \ - --hash=sha256:37389611ba54fd6d278fde86eb2c013c8e50232e38f5c68235d09d0a3f8aa352 \ - --hash=sha256:3b253094dbe1b431d3a4ac2f053b6d7ede2664ac559705a704f621742e034f1f \ - --hash=sha256:3b2eccb883368f9e972e216c7b4c7c06cabda925b5f06dde0650281cb7666a30 \ - --hash=sha256:451f433ad901b3bb00184d83fd83d135fb682d780b38af7944c9faeecb1e0bfe \ - --hash=sha256:489763b2d037b164846ebac0cbd368b8a4ca56385c4090807ff9fad817de4113 \ - --hash=sha256:4af154d617c875b52651dd8dd17a31270c495082f3d55f6128e7629658d63765 \ - --hash=sha256:506edb1dd49e13a2d4cac6a5173317b82a23c9d6e8df63efb4f0380de0fbccbc \ - --hash=sha256:6679060424faa9c11808598504c3ab472de4531c571ab2befa32f4971835788e \ - --hash=sha256:69b9f6f66c0af29642e73a520b6fed25ff9fd69a25975ebe6acb297234eda501 \ - --hash=sha256:6c00cdc8fa4e50e1cc1f941a7f2e3e0f26cb2a1233c9696f26963ff58445bac7 \ - --hash=sha256:6c0cdedd3500e0511eac1517bf560149764b7d8e65cb800d8bf1c63ebf39edd2 \ - --hash=sha256:708a3369dcf055c00ddeeaa2b20f0dd1ce664eeabde6623e516c5228b753654f \ - --hash=sha256:718187eeb9849fc6cc23e0d9b092bc2348821c5e1a901c9f8975df0bc785bfd4 \ - --hash=sha256:767b35c3a246bcb55b8044fd3a43b8cd553dd1f9f2c1eeb87a302b1f8daa0524 \ - --hash=sha256:77fbfc5720cceac9c200054b9fab50cb2a7d79660609200ab83f5db96162d20c \ - --hash=sha256:7cbde573904625509a3f37b6fecea974e363460b556a627c60dc2f47e2fffa51 \ - --hash=sha256:8249b1c7334be8f8c3abcaaa996e1e4927b0e5a23b65f5bf6cfe3180d8ca7840 \ - --hash=sha256:8580b827d4746d47294c0e0b92854c85a92c2227927433998f0d3320ae8a71b6 \ - --hash=sha256:8640f1fde5e1b8e3439fe482cdc2b0bb6c329f4bb161927c28d2e8879c6029ee \ - 
--hash=sha256:9a9babb9466fe1da12417a4aed923e90124a534736de6201794a3aea9d98484e \ - --hash=sha256:a78ed23b08e8ab524551f52953a8a05d61c3a760781762aac49f8de6eede8c45 \ - --hash=sha256:abbbd8093c5229c72d4c2926afaee0e6e3140de69d5dcd918b2921f2f0c8baba \ - --hash=sha256:ae7f19afe0cce50039e2c782bff379c7e347cba335429678450b8fe81c4ef96d \ - --hash=sha256:b3ec74cfef2d985e145baae90d9b1b32f85e1741b04cd967aaf9cfa84c1334f3 \ - --hash=sha256:b51bfc348925e92a9bd9b2e48dad13431b57011fd1038f08316e6bf1df107d10 \ - --hash=sha256:b9a4a8dd3dcf4cbd3165737358e4d7dfbd9d59902ad11e3b15eebb6393b0446e \ - --hash=sha256:ba3a8aaed13770e970b3df46980cb068d1c24af1a1968b7818b69af8c4347efb \ - --hash=sha256:c0524de3ff096e15fcbfe8f056fdb4ea0bf497d584454f344d59fce069d3e6e9 \ - --hash=sha256:c0a120238dd71c68484f02562f6d446d736adcc6ca0993712289b102705a9a3a \ - --hash=sha256:cbbe5e739d45a52f3200a771c6d2c7acf89eb2524890a4a3aa1a7fa0695d2a47 \ - --hash=sha256:ce8c50520f57ec57aa21a63ea4f325c7b657386b3f02ccaedeccf9ebe27686e1 \ - --hash=sha256:cf30900aa1ba595312ae41978b95e256e419d8a823af79ce670835409fc02ad3 \ - --hash=sha256:d25b937a5d9ffa857d41be042b4238dd61db888533b53bc76dc082cb5a15e914 \ - --hash=sha256:d6cdecaedea1ea9e033d8adf6a0ab11107b49571bbb9737175444cea6eb72328 \ - --hash=sha256:dec9de46a33cf2dd87a5254af095a409ea3bf952d85ad339751e7de6d962cde6 \ - --hash=sha256:ebe7c9e67a2d15fa97b77ea6571ce5e1e1f6b0db71d1d5e96f8d2bf134303c1d \ - --hash=sha256:ee866acc0861caebb4f2ab79f0b94dbfbdbfadc19f82e6e9c93930f74e11d7a0 \ - --hash=sha256:f6a09b360d67e589236a44f0c39218a8efba2593b6abdccc300a8862cffc2f94 \ - --hash=sha256:fcc66e222cf4c719fe7722a403888b1f5e1682d1679bd780e2b26c18bb648cdc \ - --hash=sha256:fd6545d97c98a192c5ac995d21c894b581f1fd14cf389be90724d21808b657e2 +coverage[toml]==7.4.4 \ + --hash=sha256:00838a35b882694afda09f85e469c96367daa3f3f2b097d846a7216993d37f4c \ + --hash=sha256:0513b9508b93da4e1716744ef6ebc507aff016ba115ffe8ecff744d1322a7b63 \ + 
--hash=sha256:09c3255458533cb76ef55da8cc49ffab9e33f083739c8bd4f58e79fecfe288f7 \ + --hash=sha256:09ef9199ed6653989ebbcaacc9b62b514bb63ea2f90256e71fea3ed74bd8ff6f \ + --hash=sha256:09fa497a8ab37784fbb20ab699c246053ac294d13fc7eb40ec007a5043ec91f8 \ + --hash=sha256:0f9f50e7ef2a71e2fae92774c99170eb8304e3fdf9c8c3c7ae9bab3e7229c5cf \ + --hash=sha256:137eb07173141545e07403cca94ab625cc1cc6bc4c1e97b6e3846270e7e1fea0 \ + --hash=sha256:1f384c3cc76aeedce208643697fb3e8437604b512255de6d18dae3f27655a384 \ + --hash=sha256:201bef2eea65e0e9c56343115ba3814e896afe6d36ffd37bab783261db430f76 \ + --hash=sha256:38dd60d7bf242c4ed5b38e094baf6401faa114fc09e9e6632374388a404f98e7 \ + --hash=sha256:3b799445b9f7ee8bf299cfaed6f5b226c0037b74886a4e11515e569b36fe310d \ + --hash=sha256:3ea79bb50e805cd6ac058dfa3b5c8f6c040cb87fe83de10845857f5535d1db70 \ + --hash=sha256:40209e141059b9370a2657c9b15607815359ab3ef9918f0196b6fccce8d3230f \ + --hash=sha256:41c9c5f3de16b903b610d09650e5e27adbfa7f500302718c9ffd1c12cf9d6818 \ + --hash=sha256:54eb8d1bf7cacfbf2a3186019bcf01d11c666bd495ed18717162f7eb1e9dd00b \ + --hash=sha256:598825b51b81c808cb6f078dcb972f96af96b078faa47af7dfcdf282835baa8d \ + --hash=sha256:5fc1de20b2d4a061b3df27ab9b7c7111e9a710f10dc2b84d33a4ab25065994ec \ + --hash=sha256:623512f8ba53c422fcfb2ce68362c97945095b864cda94a92edbaf5994201083 \ + --hash=sha256:690db6517f09336559dc0b5f55342df62370a48f5469fabf502db2c6d1cffcd2 \ + --hash=sha256:69eb372f7e2ece89f14751fbcbe470295d73ed41ecd37ca36ed2eb47512a6ab9 \ + --hash=sha256:73bfb9c09951125d06ee473bed216e2c3742f530fc5acc1383883125de76d9cd \ + --hash=sha256:742a76a12aa45b44d236815d282b03cfb1de3b4323f3e4ec933acfae08e54ade \ + --hash=sha256:7c95949560050d04d46b919301826525597f07b33beba6187d04fa64d47ac82e \ + --hash=sha256:8130a2aa2acb8788e0b56938786c33c7c98562697bf9f4c7d6e8e5e3a0501e4a \ + --hash=sha256:8a2b2b78c78293782fd3767d53e6474582f62443d0504b1554370bde86cc8227 \ + --hash=sha256:8ce1415194b4a6bd0cdcc3a1dfbf58b63f910dcb7330fe15bdff542c56949f87 \ + 
--hash=sha256:9ca28a302acb19b6af89e90f33ee3e1906961f94b54ea37de6737b7ca9d8827c \ + --hash=sha256:a4cdc86d54b5da0df6d3d3a2f0b710949286094c3a6700c21e9015932b81447e \ + --hash=sha256:aa5b1c1bfc28384f1f53b69a023d789f72b2e0ab1b3787aae16992a7ca21056c \ + --hash=sha256:aadacf9a2f407a4688d700e4ebab33a7e2e408f2ca04dbf4aef17585389eff3e \ + --hash=sha256:ae71e7ddb7a413dd60052e90528f2f65270aad4b509563af6d03d53e979feafd \ + --hash=sha256:b14706df8b2de49869ae03a5ccbc211f4041750cd4a66f698df89d44f4bd30ec \ + --hash=sha256:b1a93009cb80730c9bca5d6d4665494b725b6e8e157c1cb7f2db5b4b122ea562 \ + --hash=sha256:b2991665420a803495e0b90a79233c1433d6ed77ef282e8e152a324bbbc5e0c8 \ + --hash=sha256:b2c5edc4ac10a7ef6605a966c58929ec6c1bd0917fb8c15cb3363f65aa40e677 \ + --hash=sha256:b4d33f418f46362995f1e9d4f3a35a1b6322cb959c31d88ae56b0298e1c22357 \ + --hash=sha256:b91cbc4b195444e7e258ba27ac33769c41b94967919f10037e6355e998af255c \ + --hash=sha256:c74880fc64d4958159fbd537a091d2a585448a8f8508bf248d72112723974cbd \ + --hash=sha256:c901df83d097649e257e803be22592aedfd5182f07b3cc87d640bbb9afd50f49 \ + --hash=sha256:cac99918c7bba15302a2d81f0312c08054a3359eaa1929c7e4b26ebe41e9b286 \ + --hash=sha256:cc4f1358cb0c78edef3ed237ef2c86056206bb8d9140e73b6b89fbcfcbdd40e1 \ + --hash=sha256:ccd341521be3d1b3daeb41960ae94a5e87abe2f46f17224ba5d6f2b8398016cf \ + --hash=sha256:ce4b94265ca988c3f8e479e741693d143026632672e3ff924f25fab50518dd51 \ + --hash=sha256:cf271892d13e43bc2b51e6908ec9a6a5094a4df1d8af0bfc360088ee6c684409 \ + --hash=sha256:d5ae728ff3b5401cc320d792866987e7e7e880e6ebd24433b70a33b643bb0384 \ + --hash=sha256:d71eec7d83298f1af3326ce0ff1d0ea83c7cb98f72b577097f9083b20bdaf05e \ + --hash=sha256:d898fe162d26929b5960e4e138651f7427048e72c853607f2b200909794ed978 \ + --hash=sha256:d89d7b2974cae412400e88f35d86af72208e1ede1a541954af5d944a8ba46c57 \ + --hash=sha256:dfa8fe35a0bb90382837b238fff375de15f0dcdb9ae68ff85f7a63649c98527e \ + --hash=sha256:e0be5efd5127542ef31f165de269f77560d6cdef525fffa446de6f7e9186cfb2 \ + 
--hash=sha256:fdfafb32984684eb03c2d83e1e51f64f0906b11e64482df3c5db936ce3839d48 \ + --hash=sha256:ff7687ca3d7028d8a5f0ebae95a6e4827c5616b31a4ee1192bdfde697db110d4 # via # -r requirements/dev.in # pytest-cov @@ -573,9 +573,9 @@ pycparser==2.21 \ # via # -c requirements/main.txt # cffi -pydantic==2.6.3 \ - --hash=sha256:72c6034df47f46ccdf81869fddb81aade68056003900a8724a4f160700016a2a \ - --hash=sha256:e07805c4c7f5c6826e33a1d4c9d47950d7eaf34868e2690f8594d2e30241f11f +pydantic==2.6.4 \ + --hash=sha256:b1704e0847db01817624a6b86766967f552dd9dbf3afba4004409f908dcc84e6 \ + --hash=sha256:cc46fce86607580867bdc3361ad462bab9c222ef042d3da86f2fb333e1d916c5 # via # -c requirements/main.txt # documenteer @@ -756,9 +756,9 @@ pyyaml==6.0.1 \ # pre-commit # pybtex # sphinxcontrib-redoc -referencing==0.33.0 \ - --hash=sha256:39240f2ecc770258f28b642dd47fd74bc8b02484de54e1882b74b35ebd779bd5 \ - --hash=sha256:c775fedf74bc0f9189c2a3be1c12fd03e8c23f4d371dce795df44e06c5b412f7 +referencing==0.34.0 \ + --hash=sha256:5773bd84ef41799a5a8ca72dc34590c041eb01bf9aa02632b4a973fb0181a844 \ + --hash=sha256:d53ae300ceddd3169f1ffa9caf2cb7b769e92657e4fafb23d34b93679116dfd4 # via # -c requirements/main.txt # jsonschema @@ -1097,9 +1097,9 @@ virtualenv==20.25.1 \ # via pre-commit # The following packages are considered to be unsafe in a requirements file: -setuptools==69.1.1 \ - --hash=sha256:02fa291a0471b3a18b2b2481ed902af520c69e8ae0919c13da936542754b4c56 \ - --hash=sha256:5c0806c7d9af348e6dd3777b4f4dbb42c7ad85b190104837488eab9a7c945cf8 +setuptools==69.2.0 \ + --hash=sha256:0ff4183f8f42cd8fa3acea16c45205521a4ef28f73c6391d8a25e92893134f2e \ + --hash=sha256:c21c49fb1042386df081cb5d86759792ab89efca84cf114889191cd09aacc80c # via # documenteer # nodeenv diff --git a/requirements/main.in b/requirements/main.in index b9df08b1..2d1bac8d 100644 --- a/requirements/main.in +++ b/requirements/main.in @@ -26,3 +26,4 @@ markdown-it-py[linkify,plugins] mdformat mdformat-gfm PyYAML +sse-starlette diff --git 
a/requirements/main.txt b/requirements/main.txt index 4cdd8e8c..6005e591 100644 --- a/requirements/main.txt +++ b/requirements/main.txt @@ -13,6 +13,7 @@ anyio==4.3.0 \ --hash=sha256:f75253795a87df48568485fd18cdd2a3fa5c4f7c5be8e5e36637733fce06fed6 # via # httpx + # sse-starlette # starlette # watchfiles arq==0.25.0 \ @@ -458,13 +459,13 @@ jsonschema-specifications==2023.12.1 \ --hash=sha256:48a76787b3e70f5ed53f1160d2b81f586e4ca6d1548c5de7085d1682674764cc \ --hash=sha256:87e4fdf3a94858b8a2ba2778d9ba57d8a9cafca7c7489c46ba0d30a8bc6a9c3c # via jsonschema -jupyter-client==8.6.0 \ - --hash=sha256:0642244bb83b4764ae60d07e010e15f0e2d275ec4e918a8f7b80fbbef3ca60c7 \ - --hash=sha256:909c474dbe62582ae62b758bca86d6518c85234bdee2d908c778db6d72f39d99 +jupyter-client==8.6.1 \ + --hash=sha256:3b7bd22f058434e3b9a7ea4b1500ed47de2713872288c0d511d19926f99b459f \ + --hash=sha256:e842515e2bab8e19186d89fdfea7abd15e39dd581f94e399f00e2af5a1652d3f # via nbclient -jupyter-core==5.7.1 \ - --hash=sha256:c65c82126453a723a2804aa52409930434598fd9d35091d63dfb919d2b765bb7 \ - --hash=sha256:de61a9d7fc71240f688b2fb5ab659fbb56979458dc66a71decd098e03c79e218 +jupyter-core==5.7.2 \ + --hash=sha256:4f7315d2f6b4bcf2e3e7cb6e46772eba760ae459cd1f59d29eb57b0a01bd7409 \ + --hash=sha256:aa5f8d32bbf6b431ac830496da7392035d6f61b4f54872f15c4bd2a9c3f536d9 # via # jupyter-client # nbclient @@ -579,17 +580,17 @@ mistune==3.0.2 \ --hash=sha256:71481854c30fdbc938963d3605b72501f5c10a9320ecd412c121c163a1c7d205 \ --hash=sha256:fc7f93ded930c92394ef2cb6f04a8aabab4117a91449e72dcc8dfa646a508be8 # via nbconvert -nbclient==0.9.0 \ - --hash=sha256:4b28c207877cf33ef3a9838cdc7a54c5ceff981194a82eac59d558f05487295e \ - --hash=sha256:a3a1ddfb34d4a9d17fc744d655962714a866639acd30130e9be84191cd97cd15 +nbclient==0.10.0 \ + --hash=sha256:4b3f1b7dba531e498449c4db4f53da339c91d449dc11e9af3a43b4eb5c5abb09 \ + --hash=sha256:f13e3529332a1f1f81d82a53210322476a168bb7090a0289c795fe9cc11c9d3f # via nbconvert nbconvert==7.16.2 \ 
--hash=sha256:0c01c23981a8de0220255706822c40b751438e32467d6a686e26be08ba784382 \ --hash=sha256:8310edd41e1c43947e4ecf16614c61469ebc024898eb808cce0999860fc9fb16 # via -r requirements/main.in -nbformat==5.9.2 \ - --hash=sha256:1c5172d786a41b82bcfd0c23f9e6b6f072e8fb49c39250219e4acfff1efe89e9 \ - --hash=sha256:5f98b5ba1997dff175e77e0c17d5c10a96eaed2cbd1de3533d1fc35d5e111192 +nbformat==5.10.3 \ + --hash=sha256:60ed5e910ef7c6264b87d644f276b1b49e24011930deef54605188ddeb211685 \ + --hash=sha256:d9476ca28676799af85385f409b49d95e199951477a159a576ef2a675151e5e8 # via # -r requirements/main.in # nbclient @@ -612,9 +613,9 @@ pycparser==2.21 \ --hash=sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9 \ --hash=sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206 # via cffi -pydantic[email]==2.6.3 \ - --hash=sha256:72c6034df47f46ccdf81869fddb81aade68056003900a8724a4f160700016a2a \ - --hash=sha256:e07805c4c7f5c6826e33a1d4c9d47950d7eaf34868e2690f8594d2e30241f11f +pydantic[email]==2.6.4 \ + --hash=sha256:b1704e0847db01817624a6b86766967f552dd9dbf3afba4004409f908dcc84e6 \ + --hash=sha256:cc46fce86607580867bdc3361ad462bab9c222ef042d3da86f2fb333e1d916c5 # via # -r requirements/main.in # fastapi @@ -881,9 +882,9 @@ redis[hiredis]==5.0.3 \ # via # arq # safir -referencing==0.33.0 \ - --hash=sha256:39240f2ecc770258f28b642dd47fd74bc8b02484de54e1882b74b35ebd779bd5 \ - --hash=sha256:c775fedf74bc0f9189c2a3be1c12fd03e8c23f4d371dce795df44e06c5b412f7 +referencing==0.34.0 \ + --hash=sha256:5773bd84ef41799a5a8ca72dc34590c041eb01bf9aa02632b4a973fb0181a844 \ + --hash=sha256:d53ae300ceddd3169f1ffa9caf2cb7b769e92657e4fafb23d34b93679116dfd4 # via # jsonschema # jsonschema-specifications @@ -990,9 +991,9 @@ rpds-py==0.18.0 \ # via # jsonschema # referencing -safir[arq,db,redis]==5.2.1 \ - --hash=sha256:1b61cc72881ddfb66e1f84b6c34ca7e062f27b5669b9d1d07377ebd117ce3ebf \ - --hash=sha256:e39e2260e87303de9aaac157b45743ec9f82d2e84065d10b21008fceaa8aa407 
+safir[arq,db,redis]==5.2.2 \ + --hash=sha256:13069fb1413443be3685e337ce54f25fc427a86ff718c1e3ca44daaaf5c03d34 \ + --hash=sha256:e6ad2553e60d1b74bfdbb47bda83776843bb74c3599760b66391253bb0bd4ddb # via -r requirements/main.in six==1.16.0 \ --hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 \ @@ -1061,6 +1062,10 @@ sqlalchemy[asyncio]==2.0.28 \ --hash=sha256:feea693c452d85ea0015ebe3bb9cd15b6f49acc1a31c28b3c50f4db0f8fb1e71 \ --hash=sha256:fffcc8edc508801ed2e6a4e7b0d150a62196fd28b4e16ab9f65192e8186102b6 # via safir +sse-starlette==2.0.0 \ + --hash=sha256:0c43cc43aca4884c88c8416b65777c4de874cc4773e6458d3579c0a353dc2fb7 \ + --hash=sha256:c4dd134302cb9708d47cae23c365fe0a089aa2a875d2f887ac80f235a9ee5744 + # via -r requirements/main.in starlette==0.36.3 \ --hash=sha256:13d429aa93a61dc40bf503e8c801db1f1bca3dc706b10ef2434a36123568f044 \ --hash=sha256:90a671733cfb35771d8cc605e0b679d23b992f8dcfad48cc60b38cb29aeb7080 @@ -1068,6 +1073,7 @@ starlette==0.36.3 \ # -r requirements/main.in # fastapi # safir + # sse-starlette structlog==24.1.0 \ --hash=sha256:3f6efe7d25fab6e86f277713c218044669906537bb717c1807a09d46bca0714d \ --hash=sha256:41a09886e4d55df25bdcb9b5c9674bccfab723ff43e0a86a1b7b236be8e57b16 @@ -1089,9 +1095,9 @@ tornado==6.4 \ --hash=sha256:f7894c581ecdcf91666a0912f18ce5e757213999e183ebfc2c3fdbf4d5bd764e \ --hash=sha256:fd03192e287fbd0899dd8f81c6fb9cbbc69194d2074b38f384cb6fa72b80e9c2 # via jupyter-client -traitlets==5.14.1 \ - --hash=sha256:2e5a030e6eff91737c643231bfcf04a65b0132078dad75e4936700b213652e74 \ - --hash=sha256:8585105b371a04b8316a43d5ce29c098575c2e477850b62b848b964f1444527e +traitlets==5.14.2 \ + --hash=sha256:8cdd83c040dab7d1dee822678e5f5d100b514f7b72b01615b26fc5718916fdf9 \ + --hash=sha256:fcdf85684a772ddeba87db2f398ce00b40ff550d1528c03c14dbf6a02003cd80 # via # jupyter-client # jupyter-core @@ -1118,7 +1124,9 @@ uritemplate==4.1.1 \ uvicorn[standard]==0.28.0 \ 
--hash=sha256:6623abbbe6176204a4226e67607b4d52cc60ff62cda0ff177613645cefa2ece1 \ --hash=sha256:cab4473b5d1eaeb5a0f6375ac4bc85007ffc75c3cc1768816d9e5d589857b067 - # via -r requirements/main.in + # via + # -r requirements/main.in + # sse-starlette uvloop==0.19.0 \ --hash=sha256:0246f4fd1bf2bf702e06b0d45ee91677ee5c31242f39aab4ea6fe0c51aedd0fd \ --hash=sha256:02506dc23a5d90e04d4f65c7791e65cf44bd91b37f24cfc3ef6cf2aff05dc7ec \ From 112f4732a2cb481f328b57bca1c8fc2ce5f7b660 Mon Sep 17 00:00:00 2001 From: Jonathan Sick Date: Tue, 19 Mar 2024 18:22:22 -0400 Subject: [PATCH 07/12] NbHtmlModel generates its own query string Now NbHtmlModel can generate the query parameters for the page instance. I do want to improve this by coordinating with the code that casts query parameters into values so we can ensure the round trip of url to NbHtmlModel (and NbDisplaySettings) and then back to URL is correct. --- src/timessquare/domain/nbhtml.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/timessquare/domain/nbhtml.py b/src/timessquare/domain/nbhtml.py index 4994590e..92b8816a 100644 --- a/src/timessquare/domain/nbhtml.py +++ b/src/timessquare/domain/nbhtml.py @@ -124,9 +124,27 @@ def create_key(self) -> NbHtmlKey: return NbHtmlKey( name=self.page_name, values=dict(self.values), - display_settings=NbDisplaySettings(hide_code=self.hide_code), + display_settings=self.display_settings, ) + @property + def url_params(self) -> dict[str, str]: + """The URL query parameters for this HTML rendering, + including both notebook variables and display settings. + """ + # TODO(jonathansick): Do we need to worry about encoding these values + # back to strings. For example, a bool value should go back to a 1 or 0 + # Perhaps this code should be coordinated with parameter casting in + # `timessquare.domain.page.PageParameterSchema.cast_value`. 
+ params = {key: str(value) for key, value in self.values.items()} + params.update(self.display_settings.url_params) + return params + + @property + def display_settings(self) -> NbDisplaySettings: + """The display settings for this HTML rendering.""" + return NbDisplaySettings(hide_code=self.hide_code) + @dataclass class NbHtmlKey(PageInstanceIdModel): @@ -153,3 +171,8 @@ def cache_key(self) -> str: "utf-8" ) ).decode("utf-8") + + @property + def url_params(self) -> dict[str, str]: + """Get the URL query parameters for these display settings.""" + return {"ts_hide_code": str(int(self.hide_code))} From 43e26f71b758beb286a7682c7f7598645fab875e Mon Sep 17 00:00:00 2001 From: Jonathan Sick Date: Wed, 20 Mar 2024 12:11:12 -0400 Subject: [PATCH 08/12] Add GET /v1/pages/:page/html/events SSE endpoint This is a server-sent events endpoint that provides a stream of updates about a notebook's execution. This is particularly useful for a client to follow the progress of recalculating a page instance. This endpoint might also be able to replace the /htmlstatus endpoint so that a client no longer needs to poll it for the current HTML version. Right now the event generator implements its own polling loop to ask noteburst for status while a job is in progress. Ideally we'd like to only send an event when there's a change (i.e. noteburst API sends its own SSE stream driven by the arq queue events) and that extra events aren't sent while the HTML remains static to reduce the overall bandwidth. 
--- src/timessquare/domain/ssemodels.py | 161 +++++++++++++++++++++++ src/timessquare/handlers/v1/endpoints.py | 43 ++++++ src/timessquare/services/page.py | 71 +++++++++- 3 files changed, 274 insertions(+), 1 deletion(-) create mode 100644 src/timessquare/domain/ssemodels.py diff --git a/src/timessquare/domain/ssemodels.py b/src/timessquare/domain/ssemodels.py new file mode 100644 index 00000000..71adaa06 --- /dev/null +++ b/src/timessquare/domain/ssemodels.py @@ -0,0 +1,161 @@ +"""Models for Server-Sent Events (SSE) endpoints. + +Typically external models are maintained in the handlers subpackage, but +SSE emits data at a lower level so we provide a specific module for SSE +payload models that Times Square provides. +""" + +from __future__ import annotations + +from collections.abc import Mapping +from datetime import datetime, timedelta +from typing import Any +from urllib.parse import urlencode + +from pydantic import AnyHttpUrl, BaseModel, Field, field_serializer +from sse_starlette import ServerSentEvent + +from ..storage.noteburst import NoteburstJobResponseModel, NoteburstJobStatus +from .nbhtml import NbHtmlModel + + +class HtmlEventsModel(BaseModel): + """Model for the notebook execution and HTML rendering events emitted by + the SSE endpoint. + """ + + date_submitted: datetime | None = Field( + ..., + description=( + "The time when the notebook execution job was submitted, or None " + "if no job is ongoing." + ), + ) + + date_started: datetime | None = Field( + ..., + description=( + "The time when the notebook execution started, or None if no job " + "is ongoing or the execution hasn't started yet." + ), + ) + + date_finished: datetime | None = Field( + ..., + description=( + "The time when the notebook execution finished, or None if no job " + "is ongoing or the execution hasn't finished yet."
+ ), + ) + + execution_status: NoteburstJobStatus | None = Field( + ..., + description=( + "The status of the notebook execution job, or None if " + "the notebook has not been queued to executed yet." + ), + ) + + execution_duration: timedelta | None = Field( + ..., + description=( + "The duration of the notebook execution in seconds, or None if no " + "execution has completed." + ), + ) + + html_hash: str | None = Field( + ..., + description=( + "The sha256 hash of the HTML content, or None if no HTML is " + "available." + ), + ) + + html_url: AnyHttpUrl = Field( + ..., + description=( + "The URL of the HTML content, or None if no HTML is available." + ), + ) + + @field_serializer("execution_duration") + def serialize_timedelta_seconds( + self, td: timedelta | None + ) -> float | None: + if td: + return td.total_seconds() + return None + + @classmethod + def create( + cls, + *, + noteburst_job: NoteburstJobResponseModel | None, + nbhtml: NbHtmlModel | None, + html_base_url: str, + request_query_params: Mapping[str, Any], + ) -> HtmlEventsModel: + """Create an instance from a ``NoteburstJobResponseModel`` and the + Redis-cached ``NbHtmlModel`` (if available). + """ + # Where dates are sourced from depends on whether the job is ongoing + # in Noteburst or if it completed and the HTML was rendered.
+ + date_started: datetime | None = None + date_submitted: datetime | None = None + date_finished: datetime | None = None + execution_status: NoteburstJobStatus | None = None + execution_duration: timedelta | None = None + html_hash: str | None = None + + if noteburst_job: + # Execution is ongoing at Noteburst so derive dates from the job + date_submitted = noteburst_job.enqueue_time + date_started = noteburst_job.start_time + date_finished = noteburst_job.finish_time + execution_status = noteburst_job.status + if ( + noteburst_job.status == NoteburstJobStatus.complete + and noteburst_job.finish_time + and noteburst_job.start_time + ): + execution_duration = ( + noteburst_job.finish_time - noteburst_job.start_time + ) + else: + execution_duration = None + elif nbhtml: + # Execution has completed and the HTML is available + date_started = nbhtml.date_executed - nbhtml.execution_duration + date_submitted = date_started # This is an approximation + date_finished = nbhtml.date_executed + execution_status = NoteburstJobStatus.complete + execution_duration = nbhtml.execution_duration + + if nbhtml: + qs = urlencode(nbhtml.url_params) + html_url = AnyHttpUrl(f"{html_base_url}?{qs}") + html_hash = nbhtml.html_hash + elif noteburst_job: + # If there isn't any HTML already, then we can't use the resolved + # values from NbHtmlModel, so we use the query string from the + # initial request instead + qs = urlencode(request_query_params) + html_url = AnyHttpUrl(f"{html_base_url}?{qs}") + else: + html_url = AnyHttpUrl(html_base_url) + + return cls( + date_submitted=date_submitted, + date_started=date_started, + date_finished=date_finished, + execution_status=execution_status, + execution_duration=execution_duration, + html_hash=html_hash, + html_url=html_url, + ) + + def to_sse(self) -> ServerSentEvent: + """Serialize the model to a ServerSentEvent.""" + return ServerSentEvent(self.model_dump_json()) diff --git a/src/timessquare/handlers/v1/endpoints.py 
b/src/timessquare/handlers/v1/endpoints.py index 3a961aac..f5671d79 100644 --- a/src/timessquare/handlers/v1/endpoints.py +++ b/src/timessquare/handlers/v1/endpoints.py @@ -8,6 +8,7 @@ from safir.metadata import get_metadata from safir.models import ErrorLocation, ErrorModel from safir.slack.webhook import SlackRouteErrorHandler +from sse_starlette import EventSourceResponse from timessquare.config import config from timessquare.dependencies.requestcontext import ( @@ -447,6 +448,48 @@ async def get_page_html_status( return HtmlStatus.from_html(html=html, request=context.request) +@v1_router.get( + "/pages/{page}/html/events", + summary=( + "Subscribe to an event stream for a page's execution and rendering." + ), + name="get_page_html_events", + tags=[ApiTags.pages], + responses={ + 200: { + "content": {"text/event-stream": {}}, + "description": "Event stream", + }, + 404: {"description": "Page not found", "model": ErrorModel}, + 422: {"description": "Invalid parameter", "model": ErrorModel}, + }, +) +async def get_page_html_events( + page: Annotated[str, page_path_parameter], + context: Annotated[RequestContext, Depends(context_dependency)], +) -> EventSourceResponse: + """Subscribe to an event stream for a page's execution and rendering.""" + context.logger.debug("Subscribing to page events") + page_service = context.page_service + html_base_url = context.request.url_for("get_page_html", page=page) + async with context.session.begin(): + try: + generator = await page_service.get_html_events_iter( + name=page, + query_params=context.request.query_params, + html_base_url=str(html_base_url), + ) + return EventSourceResponse(generator, send_timeout=5) + except PageNotFoundError as e: + e.location = ErrorLocation.path + e.field_path = ["page"] + raise + except ParameterSchemaValidationError as e: + e.location = ErrorLocation.query + e.field_path = [e.parameter] + raise + + @v1_router.get( "/github", summary="Get a tree of GitHub-backed pages", diff --git 
a/src/timessquare/services/page.py b/src/timessquare/services/page.py index 720500ad..39dd1d08 100644 --- a/src/timessquare/services/page.py +++ b/src/timessquare/services/page.py @@ -2,7 +2,8 @@ from __future__ import annotations -from collections.abc import Mapping +import asyncio +from collections.abc import AsyncIterator, Mapping from dataclasses import asdict from datetime import UTC, datetime from typing import Any @@ -20,6 +21,7 @@ PageSummaryModel, PersonModel, ) +from ..domain.ssemodels import HtmlEventsModel from ..exceptions import PageNotFoundError from ..storage.nbhtmlcache import NbHtmlCacheStore from ..storage.noteburst import ( @@ -495,3 +497,70 @@ def html_display_settings_matrix(self) -> list[NbDisplaySettings]: NbDisplaySettings(hide_code=True), NbDisplaySettings(hide_code=False), ] + + async def get_html_events_iter( + self, + name: str, + query_params: Mapping[str, Any], + html_base_url: str, + ) -> AsyncIterator[bytes]: + """Get an iterator providing an event stream for the HTML rendering + for a page instance. 
+ """ + page = await self.get_page(name) + resolved_values = page.resolve_and_validate_values(query_params) + # also get the Display settings query params + page_instance = PageInstanceModel( + name=page.name, values=resolved_values, page=page + ) + try: + hide_code = bool(int(query_params.get("ts_hide_code", "1"))) + except Exception as e: + raise ValueError("hide_code query parameter must be 1 or 0") from e + display_settings = NbDisplaySettings(hide_code=hide_code) + page_key = NbHtmlKey( + name=page.name, + values=resolved_values, + display_settings=display_settings, + ) + + async def iterator() -> AsyncIterator[bytes]: + try: + while True: + job = await self._job_store.get_instance(page_instance) + noteburst_data: NoteburstJobResponseModel | None = None + # model for html status + if job: + self._logger.debug( + "Got job in events loop", job_url=str(job.job_url) + ) + noteburst_url = str(job.job_url) + noteburst_response = await self.noteburst_api.get_job( + noteburst_url + ) + if noteburst_response.data: + noteburst_data = noteburst_response.data + + nbhtml = await self._html_store.get_instance(page_key) + + payload = HtmlEventsModel.create( + noteburst_job=noteburst_data, + nbhtml=nbhtml, + request_query_params=query_params, + html_base_url=html_base_url, + ) + self._logger.debug( + "Built payload in events loop", payload=payload + ) + yield payload.to_sse().encode() + + await asyncio.sleep(1) + except asyncio.CancelledError: + self._logger.debug("HTML events disconnected from client") + # cleanup as necessary + raise + except Exception as e: + self._logger.exception("Error in HTML events iterator", e=e) + raise + + return iterator() From 2849b946a242225585941b2b197f1f865307d29b Mon Sep 17 00:00:00 2001 From: Jonathan Sick Date: Wed, 20 Mar 2024 16:26:57 -0400 Subject: [PATCH 09/12] Add httpx-sse dependency for testing --- requirements/dev.in | 1 + requirements/dev.txt | 22 +++++++++++++--------- requirements/main.txt | 6 +++--- 3 files changed, 17 
insertions(+), 12 deletions(-) diff --git a/requirements/dev.in b/requirements/dev.in index 1f8037c5..9faf3869 100644 --- a/requirements/dev.in +++ b/requirements/dev.in @@ -23,3 +23,4 @@ respx types-PyYAML types-redis documenteer[guide]>=1.0.0a1 +httpx-sse == 0.4.0 diff --git a/requirements/dev.txt b/requirements/dev.txt index 123e86c6..62d23020 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -357,6 +357,10 @@ httpx==0.27.0 \ # -c requirements/main.txt # -r requirements/dev.in # respx +httpx-sse==0.4.0 \ + --hash=sha256:1e81a3a3070ce322add1d3529ed42eb5f70817f45ed6ec915ab753f961139721 \ + --hash=sha256:f329af6eae57eaa2bdfd962b42524764af68075ea87370a2de920af5341e318f + # via -r requirements/dev.in identify==2.5.35 \ --hash=sha256:10a7ca245cfcd756a554a7288159f72ff105ad233c7c4b9c6f0f4d108f5f6791 \ --hash=sha256:c4de0081837b211594f8e877a6b4fad7ca32bbfc1a9307fdd61c28bfe923f13e @@ -685,9 +689,9 @@ pytest==8.1.1 \ # pytest-asyncio # pytest-cov # pytest-mock -pytest-asyncio==0.23.5.post1 \ - --hash=sha256:30f54d27774e79ac409778889880242b0403d09cabd65b727ce90fe92dd5d80e \ - --hash=sha256:b9a8806bea78c21276bc34321bbf234ba1b2ea5b30d9f0ce0f2dea45e4685813 +pytest-asyncio==0.23.6 \ + --hash=sha256:68516fdd1018ac57b846c9846b954f0393b26f094764a28c955eabb0536a4e8a \ + --hash=sha256:ffe523a89c1c222598c76856e76852b787504ddb72dd5d9b6617ffa8aa2cde5f # via -r requirements/dev.in pytest-cov==4.1.0 \ --hash=sha256:3904b13dfbfec47f003b8e77fd5b589cd11904a21ddf1ab38a64f204d6a10ef6 \ @@ -769,9 +773,9 @@ requests==2.31.0 \ # via # documenteer # sphinx -respx==0.20.2 \ - --hash=sha256:07cf4108b1c88b82010f67d3c831dae33a375c7b436e54d87737c7f9f99be643 \ - --hash=sha256:ab8e1cf6da28a5b2dd883ea617f8130f77f676736e6e9e4a25817ad116a172c9 +respx==0.21.0 \ + --hash=sha256:0293d9c92b58f5d31bf24e4545129779b4194de156227eae8f5f8eedb5eaa6cc \ + --hash=sha256:30f6ec0e82d00bc7b664d79155e5df34ce40b5183f6eb4460e371ced7ae7232e # via -r requirements/dev.in rpds-py==0.18.0 \ 
--hash=sha256:01e36a39af54a30f28b73096dd39b6802eddd04c90dbe161c1b8dbe22353189f \ @@ -1085,9 +1089,9 @@ urllib3==2.2.1 \ # via # documenteer # requests -uvicorn==0.28.0 \ - --hash=sha256:6623abbbe6176204a4226e67607b4d52cc60ff62cda0ff177613645cefa2ece1 \ - --hash=sha256:cab4473b5d1eaeb5a0f6375ac4bc85007ffc75c3cc1768816d9e5d589857b067 +uvicorn==0.29.0 \ + --hash=sha256:2c2aac7ff4f4365c206fd773a39bf4ebd1047c238f8b8268ad996829323473de \ + --hash=sha256:6a69214c0b6a087462412670b3ef21224fa48cae0e452b5883e8e8bdfdd11dd0 # via # -c requirements/main.txt # -r requirements/dev.in diff --git a/requirements/main.txt b/requirements/main.txt index 6005e591..e042c7fb 100644 --- a/requirements/main.txt +++ b/requirements/main.txt @@ -1121,9 +1121,9 @@ uritemplate==4.1.1 \ --hash=sha256:4346edfc5c3b79f694bccd6d6099a322bbeb628dbf2cd86eea55a456ce5124f0 \ --hash=sha256:830c08b8d99bdd312ea4ead05994a38e8936266f84b9a7878232db50b044e02e # via gidgethub -uvicorn[standard]==0.28.0 \ - --hash=sha256:6623abbbe6176204a4226e67607b4d52cc60ff62cda0ff177613645cefa2ece1 \ - --hash=sha256:cab4473b5d1eaeb5a0f6375ac4bc85007ffc75c3cc1768816d9e5d589857b067 +uvicorn[standard]==0.29.0 \ + --hash=sha256:2c2aac7ff4f4365c206fd773a39bf4ebd1047c238f8b8268ad996829323473de \ + --hash=sha256:6a69214c0b6a087462412670b3ef21224fa48cae0e452b5883e8e8bdfdd11dd0 # via # -r requirements/main.in # sse-starlette From ecb5d664eb4efb794b57e65bf6084bed561e4166 Mon Sep 17 00:00:00 2001 From: Jonathan Sick Date: Thu, 21 Mar 2024 18:59:05 -0400 Subject: [PATCH 10/12] Add DeleteHtmlResponse model for soft html deletes This model provides links to both the HTML endpoint and to the server-sent events endpoint, both of which the client might want to use after sending a soft-delete. 
--- src/timessquare/domain/nbhtml.py | 8 +++- src/timessquare/handlers/v1/endpoints.py | 10 +++-- src/timessquare/handlers/v1/models.py | 53 ++++++++++++++++++++++-- src/timessquare/services/page.py | 3 +- 4 files changed, 66 insertions(+), 8 deletions(-) diff --git a/src/timessquare/domain/nbhtml.py b/src/timessquare/domain/nbhtml.py index 92b8816a..e1166e4c 100644 --- a/src/timessquare/domain/nbhtml.py +++ b/src/timessquare/domain/nbhtml.py @@ -4,10 +4,11 @@ import json from base64 import b64encode +from collections.abc import Mapping from dataclasses import asdict, dataclass from datetime import UTC, datetime, timedelta from hashlib import sha256 -from typing import Annotated, Any +from typing import Annotated, Any, Self from nbconvert.exporters.html import HTMLExporter from pydantic import BaseModel, Field @@ -164,6 +165,11 @@ class NbDisplaySettings: hide_code: bool + @classmethod + def from_url_params(cls, params: Mapping[str, str]) -> Self: + """Create an instance from URL query parameters.""" + return cls(hide_code=bool(int(params.get("ts_hide_code", 1)))) + @property def cache_key(self) -> str: return b64encode( diff --git a/src/timessquare/handlers/v1/endpoints.py b/src/timessquare/handlers/v1/endpoints.py index f5671d79..f7023076 100644 --- a/src/timessquare/handlers/v1/endpoints.py +++ b/src/timessquare/handlers/v1/endpoints.py @@ -23,6 +23,7 @@ from ..apitags import ApiTags from .models import ( + DeleteHtmlResponse, GitHubContentsRoot, GitHubPrContents, HtmlStatus, @@ -377,6 +378,7 @@ async def get_page_html( "/pages/{page}/html", summary="Delete the cached HTML of a notebook.", name="delete_page_html", + response_model=DeleteHtmlResponse, tags=[ApiTags.pages], responses={ 404: {"description": "Cached HTML not found", "model": ErrorModel}, @@ -386,7 +388,7 @@ async def get_page_html( async def delete_page_html( page: Annotated[str, page_path_parameter], context: Annotated[RequestContext, Depends(context_dependency)], -) -> PlainTextResponse: +) -> 
DeleteHtmlResponse: """Delete the cached HTML of a notebook execution, causing it to be recomputed in the background. @@ -398,7 +400,7 @@ async def delete_page_html( page_service = context.page_service async with context.session.begin(): try: - await page_service.soft_delete_html( + page_instance = await page_service.soft_delete_html( name=page, query_params=context.request.query_params ) except PageNotFoundError as e: @@ -412,7 +414,9 @@ async def delete_page_html( # Ulimately create a resource that describes the background task; # or subscribe the client to a SSE stream that reports the task's progress. - return PlainTextResponse(status_code=202) + return DeleteHtmlResponse.from_page_instance( + page_instance=page_instance, request=context.request + ) @v1_router.get( diff --git a/src/timessquare/handlers/v1/models.py b/src/timessquare/handlers/v1/models.py index 01f433f4..099a4b88 100644 --- a/src/timessquare/handlers/v1/models.py +++ b/src/timessquare/handlers/v1/models.py @@ -4,7 +4,7 @@ from datetime import datetime from enum import Enum -from typing import Any +from typing import Any, Self from urllib.parse import urlencode from fastapi import Request @@ -20,8 +20,13 @@ from safir.metadata import Metadata as SafirMetadata from timessquare.domain.githubtree import GitHubNode, GitHubNodeType -from timessquare.domain.nbhtml import NbHtmlModel -from timessquare.domain.page import PageModel, PageSummaryModel, PersonModel +from timessquare.domain.nbhtml import NbDisplaySettings, NbHtmlModel +from timessquare.domain.page import ( + PageInstanceIdModel, + PageModel, + PageSummaryModel, + PersonModel, +) class Index(BaseModel): @@ -415,6 +420,48 @@ def from_html( ) +class DeleteHtmlResponse(BaseModel): + """Response for a successful HTML soft-deletion.""" + + html_url: AnyHttpUrl = page_html_field + + html_events_url: AnyHttpUrl = Field( + ..., + title="HTML events URL", + description=( + "The URL for the server-sent events stream that gives updates " + "about the 
HTML rendering." + ), + ) + + @classmethod + def from_page_instance( + cls, *, page_instance: PageInstanceIdModel, request: Request + ) -> Self: + """Create a DeleteHtmlResponse from the deleted page instance.""" + base_html_url = str( + request.url_for("get_page_html", page=page_instance.name) + ) + base_html_events_url = str( + request.url_for("get_page_html_events", page=page_instance.name) + ) + display_settings = NbDisplaySettings.from_url_params( + request.query_params + ) + values = dict(page_instance.values) + values.update(display_settings.url_params) + qs = urlencode(values) + html_url = f"{base_html_url}?{qs}" if qs else base_html_url + html_events_url = ( + f"{base_html_events_url}?{qs}" if qs else base_html_events_url + ) + + return cls( + html_url=AnyHttpUrl(html_url), + html_events_url=AnyHttpUrl(html_events_url), + ) + + class PostPageRequest(BaseModel): """A payload for creating a new page.""" diff --git a/src/timessquare/services/page.py b/src/timessquare/services/page.py index 39dd1d08..5be4368a 100644 --- a/src/timessquare/services/page.py +++ b/src/timessquare/services/page.py @@ -394,7 +394,7 @@ async def _get_html_from_noteburst_job( async def soft_delete_html( self, *, name: str, query_params: Mapping[str, Any] - ) -> None: + ) -> PageInstanceModel: """Soft delete the HTML for a page given the query parameters.""" page = await self.get_page(name) resolved_values = page.resolve_and_validate_values(query_params) @@ -409,6 +409,7 @@ async def soft_delete_html( noteburst_job=exec_info.noteburst_job, ) # Format the job for a response + return page_instance async def request_noteburst_execution( self, page_instance: PageInstanceModel, *, enable_retry: bool = True From 47f99978d797804d40ec2df335723a33605c39d7 Mon Sep 17 00:00:00 2001 From: Jonathan Sick Date: Thu, 21 Mar 2024 19:21:42 -0400 Subject: [PATCH 11/12] Add html_events_url to the Page resource --- src/timessquare/handlers/v1/models.py | 33 ++++++++++++++++----------- 1 file changed, 20 
insertions(+), 13 deletions(-) diff --git a/src/timessquare/handlers/v1/models.py b/src/timessquare/handlers/v1/models.py index 099a4b88..c87f3855 100644 --- a/src/timessquare/handlers/v1/models.py +++ b/src/timessquare/handlers/v1/models.py @@ -134,6 +134,15 @@ class Index(BaseModel): ), ) +page_html_events_field = Field( + ..., + title="HTML events URL", + description=( + "The URL for the server-sent events stream that gives updates " + "about the HTML rendering." + ), +) + ipynb_field = Field( ..., examples=["{...}"], @@ -276,6 +285,8 @@ class Page(BaseModel): html_status_url: AnyHttpUrl = page_html_status_field + html_events_url: AnyHttpUrl = page_html_events_field + parameters: dict[str, dict[str, Any]] = page_parameters_field github: GitHubSourceMetadata | None = Field( @@ -318,19 +329,22 @@ def from_domain(cls, *, page: PageModel, request: Request) -> Page: uploader_username=page.uploader_username, parameters=parameters, self_url=AnyHttpUrl( - str(request.url_for("get_page", page=page.name)), + str(request.url_for("get_page", page=page.name)) ), source_url=AnyHttpUrl( - str(request.url_for("get_page_source", page=page.name)), + str(request.url_for("get_page_source", page=page.name)) ), rendered_url=AnyHttpUrl( - str(request.url_for("get_rendered_notebook", page=page.name)), + str(request.url_for("get_rendered_notebook", page=page.name)) ), html_url=AnyHttpUrl( - str(request.url_for("get_page_html", page=page.name)), + str(request.url_for("get_page_html", page=page.name)) ), html_status_url=AnyHttpUrl( - str(request.url_for("get_page_html_status", page=page.name)), + str(request.url_for("get_page_html_status", page=page.name)) + ), + html_events_url=AnyHttpUrl( + str(request.url_for("get_page_html_events", page=page.name)) ), github=github_metadata, ) @@ -425,14 +439,7 @@ class DeleteHtmlResponse(BaseModel): html_url: AnyHttpUrl = page_html_field - html_events_url: AnyHttpUrl = Field( - ..., - title="HTML events URL", - description=( - "The URL for the 
server-sent events stream that gives updates " - "about the HTML rendering." - ), - ) + html_events_url: AnyHttpUrl = page_html_events_field @classmethod def from_page_instance( From fecc6367b159c103023586e52390a659e10b310d Mon Sep 17 00:00:00 2001 From: Jonathan Sick Date: Wed, 27 Mar 2024 11:05:31 -0400 Subject: [PATCH 12/12] Update change log for 0.11.0 --- CHANGELOG.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 00014f05..c70fee40 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,15 @@ Collect fragments into this file with: scriv collect --version X.Y.Z + + +## 0.11.0 (2024-03-27) + +### New features + +- New support for background recomputation of a page instance (cached HTML) with the new `DELETE /v1/pages/:page/html?{params}` endpoint. This endpoint triggers a Noteburst computation of the page instance and deletes the currently-cached HTML once that computation is complete. This API provides a way for users to request a recomputation of a page instance without affecting other users that may be viewing that page instance. +- A new server-sent events endpoint for getting updates on the status of a page instance's computation and HTML rendering: `GET /v1/pages/:page/html/events?{params}`. This endpoint should replace the current practice of clients polling the `GET /v1/pages/:page/htmlstatus` endpoint to determine when a page instance's HTML is ready to be displayed. The events endpoint also provides additional metadata, such as the time when the current computation job was queued so that clients can provide more detailed status information to users. This endpoint works well with the new `DELETE /v1/pages/:page/html?{params}` endpoint, as it can provide updates on the status of the recomputation job while still linking to the existing cached HTML. + ## 0.10.0 (2024-03-13)