diff --git a/sphinx/ext/intersphinx/_load.py b/sphinx/ext/intersphinx/_load.py
index 71049153dae..4b53cca155a 100644
--- a/sphinx/ext/intersphinx/_load.py
+++ b/sphinx/ext/intersphinx/_load.py
@@ -6,6 +6,7 @@
 import functools
 import posixpath
 import time
+from operator import itemgetter
 from os import path
 from typing import TYPE_CHECKING
 from urllib.parse import urlsplit, urlunsplit
@@ -138,36 +139,41 @@ def load_mappings(app: Sphinx) -> None:
     intersphinx_cache: dict[InventoryURI, InventoryCacheEntry] = inventories.cache
     intersphinx_mapping: IntersphinxMapping = app.config.intersphinx_mapping
 
+    expected_uris = {uri for _name, (uri, _invs) in intersphinx_mapping.values()}
+
+    for uri in frozenset(intersphinx_cache):
+        if intersphinx_cache[uri][0] not in intersphinx_mapping:
+            # Remove all cached entries that are no longer in `intersphinx_mapping`.
+            del intersphinx_cache[uri]
+        elif uri not in expected_uris:
+            # Remove cached entries with a different target URI
+            # than the one in `intersphinx_mapping`.
+            # This happens when the URI in `intersphinx_mapping` is changed.
+            del intersphinx_cache[uri]
+
     with concurrent.futures.ThreadPoolExecutor() as pool:
-        futures = []
-        for name, (uri, invs) in intersphinx_mapping.values():
-            futures.append(pool.submit(
-                fetch_inventory_group, name, uri, invs, intersphinx_cache, app, now,
-            ))
+        futures = [
+            pool.submit(fetch_inventory_group, name, uri, invs, intersphinx_cache, app, now)
+            for name, (uri, invs) in app.config.intersphinx_mapping.values()
+        ]
         updated = [f.result() for f in concurrent.futures.as_completed(futures)]
 
     if any(updated):
+        # clear the local inventories
         inventories.clear()
 
         # Duplicate values in different inventories will shadow each
-        # other; which one will override which can vary between builds
-        # since they are specified using an unordered dict.  To make
-        # it more consistent, we sort the named inventories and then
-        # add the unnamed inventories last.  This means that the
-        # unnamed inventories will shadow the named ones but the named
-        # ones can still be accessed when the name is specified.
-        named_vals = []
-        unnamed_vals = []
-        for name, _expiry, invdata in intersphinx_cache.values():
-            if name:
-                named_vals.append((name, invdata))
-            else:
-                unnamed_vals.append((name, invdata))
-        for name, invdata in sorted(named_vals) + unnamed_vals:
-            if name:
-                inventories.named_inventory[name] = invdata
-            for type, objects in invdata.items():
-                inventories.main_inventory.setdefault(type, {}).update(objects)
+        # other; which one will override which can vary between builds.
+        #
+        # In an attempt to make this more consistent,
+        # we sort the named inventories in the cache
+        # by their name and expiry time ``(NAME, EXPIRY)``.
+        by_name_and_time = itemgetter(0, 1)  # 0: name, 1: expiry
+        cache_values = sorted(intersphinx_cache.values(), key=by_name_and_time)
+        for name, _expiry, invdata in cache_values:
+            inventories.named_inventory[name] = invdata
+            for objtype, objects in invdata.items():
+                inventories.main_inventory.setdefault(objtype, {}).update(objects)
 
 
 def fetch_inventory_group(
@@ -179,39 +185,43 @@
     now: int,
 ) -> bool:
     cache_time = now - app.config.intersphinx_cache_limit * 86400
+
+    updated = False
     failures = []
-    try:
-        for inv in invs:
-            if not inv:
-                inv = posixpath.join(uri, INVENTORY_FILENAME)
-            # decide whether the inventory must be read: always read local
-            # files; remote ones only if the cache time is expired
-            if '://' not in inv or uri not in cache or cache[uri][1] < cache_time:
-                safe_inv_url = _get_safe_url(inv)
-                inv_descriptor = name or 'main_inventory'
-                LOGGER.info(__("loading intersphinx inventory '%s' from %s..."),
-                            inv_descriptor, safe_inv_url)
-                try:
-                    invdata = fetch_inventory(app, uri, inv)
-                except Exception as err:
-                    failures.append(err.args)
-                    continue
-                if invdata:
-                    cache[uri] = name, now, invdata
-                    return True
-        return False
-    finally:
-        if not failures:
-            pass
-        elif len(failures) < len(invs):
-            LOGGER.info(__('encountered some issues with some of the inventories,'
-                           ' but they had working alternatives:'))
-            for fail in failures:
-                LOGGER.info(*fail)
-        else:
-            issues = '\n'.join(f[0] % f[1:] for f in failures)
-            LOGGER.warning(__('failed to reach any of the inventories '
-                              'with the following issues:') + '\n' + issues)
+
+    for location in invs:
+        # location is either None or a non-empty string
+        inv = f'{uri}/{INVENTORY_FILENAME}' if location is None else location
+
+        # decide whether the inventory must be read: always read local
+        # files; remote ones only if the cache time is expired
+        if '://' not in inv or uri not in cache or cache[uri][1] < cache_time:
+            LOGGER.info(__("loading intersphinx inventory '%s' from %s ..."),
+                        name, _get_safe_url(inv))
+
+            try:
+                invdata = fetch_inventory(app, uri, inv)
+            except Exception as err:
+                failures.append(err.args)
+                continue
+
+            if invdata:
+                cache[uri] = name, now, invdata
+                updated = True
+                break
+
+    if not failures:
+        pass
+    elif len(failures) < len(invs):
+        LOGGER.info(__('encountered some issues with some of the inventories,'
+                       ' but they had working alternatives:'))
+        for fail in failures:
+            LOGGER.info(*fail)
+    else:
+        issues = '\n'.join(f[0] % f[1:] for f in failures)
+        LOGGER.warning(__('failed to reach any of the inventories '
+                          'with the following issues:') + '\n' + issues)
+    return updated
 
 
 def fetch_inventory(app: Sphinx, uri: InventoryURI, inv: str) -> Inventory:
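
Review note, not part of the patch: the pruning pass added to load_mappings implements two eviction rules. The sketch below is a minimal, standalone restatement of those rules (plain Python; it assumes the normalised mapping shape {name: (name, (uri, locations))} visible in the hunk and the cache shape {uri: (name, expiry, invdata)}).

    # Minimal sketch of the two eviction rules in load_mappings().
    def prune_cache(cache: dict, mapping: dict) -> None:
        expected_uris = {uri for _name, (uri, _invs) in mapping.values()}
        for uri in frozenset(cache):  # frozenset: we delete while iterating
            if cache[uri][0] not in mapping:
                del cache[uri]  # rule 1: project removed from intersphinx_mapping
            elif uri not in expected_uris:
                del cache[uri]  # rule 2: project kept, but its URI changed

    cache = {'http://localhost:9341/old': ('spam', 10, {})}
    mapping = {'spam': ('spam', ('http://localhost:9341/new', (None,)))}
    prune_cache(cache, mapping)
    assert cache == {}  # the stale entry is evicted and re-fetched on build

The tests below exercise exactly these two rules through full Sphinx builds.
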
diff --git a/tests/test_extensions/test_ext_intersphinx_cache.py b/tests/test_extensions/test_ext_intersphinx_cache.py
new file mode 100644
index 00000000000..e34fab8b2d6
--- /dev/null
+++ b/tests/test_extensions/test_ext_intersphinx_cache.py
@@ -0,0 +1,298 @@
+"""Test the intersphinx extension's inventory cache."""
+
+from __future__ import annotations
+
+import posixpath
+import re
+import zlib
+from http.server import BaseHTTPRequestHandler
+from io import BytesIO
+from typing import TYPE_CHECKING
+
+from sphinx.ext.intersphinx import InventoryAdapter
+from sphinx.testing.util import SphinxTestApp
+
+from tests.utils import http_server
+
+if TYPE_CHECKING:
+    from collections.abc import Iterable
+    from typing import BinaryIO
+
+    from sphinx.util.typing import InventoryItem
+
+BASE_CONFIG = {
+    'extensions': ['sphinx.ext.intersphinx'],
+    'intersphinx_timeout': 0.1,
+}
+
+
+class InventoryEntry:
+    """Entry in the Intersphinx inventory."""
+
+    __slots__ = (
+        'name', 'display_name', 'domain_name',
+        'object_type', 'uri', 'anchor', 'priority',
+    )
+
+    def __init__(
+        self,
+        name: str = 'this',
+        *,
+        display_name: str | None = None,
+        domain_name: str = 'py',
+        object_type: str = 'obj',
+        uri: str = 'index.html',
+        anchor: str = '',
+        priority: int = 0,
+    ):
+        if anchor.endswith(name):
+            anchor = anchor[:-len(name)] + '$'
+
+        if anchor:
+            uri += '#' + anchor
+
+        if display_name is None or display_name == name:
+            display_name = '-'
+
+        self.name = name
+        self.display_name = display_name
+        self.domain_name = domain_name
+        self.object_type = object_type
+        self.uri = uri
+        self.anchor = anchor
+        self.priority = priority
+
+    def format(self) -> str:
+        """Format the entry as it appears in the inventory file."""
+        return (f'{self.name} {self.domain_name}:{self.object_type} '
+                f'{self.priority} {self.uri} {self.display_name}\n')
+
+
+class IntersphinxProject:
+    def __init__(
+        self,
+        *,
+        name: str = 'spam',
+        version: str | int = 1,
+        baseurl: str = '',
+        baseuri: str = '',
+        file: str | None = None,
+    ) -> None:
+        #: The project name.
+        self.name = name
+        #: The escaped project name.
+        self.safe_name = re.sub(r'\s+', ' ', name)
+
+        #: The project version as a string.
+        self.version = version = str(version)
+        #: The escaped project version.
+        self.safe_version = re.sub(r'\s+', ' ', version)
+
+        #: The project base URL (e.g., http://localhost:9341).
+        self.baseurl = baseurl
+        #: The project base URI, relative to *baseurl* (e.g., 'spam').
+        self.uri = baseuri
+        #: The project URL, as specified in :confval:`intersphinx_mapping`.
+        self.url = posixpath.join(baseurl, baseuri)
+        #: The project local file, if any.
+        self.file = file
+
+    @property
+    def record(self) -> dict[str, tuple[str | None, str | None]]:
+        """The :confval:`intersphinx_mapping` record for this project."""
+        return {self.name: (self.url, self.file)}
+
+    def normalise(self, entry: InventoryEntry) -> tuple[str, InventoryItem]:
+        """Format an inventory entry as if it were part of this project."""
+        url = posixpath.join(self.url, entry.uri)
+        return entry.name, (self.safe_name, self.safe_version, url, entry.display_name)
+
+
+class FakeInventory:
+    protocol_version: int
+
+    def __init__(self, project: IntersphinxProject | None = None) -> None:
+        self.project = project or IntersphinxProject()
+
+    def serialise(self, entries: Iterable[InventoryEntry] | None = None) -> bytes:
+        buffer = BytesIO()
+        self._write_headers(buffer)
+        entries = entries or [InventoryEntry()]
+        self._write_body(buffer, (item.format().encode() for item in entries))
+        return buffer.getvalue()
+
+    def _write_headers(self, buffer: BinaryIO) -> None:
+        buffer.write((f'# Sphinx inventory version {self.protocol_version}\n'
+                      f'# Project: {self.project.safe_name}\n'
+                      f'# Version: {self.project.safe_version}\n').encode())
+
+    def _write_body(self, buffer: BinaryIO, lines: Iterable[bytes]) -> None:
+        raise NotImplementedError
+
+
+class FakeInventoryV2(FakeInventory):
+    protocol_version = 2
+
+    def _write_headers(self, buffer: BinaryIO) -> None:
+        super()._write_headers(buffer)
+        buffer.write(b'# The remainder of this file is compressed using zlib.\n')
+
+    def _write_body(self, buffer: BinaryIO, lines: Iterable[bytes]) -> None:
+        compressor = zlib.compressobj(9)
+        buffer.writelines(map(compressor.compress, lines))
+        buffer.write(compressor.flush())
+
+
+class SingleEntryProject(IntersphinxProject):
+    name = 'spam'
+    port = 9341  # fixed port, so that the inventory URLs are stable across builds
+
+    def __init__(
+        self,
+        version: int,
+        route: str,
+        *,
+        item_name: str = 'ham',
+        domain_name: str = 'py',
+        object_type: str = 'module',
+    ) -> None:
+        super().__init__(
+            name=self.name,
+            version=version,
+            baseurl=f'http://localhost:{self.port}',
+            baseuri=route,
+        )
+        self.item_name = item_name
+        self.domain_name = domain_name
+        self.object_type = object_type
+        self.reftype = f'{domain_name}:{object_type}'
+
+    def make_entry(self) -> InventoryEntry:
+        """Get an inventory entry for this project."""
+        name = f'{self.item_name}_{self.version}'
+        return InventoryEntry(name, domain_name=self.domain_name, object_type=self.object_type)
+
+
+def make_inventory_handler(*projects: SingleEntryProject) -> type[BaseHTTPRequestHandler]:
+    name, port = projects[0].name, projects[0].port
+    assert all(p.name == name for p in projects)
+    assert all(p.port == port for p in projects)
+
+    class InventoryHandler(BaseHTTPRequestHandler):
+        def do_GET(self):
+            self.send_response(200, 'OK')
+
+            data = b''
+            for project in projects:
+                # create the data to return depending on the endpoint
+                if self.path.startswith(f'/{project.uri}/'):
+                    entry = project.make_entry()
+                    data = FakeInventoryV2(project).serialise([entry])
+                    break
+
+            self.send_header('Content-Length', str(len(data)))
+            self.end_headers()
+            self.wfile.write(data)
+
+        def log_message(*args, **kwargs):
+            pass
+
+    return InventoryHandler
+
+
+def test_intersphinx_project_fixture():
+    # check that our fixture class is correct
+    project = SingleEntryProject(1, 'route')
+    assert project.url == 'http://localhost:9341/route'
+
+
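
Aside for reviewers, not part of the patch: the fake v2 inventory produced by the helpers above is three plain-text header lines followed by a zlib-compressed body, so a payload can be decoded by hand to see exactly what the HTTP handler serves. A minimal round-trip sketch reusing the classes defined above:

    project = SingleEntryProject(1, 'a')
    payload = FakeInventoryV2(project).serialise([project.make_entry()])
    headers, _, compressed = payload.partition(
        b'# The remainder of this file is compressed using zlib.\n')
    assert headers == (b'# Sphinx inventory version 2\n'
                       b'# Project: spam\n'
                       b'# Version: 1\n')
    # one entry per line: NAME DOMAIN:ROLE PRIORITY URI DISPLAY-NAME
    assert zlib.decompress(compressed).decode() == 'ham_1 py:module 0 index.html -\n'
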
+def test_load_mappings_cache(tmp_path):
+    tmp_path.joinpath('conf.py').touch()
+    tmp_path.joinpath('index.rst').touch()
+    project = SingleEntryProject(1, 'a')
+
+    InventoryHandler = make_inventory_handler(project)
+    with http_server(InventoryHandler, port=project.port):
+        # clean build
+        confoverrides = BASE_CONFIG | {'intersphinx_mapping': project.record}
+        app = SphinxTestApp('dummy', srcdir=tmp_path, confoverrides=confoverrides)
+        app.build()
+        app.cleanup()
+
+    # check the cache and the inventories resolved from it
+    entry = project.make_entry()
+    item = dict((project.normalise(entry),))
+    inventories = InventoryAdapter(app.env)
+    assert list(inventories.cache) == ['http://localhost:9341/a']
+    e_name, e_time, e_inv = inventories.cache['http://localhost:9341/a']
+    assert e_name == 'spam'
+    assert e_inv == {'py:module': item}
+    assert inventories.named_inventory == {'spam': {'py:module': item}}
+
+
+def test_load_mappings_cache_update(tmp_path):
+    tmp_path.joinpath('conf.py').touch()
+    tmp_path.joinpath('index.rst').touch()
+    old_project = SingleEntryProject(1337, 'old')
+    new_project = SingleEntryProject(1701, 'new')
+
+    InventoryHandler = make_inventory_handler(old_project, new_project)
+    with http_server(InventoryHandler, port=SingleEntryProject.port):
+        # build normally to create an initial cache
+        confoverrides1 = BASE_CONFIG | {'intersphinx_mapping': old_project.record}
+        app1 = SphinxTestApp('dummy', srcdir=tmp_path, confoverrides=confoverrides1)
+        app1.build()
+        app1.cleanup()
+
+        # switch to the new URL and check that the old URL is no longer cached
+        confoverrides2 = BASE_CONFIG | {'intersphinx_mapping': new_project.record}
+        app2 = SphinxTestApp('dummy', srcdir=tmp_path, confoverrides=confoverrides2)
+        app2.build()
+        app2.cleanup()
+
+    entry = new_project.make_entry()
+    item = dict((new_project.normalise(entry),))
+    inventories = InventoryAdapter(app2.env)
+    # check that the URLs were changed accordingly
+    assert list(inventories.cache) == ['http://localhost:9341/new']
+    e_name, e_time, e_inv = inventories.cache['http://localhost:9341/new']
+    assert e_name == 'spam'
+    assert e_inv == {'py:module': item}
+    assert inventories.named_inventory == {'spam': {'py:module': item}}
+
+
+def test_load_mappings_cache_revert_update(tmp_path):
+    tmp_path.joinpath('conf.py').touch()
+    tmp_path.joinpath('index.rst').touch()
+    old_project = SingleEntryProject(1337, 'old')
+    new_project = SingleEntryProject(1701, 'new')
+
+    InventoryHandler = make_inventory_handler(old_project, new_project)
+    with http_server(InventoryHandler, port=SingleEntryProject.port):
+        # build normally to create an initial cache
+        confoverrides1 = BASE_CONFIG | {'intersphinx_mapping': old_project.record}
+        app1 = SphinxTestApp('dummy', srcdir=tmp_path, confoverrides=confoverrides1)
+        app1.build()
+        app1.cleanup()
+
+        # switch to the new URL and build
+        confoverrides2 = BASE_CONFIG | {'intersphinx_mapping': new_project.record}
+        app2 = SphinxTestApp('dummy', srcdir=tmp_path, confoverrides=confoverrides2)
+        app2.build()
+        app2.cleanup()
+
+        # switch back to the old URL
+        confoverrides3 = BASE_CONFIG | {'intersphinx_mapping': old_project.record}
+        app3 = SphinxTestApp('dummy', srcdir=tmp_path, confoverrides=confoverrides3)
+        app3.build()
+        app3.cleanup()
+
+    entry = old_project.make_entry()
+    item = dict((old_project.normalise(entry),))
+    inventories = InventoryAdapter(app3.env)
+    # check that the URLs were changed accordingly
+    assert list(inventories.cache) == ['http://localhost:9341/old']
+    e_name, e_time, e_inv = inventories.cache['http://localhost:9341/old']
+    assert e_name == 'spam'
+    assert e_inv == {'py:module': item}
+    assert inventories.named_inventory == {'spam': {'py:module': item}}
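
A closing note on the ``(NAME, EXPIRY)`` sort in load_mappings, as a standalone sketch with made-up data (not part of the patch): sorting the cache values before merging makes shadowing deterministic. Names merge in alphabetical order, and for a given name the entry with the later expiry is applied last, so it wins.

    from operator import itemgetter

    # hypothetical cache values: (name, expiry, invdata)
    cache_values = [
        ('spam', 20, {'py:module': {'ham': 'new'}}),
        ('eggs', 10, {'py:module': {'ham': 'eggs'}}),
        ('spam', 10, {'py:module': {'ham': 'old'}}),
    ]
    main_inventory: dict[str, dict] = {}
    for _name, _expiry, invdata in sorted(cache_values, key=itemgetter(0, 1)):
        for objtype, objects in invdata.items():
            main_inventory.setdefault(objtype, {}).update(objects)

    # 'eggs' < 'spam', and within 'spam' the later expiry (20) is merged last.
    assert main_inventory == {'py:module': {'ham': 'new'}}
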