Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[8.x] Remove Intersphinx Legacy Format and Fix intersphinx cache loading in incremental builds #11706

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
e50c662
fix intersphinx cache loading in incremental builds
picnixz Oct 4, 2023
ca545a3
fix lint0
picnixz Oct 4, 2023
cfcb4f5
Remove debug print
picnixz Oct 5, 2023
9d436b3
Merge branch 'sphinx-doc:master' into fix/11466-intersphinx-inventory…
picnixz Oct 5, 2023
92243a4
save
picnixz Oct 5, 2023
37250f9
Merge branch 'master' into fix/11466-intersphinx-inventory-consistency
picnixz Oct 9, 2023
4b1ac3f
Merge branch 'fix/11466-intersphinx-inventory-consistency' of github.…
picnixz Feb 3, 2024
4f8bb9e
Merge remote-tracking branch 'upstream/master' into fix/11466-intersp…
picnixz Feb 3, 2024
56f5533
update implementation and comments
picnixz Feb 3, 2024
5ef7919
Merge branch 'master' into fix/11466-intersphinx-inventory-consistency
picnixz Feb 3, 2024
8bbcd83
Merge branch 'master' into fix/11466-intersphinx-inventory-consistency
picnixz Feb 12, 2024
333886a
Merge branch 'master' into fix/11466-intersphinx-inventory-consistency
picnixz Feb 13, 2024
ac22d65
update logic and refactor
picnixz Feb 13, 2024
822aa88
remove CHANGELOG entry until 8.x
picnixz Feb 14, 2024
6d60665
implement intersphinx new format
picnixz Feb 14, 2024
1e2b875
Merge branch 'master' into fix/11466-intersphinx-inventory-consistency
picnixz Feb 14, 2024
760e4fc
cleanup
picnixz Feb 14, 2024
eea6a9d
cleanup 3.9
picnixz Feb 14, 2024
9cdd373
remove typing_extensions dependency
picnixz Feb 14, 2024
ae80634
cleanup comment
picnixz Feb 14, 2024
d4b4227
Merge branch 'sphinx-doc:master' into fix/11466-intersphinx-inventory…
picnixz Mar 14, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
223 changes: 129 additions & 94 deletions sphinx/ext/intersphinx.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import re
import sys
import time
from operator import itemgetter
from os import path
from typing import TYPE_CHECKING, cast
from urllib.parse import urlsplit, urlunsplit
Expand All @@ -44,7 +45,7 @@
if TYPE_CHECKING:
from collections.abc import Iterable
from types import ModuleType
from typing import IO, Any, Union
from typing import IO, Any, Optional

from docutils.nodes import Node, TextElement, system_message
from docutils.utils import Reporter
Expand All @@ -55,7 +56,20 @@
from sphinx.environment import BuildEnvironment
from sphinx.util.typing import Inventory, InventoryItem, RoleFunction

InventoryCacheEntry = tuple[Union[str, None], int, Inventory]
#: The inventory external URL.
#:
#: This value is unique in :confval:`intersphinx_mapping`.
InventoryURI = str

#: The inventory name. It is unique and in one-to-one correspondence
#: with an inventory remote URL.
InventoryName = str

#: The different targets containing the inventory data. When falsy,
#: this indicates the default inventory file.
InventoryLocation = Optional[str]

InventoryCacheEntry = tuple[InventoryName, int, Inventory]

logger = logging.getLogger(__name__)

Expand All @@ -74,13 +88,13 @@ def __init__(self, env: BuildEnvironment) -> None:
self.env.intersphinx_named_inventory = {} # type: ignore[attr-defined]

@property
def cache(self) -> dict[str, InventoryCacheEntry]:
def cache(self) -> dict[InventoryURI, InventoryCacheEntry]:
"""Intersphinx cache.

- Key is the URI of the remote inventory
- Element one is the key given in the Sphinx intersphinx_mapping
configuration value
- Element two is a time value for cache invalidation, a float
- Element two is a time value for cache invalidation, an integer.
- Element three is the loaded remote inventory, type Inventory
"""
return self.env.intersphinx_cache # type: ignore[attr-defined]
Expand Down Expand Up @@ -209,86 +223,101 @@ def fetch_inventory(app: Sphinx, uri: str, inv: str) -> Inventory:


def fetch_inventory_group(
name: str | None,
uri: str,
invs: tuple[str | None, ...],
cache: dict[str, InventoryCacheEntry],
name: InventoryName,
uri: InventoryURI,
locations: tuple[InventoryLocation, ...],
cache: dict[InventoryURI, InventoryCacheEntry],
app: Sphinx,
now: int,
) -> bool:
cache_time = now - app.config.intersphinx_cache_limit * 86400

def should_store(uri: str, inv: str) -> bool:
# decide whether the inventory must be read: always read local
# files; remote ones only if the cache time is expired
return '://' not in inv or uri not in cache or cache[uri][1] < cache_time

updated = False
failures = []
try:
for inv in invs:
if not inv:
inv = posixpath.join(uri, INVENTORY_FILENAME)
# decide whether the inventory must be read: always read local
# files; remote ones only if the cache time is expired
if '://' not in inv or uri not in cache or cache[uri][1] < cache_time:
safe_inv_url = _get_safe_url(inv)
logger.info(__('loading intersphinx inventory from %s...'), safe_inv_url)
try:
invdata = fetch_inventory(app, uri, inv)
except Exception as err:
failures.append(err.args)
continue
if invdata:
cache[uri] = name, now, invdata
return True
return False
finally:
if failures == []:
pass
elif len(failures) < len(invs):
logger.info(__("encountered some issues with some of the inventories,"
" but they had working alternatives:"))
for fail in failures:
logger.info(*fail)
else:
issues = '\n'.join(f[0] % f[1:] for f in failures)
logger.warning(__("failed to reach any of the inventories "
"with the following issues:") + "\n" + issues)

for location in locations:
inv: str = location or posixpath.join(uri, INVENTORY_FILENAME)
if not should_store(uri, inv):
continue

safe_inv_url = _get_safe_url(inv)
logger.info(__('loading intersphinx inventory from %s...'), safe_inv_url)

try:
invdata = fetch_inventory(app, uri, inv)
except Exception as err:
failures.append(err.args)
continue

if invdata:
cache[uri] = name, now, invdata
updated = True
break

if not failures:
pass
elif len(failures) < len(locations):
logger.info(__("encountered some issues with some of the inventories,"
" but they had working alternatives:"))
for fail in failures:
logger.info(*fail)
else:
issues = '\n'.join(f[0] % f[1:] for f in failures)
logger.warning(__("failed to reach any of the inventories "
"with the following issues:") + "\n" + issues)
return updated


def load_mappings(app: Sphinx) -> None:
"""Load all intersphinx mappings into the environment."""
"""Load the (normalized) intersphinx mappings into the environment."""
now = int(time.time())
inventories = InventoryAdapter(app.builder.env)
intersphinx_cache: dict[str, InventoryCacheEntry] = inventories.cache
intersphinx_cache: dict[InventoryURI, InventoryCacheEntry] = inventories.cache
intersphinx_mapping = app.config.intersphinx_mapping

expected_uris = {uri for uri, _invs in app.config.intersphinx_mapping.values()}

# If the current cache contains some (project, uri) pair
# say ("foo", "foo.com") and if the new intersphinx dict
# contains the pair ("foo", "bar.com"), we need to remove
# the ("foo", "foo.com") entry and use ("foo", "bar.com").
for uri in frozenset(intersphinx_cache):
if intersphinx_cache[uri][0] not in intersphinx_mapping or uri not in expected_uris:
# remove a cached inventory if the latter is no more used by intersphinx
del intersphinx_cache[uri]

with concurrent.futures.ThreadPoolExecutor() as pool:
futures = []
name: str | None
uri: str
invs: tuple[str | None, ...]
for name, (uri, invs) in app.config.intersphinx_mapping.values():
futures.append(pool.submit(
fetch_inventory_group, name, uri, invs, intersphinx_cache, app, now,
))
futures = [
pool.submit(fetch_inventory_group, name, uri, invs, intersphinx_cache, app, now)
for name, (uri, invs) in app.config.intersphinx_mapping.values()
]
updated = [f.result() for f in concurrent.futures.as_completed(futures)]

if any(updated):
# clear the local inventories
inventories.clear()

# Duplicate values in different inventories will shadow each
# other; which one will override which can vary between builds
# since they are specified using an unordered dict. To make
# it more consistent, we sort the named inventories and then
# add the unnamed inventories last. This means that the
# unnamed inventories will shadow the named ones but the named
# ones can still be accessed when the name is specified.
named_vals = []
unnamed_vals = []
for name, _expiry, invdata in intersphinx_cache.values():
if name:
named_vals.append((name, invdata))
else:
unnamed_vals.append((name, invdata))
for name, invdata in sorted(named_vals) + unnamed_vals:
if name:
inventories.named_inventory[name] = invdata
for type, objects in invdata.items():
inventories.main_inventory.setdefault(type, {}).update(objects)
# other and which one will override which varies between builds.
#
# We can however order the cache by URIs for reproducibility.
intersphinx_cache_values = sorted(intersphinx_cache.values(), key=itemgetter(0, 1))
for name, _timeout, invdata in intersphinx_cache_values:
if not name:
logger.warning(
__('intersphinx cache seems corrupted, please rebuild '
'the project with the "-E" option (see sphinx --help)'),
)
continue

inventories.named_inventory[name] = invdata
for objtype, objects in invdata.items():
inventories.main_inventory.setdefault(objtype, {}).update(objects)


def _create_element_from_result(domain: Domain, inv_name: str | None,
Expand Down Expand Up @@ -649,36 +678,42 @@ def install_dispatcher(app: Sphinx, docname: str, source: list[str]) -> None:


def normalize_intersphinx_mapping(app: Sphinx, config: Config) -> None:
for key, value in config.intersphinx_mapping.copy().items():
try:
if isinstance(value, (list, tuple)):
# new format
name, (uri, inv) = key, value
if not isinstance(name, str):
logger.warning(__('intersphinx identifier %r is not string. Ignored'),
name)
config.intersphinx_mapping.pop(key)
continue
else:
# old format, no name
# xref RemovedInSphinx80Warning
name, uri, inv = None, key, value
msg = (
"The pre-Sphinx 1.0 'intersphinx_mapping' format is "
"deprecated and will be removed in Sphinx 8. Update to the "
"current format as described in the documentation. "
f"Hint: \"intersphinx_mapping = {{'<name>': {(uri, inv)!r}}}\"."
"https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html#confval-intersphinx_mapping" # NoQA: E501
)
logger.warning(msg)
# URIs should NOT be duplicated, otherwise different builds may use
# different project names (and thus, the build are no more reproducible)
# depending on which one is inserted last in the cache.
seen: dict[InventoryURI, InventoryName] = {}

for name, value in config.intersphinx_mapping.copy().items():
if not isinstance(name, str):
logger.warning(__('intersphinx identifier %r is not string. Ignored'), name)
del config.intersphinx_mapping[name]
continue

if not isinstance(inv, tuple):
config.intersphinx_mapping[key] = (name, (uri, (inv,)))
else:
config.intersphinx_mapping[key] = (name, (uri, inv))
if not isinstance(value, (tuple, list)):
logger.warning(__('intersphinx_mapping[%r]: invalid format. Ignored'), name)
del config.intersphinx_mapping[name]
continue

try:
uri, inv = value
except Exception as exc:
logger.warning(__('Failed to read intersphinx_mapping[%s], ignored: %r'), key, exc)
config.intersphinx_mapping.pop(key)
logger.warning(
__('Failed to read intersphinx_mapping[%s], ignored: %r'), name, exc,
)
del config.intersphinx_mapping[name]
continue

if (name_for_uri := seen.setdefault(uri, name)) != name:
logger.warning(__(
'conflicting URI %r for intersphinx_mapping[%r] and intersphinx_mapping[%r]',
), uri, name, name_for_uri)
del config.intersphinx_mapping[name]
continue

if isinstance(inv, (tuple, list)):
config.intersphinx_mapping[name] = (name, (uri, tuple(inv)))
else:
config.intersphinx_mapping[name] = (name, (uri, (inv,)))


def setup(app: Sphinx) -> dict[str, Any]:
Expand Down
1 change: 1 addition & 0 deletions sphinx/util/typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ def is_invalid_builtin_class(obj: Any) -> bool:
str, # URL
str, # display name
]
# referencable role => (reference name => inventory item)
Inventory = dict[str, dict[str, InventoryItem]]


Expand Down
2 changes: 1 addition & 1 deletion tests/test_domains/test_domain_c.py
Original file line number Diff line number Diff line change
Expand Up @@ -771,7 +771,7 @@ def test_domain_c_build_intersphinx(tmp_path, app, status, warning):
_var c:member 1 index.html#c.$ -
''')) # NoQA: W291
app.config.intersphinx_mapping = {
'https://localhost/intersphinx/c/': str(inv_file),
'c': ('https://localhost/intersphinx/c/', str(inv_file)),
}
app.config.intersphinx_cache_limit = 0
# load the inventory and check if it's done correctly
Expand Down
2 changes: 1 addition & 1 deletion tests/test_domains/test_domain_cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -1426,7 +1426,7 @@ def test_domain_cpp_build_intersphinx(tmp_path, app, status, warning):
_var cpp:member 1 index.html#_CPPv44$ -
''')) # NoQA: W291
app.config.intersphinx_mapping = {
'https://localhost/intersphinx/cpp/': str(inv_file),
'cpp': ('https://localhost/intersphinx/cpp/', str(inv_file)),
}
app.config.intersphinx_cache_limit = 0
# load the inventory and check if it's done correctly
Expand Down
4 changes: 1 addition & 3 deletions tests/test_extensions/test_ext_inheritance_diagram.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,7 @@ def new_run(self):
def test_inheritance_diagram_png_html(tmp_path, app):
inv_file = tmp_path / 'inventory'
inv_file.write_bytes(external_inventory)
app.config.intersphinx_mapping = {
'https://example.org': str(inv_file),
}
app.config.intersphinx_mapping = {'example': ('https://example.com', str(inv_file))}
app.config.intersphinx_cache_limit = 0
normalize_intersphinx_mapping(app, app.config)
load_mappings(app)
Expand Down
Loading