From 87b4369b6930f329c7e660754f4ee73f5e3af4b5 Mon Sep 17 00:00:00 2001 From: Rohan Devasthale Date: Tue, 1 Oct 2024 11:22:41 -0400 Subject: [PATCH 1/3] PEP 658 metadata download to speed up builds --- src/fromager/resolver.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/fromager/resolver.py b/src/fromager/resolver.py index b8c3ca56..a7a14a40 100644 --- a/src/fromager/resolver.py +++ b/src/fromager/resolver.py @@ -7,7 +7,10 @@ import logging import os +import pathlib import typing +from email.message import EmailMessage +from email.parser import BytesParser from operator import attrgetter from platform import python_version from urllib.parse import urljoin, urlparse @@ -106,9 +109,11 @@ def get_project_from_pypi( logger.debug("%s: getting available versions from %s", project, simple_index_url) data = session.get(simple_index_url).content doc = html5lib.parse(data, namespaceHTMLElements=False) + metadata_content = EmailMessage() for i in doc.findall(".//a"): candidate_url = urljoin(simple_index_url, i.attrib["href"]) py_req = i.attrib.get("data-requires-python") + metadata = i.attrib.get("data-dist-info-metadata") path = urlparse(candidate_url).path filename = path.rsplit("/", 1)[-1] if DEBUG_RESOLVER: @@ -132,6 +137,17 @@ def get_project_from_pypi( ) continue + if metadata: + outfile: pathlib.Path + metadata_url = urljoin(metadata, i.attrib["href"]) + with session.get(metadata_url, stream=True) as r: + r.raise_for_status() + with open(outfile, "wb") as f: + for chunk in r.iter_content(chunk_size=1024 * 1024): + f.write(chunk) + p = BytesParser() + metadata_content = p.parse(outfile, headersonly=True) + # TODO: Handle compatibility tags? 
try: @@ -178,6 +194,7 @@ def get_project_from_pypi( is_sdist=is_sdist, build_tag=build_tag, ) + c._metadata = metadata_content if DEBUG_RESOLVER: logger.debug( "%s: candidate %s (%s) %s", project, filename, c, candidate_url From b1bd88856a39453b2734c28627658e5f695c52de Mon Sep 17 00:00:00 2001 From: Rohan Devasthale Date: Tue, 1 Oct 2024 14:48:59 -0400 Subject: [PATCH 2/3] Undo resolver changes --- src/fromager/resolver.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/src/fromager/resolver.py b/src/fromager/resolver.py index a7a14a40..b8c3ca56 100644 --- a/src/fromager/resolver.py +++ b/src/fromager/resolver.py @@ -7,10 +7,7 @@ import logging import os -import pathlib import typing -from email.message import EmailMessage -from email.parser import BytesParser from operator import attrgetter from platform import python_version from urllib.parse import urljoin, urlparse @@ -109,11 +106,9 @@ def get_project_from_pypi( logger.debug("%s: getting available versions from %s", project, simple_index_url) data = session.get(simple_index_url).content doc = html5lib.parse(data, namespaceHTMLElements=False) - metadata_content = EmailMessage() for i in doc.findall(".//a"): candidate_url = urljoin(simple_index_url, i.attrib["href"]) py_req = i.attrib.get("data-requires-python") - metadata = i.attrib.get("data-dist-info-metadata") path = urlparse(candidate_url).path filename = path.rsplit("/", 1)[-1] if DEBUG_RESOLVER: @@ -137,17 +132,6 @@ def get_project_from_pypi( ) continue - if metadata: - outfile: pathlib.Path - metadata_url = urljoin(metadata, i.attrib["href"]) - with session.get(metadata_url, stream=True) as r: - r.raise_for_status() - with open(outfile, "wb") as f: - for chunk in r.iter_content(chunk_size=1024 * 1024): - f.write(chunk) - p = BytesParser() - metadata_content = p.parse(outfile, headersonly=True) - # TODO: Handle compatibility tags? 
try: @@ -194,7 +178,6 @@ def get_project_from_pypi( is_sdist=is_sdist, build_tag=build_tag, ) - c._metadata = metadata_content if DEBUG_RESOLVER: logger.debug( "%s: candidate %s (%s) %s", project, filename, c, candidate_url From c50ddc784ea865593655a1ec139f213121b30ea0 Mon Sep 17 00:00:00 2001 From: Rohan Devasthale Date: Wed, 2 Oct 2024 09:19:40 -0400 Subject: [PATCH 3/3] Initial thoughts and implementation --- src/fromager/candidate.py | 6 ++++++ src/fromager/resolver.py | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/src/fromager/candidate.py b/src/fromager/candidate.py index e7a824a3..2fee169a 100644 --- a/src/fromager/candidate.py +++ b/src/fromager/candidate.py @@ -38,12 +38,18 @@ def __init__( self._metadata: Metadata | None = None self._dependencies: list[Requirement] | None = None + self.metadata_source: str | None = None def __repr__(self) -> str: if not self.extras: return f"<{self.name}=={self.version}>" return f"<{self.name}[{','.join(self.extras)}]=={self.version}>" + def _pep_658_metadata(self): + # This function will download the metadata file from metadata_source url. + # Where do we save this? 
We will use session.get(self.metadata_source, stream=True) + pass + + @property def metadata(self) -> Metadata: if self._metadata is None: diff --git a/src/fromager/resolver.py b/src/fromager/resolver.py index b8c3ca56..f587910e 100644 --- a/src/fromager/resolver.py +++ b/src/fromager/resolver.py @@ -109,6 +109,9 @@ def get_project_from_pypi( for i in doc.findall(".//a"): candidate_url = urljoin(simple_index_url, i.attrib["href"]) py_req = i.attrib.get("data-requires-python") + # As per PEP 658, the anchor tag's "data-dist-info-metadata" attribute indicates that a separate metadata file is available + candidate_metadata_attr = i.attrib.get("data-dist-info-metadata") + candidate_metadata_url = urljoin(simple_index_url, candidate_metadata_attr) path = urlparse(candidate_url).path filename = path.rsplit("/", 1)[-1] if DEBUG_RESOLVER: @@ -178,6 +181,7 @@ def get_project_from_pypi( is_sdist=is_sdist, build_tag=build_tag, ) + c.metadata_source = candidate_metadata_url if DEBUG_RESOLVER: logger.debug( "%s: candidate %s (%s) %s", project, filename, c, candidate_url