From b780c80b92d2386ce5c6bb01360a046de8d3c91c Mon Sep 17 00:00:00 2001
From: Rohan Devasthale
Date: Wed, 2 Oct 2024 12:01:56 -0400
Subject: [PATCH] Initial commit for poc

Proof of concept: query the simple index through mousebender and record,
for every candidate, the PEP 658 metadata hash and the URL of the
standalone ".metadata" file when the index advertises one.

---
 requirements.txt          |  1 +
 src/fromager/candidate.py |  5 +++++
 src/fromager/resolver.py  | 38 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+)

diff --git a/requirements.txt b/requirements.txt
index 90212171..ce6607e5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -18,3 +18,4 @@ tomlkit
 tqdm
 virtualenv
 wheel
+mousebender
diff --git a/src/fromager/candidate.py b/src/fromager/candidate.py
index e7a824a3..4bd3d74d 100644
--- a/src/fromager/candidate.py
+++ b/src/fromager/candidate.py
@@ -25,6 +25,7 @@ def __init__(
         name: str,
         version: Version,
         url: str,
+        metadata_hash: tuple[str, str] | None = None,
         extras: typing.Iterable[str] | None = None,
         is_sdist: bool | None = None,
         build_tag: BuildTag = (),
@@ -35,9 +36,13 @@ def __init__(
         self.extras = extras
         self.is_sdist = is_sdist
         self.build_tag = build_tag
+        self.metadata_hash = metadata_hash
 
         self._metadata: Metadata | None = None
         self._dependencies: list[Requirement] | None = None
+        # URL of the standalone PEP 658 metadata file; assigned by the resolver
+        # after construction and left as None when no such file is advertised.
+        self.metadata_url: str | None = None
 
     def __repr__(self) -> str:
         if not self.extras:
diff --git a/src/fromager/resolver.py b/src/fromager/resolver.py
index b8c3ca56..15f04540 100644
--- a/src/fromager/resolver.py
+++ b/src/fromager/resolver.py
@@ -11,6 +11,7 @@
 from operator import attrgetter
 from platform import python_version
 from urllib.parse import urljoin, urlparse
+import mousebender.simple
 
 import github
 import html5lib
@@ -105,7 +106,42 @@ def get_project_from_pypi(
     simple_index_url = sdist_server_url.rstrip("/") + "/" + project + "/"
     logger.debug("%s: getting available versions from %s", project, simple_index_url)
     data = session.get(simple_index_url).content
+    metadata_response = session.get(
+        simple_index_url, headers={"accept": mousebender.simple.ACCEPT_SUPPORTED}
+    )
     doc = html5lib.parse(data, namespaceHTMLElements=False)
+    # parse_project_details() takes text, so pass ``.text`` instead of the raw
+    # bytes in ``.content``.
+    parsed_metadata_doc = mousebender.simple.parse_project_details(
+        metadata_response.text,
+        metadata_response.headers["content-type"],
+        project,
+    )
+
+    # Index the PEP 658 data by distribution filename so that each candidate only
+    # receives the hash and URL published for its own file.
+    metadata_hashes: dict[str, tuple[str, str]] = {}
+    metadata_urls: dict[str, str] = {}
+    for dct in parsed_metadata_doc["files"]:
+        if dct.get("yanked", False):
+            continue
+        # PEP 714 names the key "core-metadata"; PEP 658 used "dist-info-metadata".
+        # The "data-" prefixed spelling is accepted as well.
+        hash_val = (
+            dct.get("core-metadata")
+            or dct.get("dist-info-metadata")
+            or dct.get("data-dist-info-metadata")
+        )
+        if not hash_val:
+            continue
+        metadata_urls[dct["filename"]] = (
+            urljoin(simple_index_url, dct["url"]) + ".metadata"
+        )
+        # The value is either True (no hash given) or an {algorithm: digest}
+        # mapping; keep the first pair when one is available.
+        if isinstance(hash_val, dict):
+            metadata_hashes[dct["filename"]] = next(iter(hash_val.items()))
+
     for i in doc.findall(".//a"):
         candidate_url = urljoin(simple_index_url, i.attrib["href"])
         py_req = i.attrib.get("data-requires-python")
@@ -174,10 +210,12 @@ def get_project_from_pypi(
             name,
             version,
             url=candidate_url,
+            metadata_hash=metadata_hashes.get(filename),
             extras=extras,
             is_sdist=is_sdist,
             build_tag=build_tag,
         )
+        c.metadata_url = metadata_urls.get(filename)
         if DEBUG_RESOLVER:
             logger.debug(
                 "%s: candidate %s (%s) %s", project, filename, c, candidate_url