Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 33 additions & 3 deletions python/private/pypi/extension.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ You cannot use both the additive_build_content and additive_build_content_file a
# versions.
pip_hub_map = {}
simpleapi_cache = {}
facts = {}

for mod in module_ctx.modules:
for pip_attr in mod.tags.parse:
Expand All @@ -240,6 +241,7 @@ You cannot use both the additive_build_content and additive_build_content_file a
evaluate_markers_fn = kwargs.get("evaluate_markers", None),
available_interpreters = kwargs.get("available_interpreters", INTERPRETER_LABELS),
logger = repo_utils.logger(module_ctx, "pypi:hub:" + hub_name),
facts = facts,
)
pip_hub_map[pip_attr.hub_name] = builder
elif pip_hub_map[hub_name].module_name != mod.name:
Expand Down Expand Up @@ -286,6 +288,25 @@ You cannot use both the additive_build_content and additive_build_content_file a
hub_group_map[hub.name] = out.group_map
hub_whl_map[hub.name] = out.whl_map

facts = {
"fact_version": facts.get("fact_version"),
} | {
index_url: {
k: _sorted_dict(f.get(k))
for k in [
"dist_filenames",
"dist_hashes",
"dist_yanked",
]
if f.get(k)
}
for index_url, f in facts.items()
if index_url not in ["fact_version"]
}
if len(facts) == 1:
# only version is present, skip writing
facts = None

return struct(
config = config,
exposed_packages = exposed_packages,
Expand All @@ -294,6 +315,7 @@ You cannot use both the additive_build_content and additive_build_content_file a
hub_whl_map = hub_whl_map,
whl_libraries = whl_libraries,
whl_mods = whl_mods,
facts = facts,
platform_config_settings = {
hub_name: {
platform_name: sorted([str(Label(cv)) for cv in p.config_settings])
Expand All @@ -303,6 +325,12 @@ You cannot use both the additive_build_content and additive_build_content_file a
},
)

def _sorted_dict(d):
    """Return a new dict with the items of `d` ordered by key.

    Args:
        d: {type}`dict | None` The dict to sort. The input is not modified.

    Returns:
        A new dict whose iteration order follows the sorted keys, or an
        empty dict if `d` is `None` or empty.
    """
    if not d:
        return {}

    # Keys are unique, so sorting the (key, value) pairs orders by key only.
    return dict(sorted(d.items()))

def _pip_impl(module_ctx):
"""Implementation of a class tag that creates the pip hub and corresponding pip spoke whl repositories.

Expand Down Expand Up @@ -391,9 +419,11 @@ def _pip_impl(module_ctx):
groups = mods.hub_group_map.get(hub_name),
)

return module_ctx.extension_metadata(
reproducible = True,
)
kwargs = {"reproducible": True}
if mods.facts:
kwargs["facts"] = mods.facts

return module_ctx.extension_metadata(**kwargs)

_default_attrs = {
"arch_name": attr.string(
Expand Down
10 changes: 9 additions & 1 deletion python/private/pypi/hub_builder.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def hub_builder(
simpleapi_download_fn,
evaluate_markers_fn,
logger,
facts = None,
simpleapi_cache = {}):
"""Return a hub builder instance

Expand All @@ -47,6 +48,7 @@ def hub_builder(
used during the `repository_rule` and must be always compatible with the host.
simpleapi_download_fn: the function used to download from SimpleAPI.
simpleapi_cache: the cache for the download results.
facts: the facts if they are available.
logger: the logger for this builder.
"""

Expand All @@ -69,6 +71,7 @@ def hub_builder(
_platforms = {},
_group_name_by_whl = {},
_get_index_urls = {},
_facts = facts,
_use_downloader = {},
_simpleapi_cache = simpleapi_cache,
# instance constants
Expand Down Expand Up @@ -335,11 +338,16 @@ def _set_get_index_urls(self, pip_attr):
d
for d in distributions
if _use_downloader(self, python_version, d)
],
] if type(distributions) == "list" else {
d: versions
for d, versions in distributions.items()
if _use_downloader(self, python_version, d)
},
envsubst = pip_attr.envsubst,
# Auth related info
netrc = pip_attr.netrc,
auth_patterns = pip_attr.auth_patterns,
facts = self._facts,
),
cache = self._simpleapi_cache,
parallel_download = pip_attr.parallel_download,
Expand Down
19 changes: 9 additions & 10 deletions python/private/pypi/parse_requirements.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -170,16 +170,15 @@ def parse_requirements(

index_urls = {}
if get_index_urls:
index_urls = get_index_urls(
ctx,
# Use list({}) as a way to have a set
list({
req.distribution: None
for reqs in requirements_by_platform.values()
for req in reqs.values()
if not req.srcs.url
}),
)
distributions = {}
for reqs in requirements_by_platform.values():
for req in reqs.values():
if req.srcs.url:
continue

distributions.setdefault(req.distribution, []).append(req.srcs.version)

index_urls = get_index_urls(ctx, distributions)

ret = []
for name, reqs in sorted(requirements_by_platform.items()):
Expand Down
77 changes: 57 additions & 20 deletions python/private/pypi/parse_simpleapi_html.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,14 @@
Parse SimpleAPI HTML in Starlark.
"""

def parse_simpleapi_html(*, url, content):
def parse_simpleapi_html(*, url, content, distribution = None, return_absolute = True):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

There are a few TODOs in the docstrings for new and modified functions in this file that should be addressed:

  • parse_simpleapi_html (lines 24, 26): The distribution and return_absolute parameters need descriptions.
  • pkg_version (lines 129, 130): The filename and distribution parameters need descriptions.

Please fill these in to improve the documentation.

"""Get the package URLs for given shas by parsing the Simple API HTML.

Args:
url(str): The URL that the HTML content can be downloaded from.
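distribution(str): The name of the distribution, used to extract the version from sdist filenames. If not given, it is derived from the last path segment of `url`.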
content(str): The Simple API HTML content.
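return_absolute: {type}`bool` Whether to convert the distribution and metadata URLs to absolute URLs based on `url`. If `False`, the URLs are left as parsed from the content.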

Returns:
A list of structs with:
Expand All @@ -33,6 +35,9 @@ def parse_simpleapi_html(*, url, content):
present, then the 'metadata_url' is also present. Defaults to "".
* metadata_url: The URL for the METADATA if we can download it. Defaults to "".
"""
if not distribution:
_, _, distribution = url.strip("/").rpartition("/")

sdists = {}
whls = {}
lines = content.split("<a href=\"")
Expand All @@ -55,7 +60,8 @@ def parse_simpleapi_html(*, url, content):
sha256s_by_version = {}
for line in lines[1:]:
dist_url, _, tail = line.partition("#sha256=")
dist_url = _absolute_url(url, dist_url)
if return_absolute:
dist_url = absolute_url(index_url = url, url = dist_url)

sha256, _, tail = tail.partition("\"")

Expand All @@ -64,7 +70,7 @@ def parse_simpleapi_html(*, url, content):

head, _, _ = tail.rpartition("</a>")
maybe_metadata, _, filename = head.rpartition(">")
version = _version(filename)
version = pkg_version(filename, distribution)
sha256s_by_version.setdefault(version, []).append(sha256)

metadata_sha256 = ""
Expand All @@ -79,13 +85,17 @@ def parse_simpleapi_html(*, url, content):
break

if filename.endswith(".whl"):
metadata_url = metadata_url or ""
if return_absolute and metadata_url:
metadata_url = absolute_url(index_url = url, url = metadata_url)

whls[sha256] = struct(
filename = filename,
version = version,
url = dist_url,
sha256 = sha256,
metadata_sha256 = metadata_sha256,
metadata_url = _absolute_url(url, metadata_url) if metadata_url else "",
metadata_url = metadata_url,
yanked = yanked,
)
else:
Expand All @@ -110,18 +120,36 @@ _SDIST_EXTS = [
".zip",
]

def _version(filename):
def pkg_version(filename, distribution = None):
"""pkg_version extracts the version from the filename.

TODO: move this to a different location

Args:
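filename: {type}`str` The filename of the distribution (a wheel or an sdist).
distribution: {type}`str` The name of the distribution. Mandatory for sdists, where the version is taken from the part of the filename that follows the distribution name.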

Returns:
version string
"""
# See https://packaging.python.org/en/latest/specifications/binary-distribution-format/#binary-distribution-format

_, _, tail = filename.partition("-")
version, _, _ = tail.partition("-")
if version != tail:
# The format is {name}-{version}-{whl_specifiers}.whl
return version
if filename.endswith(".whl"):
_, _, tail = filename.partition("-")
version, _, _ = tail.partition("-")
if version != tail:
# The format is {name}-{version}-{whl_specifiers}.whl
return version

if not distribution:
fail("for parsing sdists passing 'distribution' is mandatory")

# NOTE @aignas 2025-03-29: most of the files are wheels, so this is not the common path

# {name}-{version}.{ext}
# TODO @aignas 2026-01-20: test for handling dashes in names, can't think of any other way to
# get the version from the filename but to pass in the distribution name to this function.
version = filename[len(distribution) + 1:]
for ext in _SDIST_EXTS:
version, _, _ = version.partition(ext) # build or name

Expand All @@ -147,26 +175,35 @@ def _is_downloadable(url):
"""
return url.startswith("http://") or url.startswith("https://") or url.startswith("file://")

def _absolute_url(index_url, candidate):
if candidate == "":
return candidate
def absolute_url(*, index_url, url):
"""Return an absolute URL in case the url is not absolute.

Args:
index_url: {type}`str` The index_url.
url: {type}`str` The url of the artifact.

Returns:
`url` if it is absolute, or absolute URL based on the `index_url`.
"""
if url == "":
return url

if _is_downloadable(candidate):
return candidate
if _is_downloadable(url):
return url

if candidate.startswith("/"):
if url.startswith("/"):
# absolute path
root_directory = _get_root_directory(index_url)
return "{}{}".format(root_directory, candidate)
return "{}{}".format(root_directory, url)

if candidate.startswith(".."):
if url.startswith(".."):
# relative path with up references
candidate_parts = candidate.split("..")
candidate_parts = url.split("..")
last = candidate_parts[-1]
for _ in range(len(candidate_parts) - 1):
index_url, _, _ = index_url.rstrip("/").rpartition("/")

return "{}/{}".format(index_url, last.strip("/"))

# relative path without up-references
return "{}/{}".format(index_url.rstrip("/"), candidate)
return "{}/{}".format(index_url.rstrip("/"), url)
Loading