diff mbox series

[v4] fetch2/crate: support configurable registry and index URLs

Message ID 20260604212153.79056-1-ms98.cho@gmail.com
State New
Headers show
Series [v4] fetch2/crate: support configurable registry and index URLs | expand

Commit Message

minsung.cho June 4, 2026, 9:19 p.m. UTC
The crate fetcher hardcoded the crates.io download URL and assumed every
other host served a fixed /name/versions JSON API. Recipes pointing at a
private registry or a Cargo sparse index mirror just did not work.

Two new per-host varflags fix this: BB_CRATE_REGISTRY_URL[host] and
BB_CRATE_INDEX_URL[host]. Both take {crate} and {version} placeholders;
the index template additionally takes {index_path}, and its presence is
how the fetcher knows to treat the response as a Cargo sparse index
(NDJSON) rather than as a JSON versions API. The crates.io defaults are
expressed with the same templates, so there is no separate code path for
that host.

Tests cover the crates.io defaults, custom API and sparse templates, both
latest-version parser paths, and an end-to-end fetch+unpack against a
local sparse registry served over HTTP.

[YOCTO #16276]

Signed-off-by: minsung.cho <ms98.cho@gmail.com>
---
v4: address Ross Burton's review of v3
  - build the download/index URLs with str.format() instead of chained
    .replace() calls, as the templates already use {placeholder} fields
  - express the crates.io index default with the {index_path} template
    too, so the generic path handles it and the crates.io-specific
    index branch (and its trailing-slash handling) is gone
  - latest_versionstring() dispatches on ud.crate_index_format directly
    instead of a getattr() guard; it is always set in _crate_urldata_init
  - dropped the separate test-cleanup chmod tweak: master already
    converted that call to an argument list
  - dropped the test for the removed trailing-slash branch
  Rebased onto master (resolved against the filter_regex series so
  latest_versionstring keeps the filter_regex parameter).

 lib/bb/fetch2/crate.py |  29 +++++++++--
 lib/bb/tests/fetch.py  | 112 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 137 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/lib/bb/fetch2/crate.py b/lib/bb/fetch2/crate.py
index 8f928ea..fe8e70a 100644
--- a/lib/bb/fetch2/crate.py
+++ b/lib/bb/fetch2/crate.py
@@ -78,12 +78,33 @@  class Crate(Wget):
         # host (this is to allow custom crate registries to be specified
         host = '/'.join(parts[2:-2])
 
-        # If using crates.io use the CDN directly as per https://crates.io/data-access
+        # Allow overriding the registry and index URLs per host via varflags.
+        # Both accept {crate} and {version}; the index URL also accepts {index_path}:
+        #   BB_CRATE_REGISTRY_URL[my-host] = "https://my-host/crates/{crate}/{version}/download"
+        #   BB_CRATE_INDEX_URL[my-host] = "https://my-host/index/{index_path}"
+        # An {index_path} placeholder marks a Cargo sparse index; without it the
+        # index URL is treated as a JSON versions API endpoint.
+        dl_url = d.getVarFlag('BB_CRATE_REGISTRY_URL', host)
+        index_url = d.getVarFlag('BB_CRATE_INDEX_URL', host)
+
+        # crates.io uses the CDN directly as per https://crates.io/data-access
         if host == 'crates.io':
-            ud.url = "https://static.crates.io/crates/%s/%s/download" % (name, version)
-            ud.versionsurl = 'https://index.crates.io/' + self._generate_index_path(name)
+            if not dl_url:
+                dl_url = "https://static.crates.io/crates/{crate}/{version}/download"
+            if not index_url:
+                index_url = "https://index.crates.io/{index_path}"
+
+        if dl_url:
+            ud.url = dl_url.format(crate=name, version=version)
         else:
             ud.url = "https://%s/%s/%s/download" % (host, name, version)
+
+        if index_url:
+            ud.crate_index_format = 'sparse' if '{index_path}' in index_url else 'api'
+            ud.versionsurl = index_url.format(crate=name, version=version,
+                                              index_path=self._generate_index_path(name))
+        else:
+            ud.crate_index_format = 'api'
             ud.versionsurl = "https://%s/%s/versions" % (host, name)
 
         ud.parm['downloadfilename'] = "%s-%s.crate" % (name, version)
@@ -161,7 +182,7 @@  class Crate(Wget):
         Return the latest upstream version, dispatching to the appropriate
         parser based on the versionsurl format.
         """
-        if ud.versionsurl.startswith('https://index.crates.io/'):
+        if ud.crate_index_format == 'sparse':
             return self._latest_versionstring_from_index(ud, d, filter_regex)
         return self._latest_versionstring_from_api(ud, d, filter_regex)
 
diff --git a/lib/bb/tests/fetch.py b/lib/bb/tests/fetch.py
index 95cf6c4..c4a1f33 100644
--- a/lib/bb/tests/fetch.py
+++ b/lib/bb/tests/fetch.py
@@ -2756,6 +2756,118 @@  class FetchLocallyMissingTagFromRemote(FetcherTest):
 
 
 class CrateTest(FetcherTest):
+    def test_crate_url_uses_crates_io_defaults(self):
+        ud = bb.fetch2.FetchData("crate://crates.io/glob/0.2.11", self.d)
+
+        self.assertEqual(ud.url,
+                         "https://static.crates.io/crates/glob/0.2.11/download")
+        self.assertEqual(ud.versionsurl, "https://index.crates.io/gl/ob/glob")
+        self.assertEqual(ud.crate_index_format, "sparse")
+        self.assertEqual(ud.parm["downloadfilename"], "glob-0.2.11.crate")
+        self.assertEqual(ud.parm["name"], "glob-0.2.11")
+
+    def test_crate_url_supports_custom_registry_templates(self):
+        self.d.setVarFlag("BB_CRATE_REGISTRY_URL", "registry.example.com",
+                          "https://registry.example.com/api/v1/crates/{crate}/{version}/download")
+        self.d.setVarFlag("BB_CRATE_INDEX_URL", "registry.example.com",
+                          "https://registry.example.com/api/v1/crates/{crate}/versions")
+
+        ud = bb.fetch2.FetchData("crate://registry.example.com/glob/0.2.11", self.d)
+
+        self.assertEqual(ud.url,
+                         "https://registry.example.com/api/v1/crates/glob/0.2.11/download")
+        self.assertEqual(ud.versionsurl,
+                         "https://registry.example.com/api/v1/crates/glob/versions")
+        self.assertEqual(ud.crate_index_format, "api")
+
+    def test_crate_url_supports_custom_sparse_index_templates(self):
+        self.d.setVarFlag("BB_CRATE_REGISTRY_URL", "registry.example.com",
+                          "https://registry.example.com/crates/{crate}/{version}/download")
+        self.d.setVarFlag("BB_CRATE_INDEX_URL", "registry.example.com",
+                          "https://registry.example.com/index/{index_path}")
+
+        ud = bb.fetch2.FetchData("crate://registry.example.com/aho-corasick/0.7.20", self.d)
+
+        self.assertEqual(ud.url,
+                         "https://registry.example.com/crates/aho-corasick/0.7.20/download")
+        self.assertEqual(ud.versionsurl,
+                         "https://registry.example.com/index/ah/o-/aho-corasick")
+        self.assertEqual(ud.crate_index_format, "sparse")
+
+    def test_crate_latest_versionstring_supports_custom_sparse_index(self):
+        self.d.setVarFlag("BB_CRATE_INDEX_URL", "registry.example.com",
+                          "https://registry.example.com/index/{index_path}")
+        ud = bb.fetch2.FetchData("crate://registry.example.com/glob/0.2.11", self.d)
+        index = '\n'.join([
+            '{"vers":"0.2.10","yanked":false}',
+            '{"vers":"0.2.11","yanked":true}',
+            '{"vers":"0.2.12","yanked":false}',
+        ])
+
+        with unittest.mock.patch("bb.fetch2.crate.Crate._fetch_index", return_value=index):
+            self.assertEqual(ud.method.latest_versionstring(ud, self.d), ("0.2.12", ""))
+
+    def test_crate_latest_versionstring_supports_custom_api_index(self):
+        self.d.setVarFlag("BB_CRATE_INDEX_URL", "registry.example.com",
+                          "https://registry.example.com/api/v1/crates/{crate}/versions")
+        ud = bb.fetch2.FetchData("crate://registry.example.com/glob/0.2.11", self.d)
+        index = '{"versions":[{"num":"0.2.10"},{"num":"0.2.12"}]}'
+
+        with unittest.mock.patch("bb.fetch2.crate.Crate._fetch_index", return_value=index):
+            self.assertEqual(ud.method.latest_versionstring(ud, self.d), ("0.2.12", ""))
+
+    def test_crate_fetches_from_local_sparse_registry(self):
+        registry = os.path.join(self.tempdir, "registry")
+        crate_name = "dummycrate"
+        crate_version = "1.0.0"
+        crate_basename = "%s-%s" % (crate_name, crate_version)
+        crate_path = os.path.join(registry, "crates", crate_name,
+                                  crate_version, "download")
+        index_path = os.path.join(registry, "index", "du", "mm", crate_name)
+        source_dir = os.path.join(self.tempdir, "crate-source", crate_basename)
+        bb.utils.mkdirhier(os.path.join(source_dir, "src"))
+        bb.utils.mkdirhier(os.path.dirname(crate_path))
+        bb.utils.mkdirhier(os.path.dirname(index_path))
+
+        with open(os.path.join(source_dir, "Cargo.toml"), "w") as f:
+            f.write('[package]\nname = "%s"\nversion = "%s"\n' %
+                    (crate_name, crate_version))
+        with open(os.path.join(source_dir, "src", "lib.rs"), "w") as f:
+            f.write("pub fn answer() -> u32 { 42 }\n")
+        with tarfile.open(crate_path, "w:gz") as tar:
+            tar.add(source_dir, arcname=crate_basename)
+        with open(crate_path, "rb") as f:
+            crate_checksum = hashlib.sha256(f.read()).hexdigest()
+        with open(index_path, "w") as f:
+            f.write('{"name":"%s","vers":"%s","yanked":false}\n' %
+                    (crate_name, crate_version))
+
+        server = HTTPService(registry, host="127.0.0.1")
+        server.start()
+        try:
+            host = "127.0.0.1:%s" % server.port
+            self.d.setVarFlag("BB_CRATE_REGISTRY_URL", host,
+                              "http://%s/crates/{crate}/{version}/download" % host)
+            self.d.setVarFlag("BB_CRATE_INDEX_URL", host,
+                              "http://%s/index/{index_path}" % host)
+            self.d.setVarFlag("SRC_URI", "%s.sha256sum" % crate_basename,
+                              crate_checksum)
+            uri = "crate://%s/%s/%s" % (host, crate_name, crate_version)
+
+            fetcher = bb.fetch2.Fetch([uri], self.d)
+            ud = fetcher.ud[fetcher.urls[0]]
+            self.assertEqual(ud.crate_index_format, "sparse")
+            self.assertEqual(ud.method.latest_versionstring(ud, self.d),
+                             (crate_version, ""))
+
+            fetcher.download()
+            fetcher.unpack(self.tempdir)
+            unpacked_file = os.path.join(self.tempdir, "cargo_home", "bitbake",
+                                         crate_basename, "src", "lib.rs")
+            self.assertTrue(os.path.exists(unpacked_file))
+        finally:
+            server.stop()
+
     @skipIfNoNetwork()
     def test_crate_url(self):