diff mbox series

fetch2/crate: use CDN endpoint for version checking if possible

Message ID 20260429144401.4110241-1-ross.burton@arm.com
State Accepted, archived
Commit cb28befc56d201cace72d70891e731b955fb567f
Headers show
Series fetch2/crate: use CDN endpoint for version checking if possible | expand

Commit Message

Ross Burton April 29, 2026, 2:44 p.m. UTC
If the crate host is crates.io then we can fetch the index file for the
crate from index.crates.io instead of hitting the API.  This is the
recommended way to do automated version checks, and it means we no
longer violate the data access policy[1], which requires API clients to
set an explicit User-Agent.

[1] https://crates.io/data-access

Signed-off-by: Ross Burton <ross.burton@arm.com>
---
 lib/bb/fetch2/crate.py | 43 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 37 insertions(+), 6 deletions(-)
diff mbox series

Patch

diff --git a/lib/bb/fetch2/crate.py b/lib/bb/fetch2/crate.py
index 2d30788998..b89817ab94 100644
--- a/lib/bb/fetch2/crate.py
+++ b/lib/bb/fetch2/crate.py
@@ -45,6 +45,17 @@  class Crate(Wget):
 
         super(Crate, self).urldata_init(ud, d)
 
+    def _generate_index_path(self, name):
+        # https://doc.rust-lang.org/cargo/reference/registry-index.html#index-files
+        if len(name) == 1:
+            return f"1/{name}"
+        elif len(name) == 2:
+            return f"2/{name}"
+        elif len(name) == 3:
+            return f"3/{name[0]}/{name}"
+        else:
+            return f"{name[0:2]}/{name[2:4]}/{name}"
+
     def _crate_urldata_init(self, ud, d):
         """
         Sets up the download for a crate
@@ -65,15 +76,15 @@  class Crate(Wget):
         # host (this is to allow custom crate registries to be specified
         host = '/'.join(parts[2:-2])
 
-        # if using upstream just fix it up nicely
+        # If using crates.io use the CDN directly as per https://crates.io/data-access
         if host == 'crates.io':
-            host = 'crates.io/api/v1/crates'
-            cdn_host = 'static.crates.io/crates'
+            ud.url = "https://static.crates.io/crates/%s/%s/download" % (name, version)
+            ud.versionsurl = 'https://index.crates.io/' + self._generate_index_path(name)
+            self.latest_versionstring = self.latest_versionstring_from_index
         else:
-            cdn_host = host
+            ud.url = "https://%s/%s/%s/download" % (host, name, version)
+            ud.versionsurl = "https://%s/%s/versions" % (host, name)
 
-        ud.url = "https://%s/%s/%s/download" % (cdn_host, name, version)
-        ud.versionsurl = "https://%s/%s/versions" % (host, name)
         ud.parm['downloadfilename'] = "%s-%s.crate" % (name, version)
         if 'name' not in ud.parm:
             ud.parm['name'] = '%s-%s' % (name, version)
@@ -145,9 +156,29 @@  class Crate(Wget):
                 json.dump(metadata, f)
 
     def latest_versionstring(self, ud, d):
+        """
+        Return the latest version available when versionsurl is the [name]/versions URL.
+        """
         from functools import cmp_to_key
         json_data = json.loads(self._fetch_index(ud.versionsurl, ud, d))
         versions = [(0, i["num"], "") for i in json_data["versions"]]
         versions = sorted(versions, key=cmp_to_key(bb.utils.vercmp))
 
         return (versions[-1][1], "")
+
+    def latest_versionstring_from_index(self, ud, d):
+        """
+        Return the latest version available when versionsurl is a Cargo index
+        file.
+        https://doc.rust-lang.org/cargo/reference/registry-index.html#index-files
+        """
+        from functools import cmp_to_key
+
+        versions = []
+        response = self._fetch_index(ud.versionsurl, ud, d)
+        for line in response.splitlines():
+            data = json.loads(line)
+            versions.append((0, data["vers"], ""))
+
+        versions = sorted(versions, key=cmp_to_key(bb.utils.vercmp))
+        return (versions[-1][1], "")