diff mbox series

[v1,2/5] fetch2/github_release_artifact: fetcher for (private) release artifacts

Message ID 20250225070631.2262115-2-l.goehrs@pengutronix.de
State New
Headers show
Series [v1,1/5] fetch2/wget: enable classes derived from Wget to set custom http headers | expand

Commit Message

Leonard Göhrs Feb. 25, 2025, 7:06 a.m. UTC
This fetcher enables downloading artifacts attached to GitHub releases
in _private repositories_ (public repositories can just use download URLs
like `https://github.com/rauc/rauc/releases/download/v1.13/rauc-1.13.tar.xz`
which work without authentication).

Authentication is provided using tokens. In the simplest form, this is a personal
access token hardcoded in a recipe file:

  GH_TOKEN = "github_pat_...
  SRC_URI = "ghra://github.com/rauc/rauc/v1.13/rauc-1.13.tar.xz"
  SRC_URI[sha256sum] = "1ddb218a5d713c8dbd6e04d5501d96629f1c8e252157...

Or as part of the URI:

  SRC_URI = "ghra://github.com/rauc/rauc/v1.13/rauc-1.13.tar.xz;token=g...
  SRC_URI[sha256sum] = "1ddb218a5d713c8dbd6e04d5501d96629f1c8e252157...

Signed-off-by: Leonard Göhrs <l.goehrs@pengutronix.de>
---
 lib/bb/fetch2/__init__.py                |  4 +-
 lib/bb/fetch2/github_release_artifact.py | 93 ++++++++++++++++++++++++
 2 files changed, 96 insertions(+), 1 deletion(-)
 create mode 100644 lib/bb/fetch2/github_release_artifact.py

Comments

Alexander Kanavin Feb. 25, 2025, 8:03 a.m. UTC | #1
On Tue, 25 Feb 2025 at 08:06, Leonard Göhrs via lists.openembedded.org
<l.goehrs=pengutronix.de@lists.openembedded.org> wrote:
> Authentication is provided using tokens. In the simplest form a personal
> access token hardcoded in a recipe file:
>
>   GH_TOKEN = "github_pat_...
>   SRC_URI = "ghra://github.com/rauc/rauc/v1.13/rauc-1.13.tar.xz"
>   SRC_URI[sha256sum] = "1ddb218a5d713c8dbd6e04d5501d96629f1c8e252157...
>
> Or as part of the URI:
>
>   SRC_URI = "ghra://github.com/rauc/rauc/v1.13/rauc-1.13.tar.xz;token=g...
>   SRC_URI[sha256sum] = "1ddb218a5d713c8dbd6e04d5501d96629f1c8e252157...

This is really insecure. You need to find a way for wget to pick up
the needed token and produce a header directly from netrc or similar
file that is managed outside of layer metadata.

Alex
diff mbox series

Patch

diff --git a/lib/bb/fetch2/__init__.py b/lib/bb/fetch2/__init__.py
index 64f87b3ec..70ae6a9e5 100644
--- a/lib/bb/fetch2/__init__.py
+++ b/lib/bb/fetch2/__init__.py
@@ -1293,7 +1293,7 @@  class FetchData(object):
             elif checksum_plain_name in self.parm:
                 checksum_expected = self.parm[checksum_plain_name]
                 checksum_name = checksum_plain_name
-            elif self.type not in ["http", "https", "ftp", "ftps", "sftp", "s3", "az", "crate", "gs", "gomod", "npm"]:
+            elif self.type not in ["http", "https", "ftp", "ftps", "sftp", "s3", "az", "crate", "gs", "gomod", "npm", "ghra"]:
                 checksum_expected = None
             else:
                 checksum_expected = d.getVarFlag("SRC_URI", checksum_name)
@@ -2066,6 +2066,7 @@  from . import az
 from . import crate
 from . import gcp
 from . import gomod
+from . import github_release_artifact
 
 methods.append(local.Local())
 methods.append(wget.Wget())
@@ -2090,3 +2091,4 @@  methods.append(crate.Crate())
 methods.append(gcp.GCP())
 methods.append(gomod.GoMod())
 methods.append(gomod.GoModGit())
+methods.append(github_release_artifact.GitHubReleaseArtifact())
diff --git a/lib/bb/fetch2/github_release_artifact.py b/lib/bb/fetch2/github_release_artifact.py
new file mode 100644
index 000000000..d5a2646ce
--- /dev/null
+++ b/lib/bb/fetch2/github_release_artifact.py
@@ -0,0 +1,93 @@ 
+"""
+BitBake 'Fetch' GitHub release artifacts implementation
+
+"""
+
+# Copyright (C) 2025 Leonard Göhrs
+#
+# Based on bb.fetch2.wget:
+# Copyright (C) 2003, 2004  Chris Larson
+#
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Based on functions from the base bb module, Copyright 2003 Holger Schurig
+
+import json
+
+from urllib.request import urlopen, Request
+
+from bb.fetch2 import FetchError
+from bb.fetch2.wget import Wget
+
+
+class GitHubReleaseArtifact(Wget):
+    API_HEADERS = {
+        "Accept": "application/vnd.github+json",
+        "X-GitHub-Api-Version": "2022-11-28",
+    }
+
+    DOWNLOAD_HEADERS = {
+        "Accept": "application/octet-stream"
+    }
+
+    def supports(self, ud, d):
+        return ud.type in ["ghra"]
+
+    def _resolve_artifact_url(self, ud, d):
+        """Resolve `ghra://` pseudo URLs to `https://` URLs and set auth header.
+
+        This method resolves URLs like `ghra://github.com/rauc/rauc/v1.13/rauc-1.13.tar.xz`
+        to a backing URL like `https://api.github.com/repos/rauc/rauc/releases/assets/222455085`
+        while optionally setting the required authentication headers to download from
+        private repositories.
+        """
+
+        try:
+            user, repo, tag, asset_name = ud.path.strip("/").split("/")
+        except ValueError as e:
+            raise FetchError(
+                f"Expected path like '/<user>/<repo>/<tag>/<asset_name>', got: '{ud.path}'"
+            ) from e
+
+        # The GitHub authentication token may be provided as a URL parameter
+        # (to enable using different tokens for different URLs in the same recipe)
+        # or via the GH_TOKEN variable for cleaner URLs.
+        token = ud.parm.get("token") or d.getVar("GH_TOKEN")
+
+        meta_url = f"https://api.{ud.host}/repos/{user}/{repo}/releases/tags/{tag}"
+
+        auth_headers = {}
+
+        if token is not None:
+            auth_headers["Authorization"] = f"Bearer {token}"
+
+        try:
+            req = Request(url=meta_url, headers=(auth_headers | self.API_HEADERS))
+            with urlopen(req) as resp:
+                result = json.load(resp)
+
+        except Exception as e:
+            raise FetchError(f"Error downloading artifact list: {e}") from e
+
+        asset_urls = dict((asset["name"], asset["url"]) for asset in result["assets"])
+
+        if asset_name not in asset_urls:
+            asset_list = ", ".join(asset_urls.keys())
+            raise FetchError(
+                f"Did not find asset '{asset_name}' in release asset list: {asset_list}"
+            )
+
+        # Override the `url` and `headers` in the FetchData object,
+        # enabling the Wget class to perform the actual downloading.
+        ud.url = asset_urls[asset_name]
+        ud.headers = auth_headers | self.DOWNLOAD_HEADERS
+
+    def checkstatus(self, fetch, ud, d):
+        self._resolve_artifact_url(ud, d)
+
+        return super().checkstatus(fetch, ud, d)
+
+    def download(self, ud, d):
+        self._resolve_artifact_url(ud, d)
+
+        return super().download(ud, d)