
[bitbake-devel,scarthgap] bitbake: gcp.py: remove slow calls to gsutil stat

Message ID 20250605145736.2782365-1-ecordonnier@snap.com
State New

Commit Message

Etienne Cordonnier June 5, 2025, 2:57 p.m. UTC
From: Etienne Cordonnier <ecordonnier@snap.com>

The changes of commit 1ab1d36c0af6fc58a974106b61ff4d37da6cb229 added calls to "gsutil stat" to avoid unhandled exceptions, however:
- In the case of checkstatus() this is redundant with the call to self.gcp_client.bucket(ud.host).blob(path).exists(), which already returns True/False and does not throw an exception if the file does not exist.
- The call to "gsutil stat" is also much slower than calling exists() through the Python client, so we should not replace the call to exists() with a call to "gsutil stat".
- I think the intent of calling check_network_access() in checkstatus() was to error out in case network access is disabled. We can instead change the string "gsutil stat" passed to it to something else to make the code more readable.
- Add a try/except block in download() instead of the extra call to gsutil (see the sketch after this list).
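
For illustration only (this sketch is not part of the patch), the two google-cloud-storage behaviours the points above rely on look roughly like this; the bucket name, object path and local filename below are placeholders:

  # Sketch of the client behaviour assumed by the commit message.
  from google.cloud import storage
  from google.api_core.exceptions import NotFound

  client = storage.Client()
  blob = client.bucket("example-bucket").blob("path/to/object")

  # checkstatus(): exists() returns True/False and does not raise for a
  # missing object, so no separate "gsutil stat" probe is needed.
  print(blob.exists())

  # download(): download_to_filename() raises NotFound for a missing
  # object, which the patch converts into a bitbake FetchError.
  try:
      blob.download_to_filename("/tmp/object")
  except NotFound:
      print("object not found; the fetcher raises FetchError here")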

[RP: Tweak to avoid import until needed so google module isn't required for everyone]
(Bitbake rev: dd120f630e9ddadad95fe83728418335a14d3c3b)

Signed-off-by: Etienne Cordonnier <ecordonnier@snap.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
---
 bitbake/lib/bb/fetch2/gcp.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

Patch

diff --git a/bitbake/lib/bb/fetch2/gcp.py b/bitbake/lib/bb/fetch2/gcp.py
index eb3e0c6a6bb..2ee9ed21948 100644
--- a/bitbake/lib/bb/fetch2/gcp.py
+++ b/bitbake/lib/bb/fetch2/gcp.py
@@ -23,7 +23,6 @@  import urllib.parse, urllib.error
 from bb.fetch2 import FetchMethod
 from bb.fetch2 import FetchError
 from bb.fetch2 import logger
-from bb.fetch2 import runfetchcmd
 
 class GCP(FetchMethod):
     """
@@ -48,7 +47,6 @@  class GCP(FetchMethod):
             ud.basename = os.path.basename(ud.path)
 
         ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
-        ud.basecmd = "gsutil stat"
 
     def get_gcp_client(self):
         from google.cloud import storage
@@ -59,17 +57,20 @@  class GCP(FetchMethod):
         Fetch urls using the GCP API.
         Assumes localpath was called first.
         """
+        from google.api_core.exceptions import NotFound
         logger.debug2(f"Trying to download gs://{ud.host}{ud.path} to {ud.localpath}")
         if self.gcp_client is None:
             self.get_gcp_client()
 
-        bb.fetch2.check_network_access(d, ud.basecmd, f"gs://{ud.host}{ud.path}")
-        runfetchcmd("%s %s" % (ud.basecmd, f"gs://{ud.host}{ud.path}"), d)
+        bb.fetch2.check_network_access(d, "blob.download_to_filename", f"gs://{ud.host}{ud.path}")
 
         # Path sometimes has leading slash, so strip it
         path = ud.path.lstrip("/")
         blob = self.gcp_client.bucket(ud.host).blob(path)
-        blob.download_to_filename(ud.localpath)
+        try:
+            blob.download_to_filename(ud.localpath)
+        except NotFound:
+            raise FetchError("The GCP API threw a NotFound exception")
 
         # Additional sanity checks copied from the wget class (although there
         # are no known issues which mean these are required, treat the GCP API
@@ -91,8 +92,7 @@  class GCP(FetchMethod):
         if self.gcp_client is None:
             self.get_gcp_client()
 
-        bb.fetch2.check_network_access(d, ud.basecmd, f"gs://{ud.host}{ud.path}")
-        runfetchcmd("%s %s" % (ud.basecmd, f"gs://{ud.host}{ud.path}"), d)
+        bb.fetch2.check_network_access(d, "gcp_client.bucket(ud.host).blob(path).exists()", f"gs://{ud.host}{ud.path}")
 
         # Path sometimes has leading slash, so strip it
         path = ud.path.lstrip("/")