Message ID | 20240823084108.1106312-1-ecordonnier@snap.com |
---|---|
State | Accepted, archived |
Commit | dd120f630e9ddadad95fe83728418335a14d3c3b |
Headers | show |
Series | gcp.py: remove slow calls to gsutil stat | expand |
On Fri, 2024-08-23 at 10:41 +0200, Etienne Cordonnier via lists.openembedded.org wrote: > From: Etienne Cordonnier <ecordonnier@snap.com> > > The changes of 1ab1d36c0af6fc58a974106b61ff4d37da6cb229 added calls to "gsutil stat" to avoid unhandled exceptions, however: > - in the case of checkstatus() this is redundant with the call to self.gcp_client.bucket(ud.host).blob(path).exists() which already returns True/False > and does not throw an exception in case the file does not exist. > - Also the call to gsutil stat is much slower than using the python client to call exists() so we should not replace the call to exists() with a call to gsutil stat. > - I think the intent of calling check_network_access in checkstatus() was to error-out in case the error is disabled. We can rather change the string "gsutil stat" to something else to make the code more readable. > - add a try/except block in download() instead of the extra call to gsutil > > Signed-off-by: Etienne Cordonnier <ecordonnier@snap.com> > --- > lib/bb/fetch2/gcp.py | 14 +++++++------- > 1 file changed, 7 insertions(+), 7 deletions(-) > > diff --git a/lib/bb/fetch2/gcp.py b/lib/bb/fetch2/gcp.py > index eb3e0c6a6..9ac2f752f 100644 > --- a/lib/bb/fetch2/gcp.py > +++ b/lib/bb/fetch2/gcp.py > @@ -19,11 +19,11 @@ Additionally, gsutil must also be installed. > > import os > import bb > +from google.api_core.exceptions import NotFound > import urllib.parse, urllib.error > from bb.fetch2 import FetchMethod > from bb.fetch2 import FetchError > from bb.fetch2 import logger > -from bb.fetch2 import runfetchcmd > > class GCP(FetchMethod): > """ I put this into a testing branch and everything broke since the google module isn't present in most places. I've moved the import into the download method. Cheers, Richard
Makes sense, I had forgotten that the module can be included inside functions. Étienne On Fri, Aug 23, 2024 at 11:14 AM Richard Purdie < richard.purdie@linuxfoundation.org> wrote: > On Fri, 2024-08-23 at 10:41 +0200, Etienne Cordonnier via > lists.openembedded.org wrote: > > From: Etienne Cordonnier <ecordonnier@snap.com> > > > > The changes of 1ab1d36c0af6fc58a974106b61ff4d37da6cb229 added calls to > "gsutil stat" to avoid unhandled exceptions, however: > > - in the case of checkstatus() this is redundant with the call to > self.gcp_client.bucket(ud.host).blob(path).exists() which already returns > True/False > > and does not throw an exception in case the file does not exist. > > - Also the call to gsutil stat is much slower than using the python > client to call exists() so we should not replace the call to exists() with > a call to gsutil stat. > > - I think the intent of calling check_network_access in checkstatus() > was to error-out in case the error is disabled. We can rather change the > string "gsutil stat" to something else to make the code more readable. > > - add a try/except block in download() instead of the extra call to > gsutil > > > > Signed-off-by: Etienne Cordonnier <ecordonnier@snap.com> > > --- > > lib/bb/fetch2/gcp.py | 14 +++++++------- > > 1 file changed, 7 insertions(+), 7 deletions(-) > > > > diff --git a/lib/bb/fetch2/gcp.py b/lib/bb/fetch2/gcp.py > > index eb3e0c6a6..9ac2f752f 100644 > > --- a/lib/bb/fetch2/gcp.py > > +++ b/lib/bb/fetch2/gcp.py > > @@ -19,11 +19,11 @@ Additionally, gsutil must also be installed. > > > > import os > > import bb > > +from google.api_core.exceptions import NotFound > > import urllib.parse, urllib.error > > from bb.fetch2 import FetchMethod > > from bb.fetch2 import FetchError > > from bb.fetch2 import logger > > -from bb.fetch2 import runfetchcmd > > > > class GCP(FetchMethod): > > """ > > I put this into a testing branch and everything broke since the google > module isn't present in most places. I've moved the import into the > download method. > > Cheers, > > Richard >
diff --git a/lib/bb/fetch2/gcp.py b/lib/bb/fetch2/gcp.py index eb3e0c6a6..9ac2f752f 100644 --- a/lib/bb/fetch2/gcp.py +++ b/lib/bb/fetch2/gcp.py @@ -19,11 +19,11 @@ Additionally, gsutil must also be installed. import os import bb +from google.api_core.exceptions import NotFound import urllib.parse, urllib.error from bb.fetch2 import FetchMethod from bb.fetch2 import FetchError from bb.fetch2 import logger -from bb.fetch2 import runfetchcmd class GCP(FetchMethod): """ @@ -48,7 +48,6 @@ class GCP(FetchMethod): ud.basename = os.path.basename(ud.path) ud.localfile = d.expand(urllib.parse.unquote(ud.basename)) - ud.basecmd = "gsutil stat" def get_gcp_client(self): from google.cloud import storage @@ -63,13 +62,15 @@ class GCP(FetchMethod): if self.gcp_client is None: self.get_gcp_client() - bb.fetch2.check_network_access(d, ud.basecmd, f"gs://{ud.host}{ud.path}") - runfetchcmd("%s %s" % (ud.basecmd, f"gs://{ud.host}{ud.path}"), d) + bb.fetch2.check_network_access(d, "blob.download_to_filename", f"gs://{ud.host}{ud.path}") # Path sometimes has leading slash, so strip it path = ud.path.lstrip("/") blob = self.gcp_client.bucket(ud.host).blob(path) - blob.download_to_filename(ud.localpath) + try: + blob.download_to_filename(ud.localpath) + except NotFound: + raise FetchError("The GCP API threw a NotFound exception") # Additional sanity checks copied from the wget class (although there # are no known issues which mean these are required, treat the GCP API @@ -91,8 +92,7 @@ class GCP(FetchMethod): if self.gcp_client is None: self.get_gcp_client() - bb.fetch2.check_network_access(d, ud.basecmd, f"gs://{ud.host}{ud.path}") - runfetchcmd("%s %s" % (ud.basecmd, f"gs://{ud.host}{ud.path}"), d) + bb.fetch2.check_network_access(d, "gcp_client.bucket(ud.host).blob(path).exists()", f"gs://{ud.host}{ud.path}") # Path sometimes has leading slash, so strip it path = ud.path.lstrip("/")