Message ID | 20250121010156.470-1-egyszeregy@freemail.hu |
---|---|
State | New |
Headers | show |
Series | bitbake: knotty/progressbar: Optimize footer update and prints | expand |
On Tue, 2025-01-21 at 02:01 +0100, Livius via lists.openembedded.org wrote: > From: Benjamin Szőke <egyszeregy@freemail.hu> > > - Optimize printing in footer update with use a StringIO buffer. > - Cleaning and refactoring prints in code, print() functions were eliminated from all loops for better performance. > - Use max 10 Hz refresh rate (FPS) for footer update. > - Check file descriptor in ProgressBar() class init to see if it's STDOUT or STDERR and resizable. > > Signed-off-by: Benjamin Szőke <egyszeregy@freemail.hu> This patch looks interesting and I like the idea of optimising this but I do have some concerns and it is hard to review due to the way it has been written. I'd note that it doesn't eliminate the print() calls, it redirects them to the IO buffer and the commit message is a little misleading about that. Further comments below. > --- > lib/bb/ui/knotty.py | 81 ++++++++++++++++++++++++---------- > lib/progressbar/progressbar.py | 23 ++++++---- > 2 files changed, 71 insertions(+), 33 deletions(-) > > diff --git a/lib/bb/ui/knotty.py b/lib/bb/ui/knotty.py > index 2fff1b366..cd40e76e0 100644 > --- a/lib/bb/ui/knotty.py > +++ b/lib/bb/ui/knotty.py > @@ -10,6 +10,7 @@ > > from __future__ import division > > +import io > import os > import sys > import logging > @@ -127,6 +128,10 @@ class InteractConsoleLogFilter(logging.Filter): > return True > > class TerminalFilter(object): > + > + # 10 Hz (FPS) -> 0.100 secs > + _DEFAULT_PRINT_INTERVAL = 0.100 > + > rows = 25 > columns = 80 > > @@ -165,9 +170,12 @@ class TerminalFilter(object): > self.interactive = sys.stdout.isatty() > self.footer_present = False > self.lastpids = [] > - self.lasttime = None > + self.lasttime = time.time() > self.quiet = quiet > > + self._footer_buf = io.StringIO() > + self._footer_lines = None > + > if not self.interactive: > return > > @@ -215,11 +223,11 @@ class TerminalFilter(object): > > def clearFooter(self): > if self.footer_present: > - lines = self.footer_present > - sys.stdout.buffer.write(self.curses.tparm(self.cuu, lines)) > + sys.stdout.buffer.write(self.curses.tparm(self.cuu, self._footer_lines)) > sys.stdout.buffer.write(self.curses.tparm(self.ed)) > sys.stdout.flush() > self.footer_present = False > + self._footer_lines = None > > def elapsed(self, sec): > hrs = int(sec / 3600.0) > @@ -235,28 +243,47 @@ class TerminalFilter(object): > > def keepAlive(self, t): > if not self.cuu: > - print("Bitbake still alive (no events for %ds). Active tasks:" % t) > + msgbuf = ["Bitbake still alive (no events for %ds). Active tasks:" % t] > for t in self.helper.running_tasks: > - print(t) > + msgbuf.append(str(t)) > + print("\n".join(msgbuf)) Does this actually do anything different? I guess the intent is one print call instead of several? Does that make a noticable difference to performance? It isn't going to the IO buffer (which is what the commit message implies)? > sys.stdout.flush() > > def updateFooter(self): > if not self.cuu: > return > + White space changes make the patch harder to read/review/trust. > activetasks = self.helper.running_tasks > failedtasks = self.helper.failed_tasks > runningpids = self.helper.running_pids > currenttime = time.time() > - if not self.lasttime or (currenttime - self.lasttime > 5): > + deltatime = currenttime - self.lasttime > + > + if (deltatime > 5.0): > self.helper.needUpdate = True > - self.lasttime = currenttime > - if self.footer_present and not self.helper.needUpdate: > + need_update = self.helper.needUpdate > + else: > + # Do not let to update faster then _DEFAULT_PRINT_INTERVAL > + # to avoid heavy print() flooding. > + need_update = self.helper.needUpdate and (deltatime > self._DEFAULT_PRINT_INTERVAL) > + > + if self.footer_present and (not need_update): > + # Footer update is not need. > return > + else: > + # Footer update is need and store its "lasttime" value. > + self.lasttime = currenttime > + > self.helper.needUpdate = False > if self.footer_present: > self.clearFooter() > if (not self.helper.tasknumber_total or self.helper.tasknumber_current == self.helper.tasknumber_total) and not len(activetasks): > return > + > + # Clear footer buffer. > + self._footer_buf.truncate(0) > + self._footer_buf.seek(0) > + > tasks = [] > for t in runningpids: > start_time = activetasks[t].get("starttime", None) > @@ -275,6 +302,7 @@ class TerminalFilter(object): > else: > pbar = BBProgress("0: %s" % msg, 100, widgets=[' ', progressbar.Percentage(), ' ', progressbar.Bar(), ''], extrapos=5, resize_handler=self.sigwinch_handle) > pbar.bouncing = False > + pbar.fd = self._footer_buf > activetasks[t]["progressbar"] = pbar > tasks.append((pbar, msg, progress, rate, start_time)) > else: > @@ -284,17 +312,17 @@ class TerminalFilter(object): > content = pluralise("Waiting for %s running task to finish", > "Waiting for %s running tasks to finish", len(activetasks)) > if not self.quiet: > - content += ':' > - print(content) > + content += ":" The quoting changes here which introduces noise into the patch and makes review harder. > + print(content, file=self._footer_buf) > else: > scene_tasks = "%s of %s" % (self.helper.setscene_current, self.helper.setscene_total) > cur_tasks = "%s of %s" % (self.helper.tasknumber_current, self.helper.tasknumber_total) > > - content = '' > + content = "" Again, quoting changes. > if not self.quiet: > msg = "Setscene tasks: %s" % scene_tasks > - content += msg + "\n" > - print(msg) > + content += (msg + "\n") > + print(msg, file=self._footer_buf) > > if self.quiet: > msg = "Running tasks (%s, %s)" % (scene_tasks, cur_tasks) > @@ -306,11 +334,12 @@ class TerminalFilter(object): > if not self.main_progress or self.main_progress.maxval != maxtask: > widgets = [' ', progressbar.Percentage(), ' ', progressbar.Bar()] > self.main_progress = BBProgress("Running tasks", maxtask, widgets=widgets, resize_handler=self.sigwinch_handle) > + self.main_progress.fd = self._footer_buf > self.main_progress.start(False) > self.main_progress.setmessage(msg) > progress = max(0, self.helper.tasknumber_current - 1) > content += self.main_progress.update(progress) > - print('') > + print("", file=self._footer_buf) > lines = self.getlines(content) > if not self.quiet: > for tasknum, task in enumerate(tasks[:(self.rows - 1 - lines)]): > @@ -326,15 +355,19 @@ class TerminalFilter(object): > content = pbar.update(progress) > else: > content = pbar.update(1) > - print('') > + print("", file=self._footer_buf) > else: > content = "%s: %s" % (tasknum, task) > - print(content) > + print(content, file=self._footer_buf) > lines = lines + self.getlines(content) > - self.footer_present = lines > + self._footer_lines = lines > + self.footer_present = True > self.lastpids = runningpids[:] > self.lastcount = self.helper.tasknumber_current > > + # Print footer buffer. > + print(self._footer_buf.getvalue(), end="") > + > def getlines(self, content): > lines = 0 > for line in content.split("\n"): > @@ -342,35 +375,35 @@ class TerminalFilter(object): > return lines > > def finish(self): > + self._footer_buf.close() > if self.stdinbackup: > fd = sys.stdin.fileno() > self.termios.tcsetattr(fd, self.termios.TCSADRAIN, self.stdinbackup) > > def print_event_log(event, includelogs, loglines, termfilter): > - # FIXME refactor this out further > logfile = event.logfile > if logfile and os.path.exists(logfile): > termfilter.clearFooter() > bb.error("Logfile of failure stored in: %s" % logfile) > if includelogs and not event.errprinted: > - print("Log data follows:") > f = open(logfile, "r") > + msgbuf = ["Log data follows:"] > lines = [] > while True: > l = f.readline() > - if l == '': > + if l == "": Quoting... > break > l = l.rstrip() > if loglines: > - lines.append(' | %s' % l) > + lines.append(" | %s" % l) Quoting... > if len(lines) > int(loglines): > lines.pop(0) > else: > - print('| %s' % l) > + msgbuf.append("| %s" % l) > f.close() > if lines: > - for line in lines: > - print(line) > + msgbuf.extend(lines) > + print("\n".join(msgbuf)) > > def _log_settings_from_server(server, observe_only): > # Get values of variables which control our output > diff --git a/lib/progressbar/progressbar.py b/lib/progressbar/progressbar.py > index d4da10ab7..2aa61c015 100644 > --- a/lib/progressbar/progressbar.py > +++ b/lib/progressbar/progressbar.py > @@ -110,19 +110,24 @@ class ProgressBar(object): > self.widgets = widgets > self.fd = fd > self.left_justify = left_justify > + self._fd_console = None > + > + fd_num = fd.fileno() > > self.signal_set = False > if term_width is not None: > self.term_width = term_width > + elif (fd_num == 1) or (fd_num == 2): > + # STDOUT is the default writing descriptor with a value of 1. > + # STDERR is the default error descriptor with a value of 2. > + # Sure, fd is a resizable file descriptor, let's allow to > + # use for self._handle_resize(). > + self._fd_console = fd > + self._handle_resize(None, None) > + signal.signal(signal.SIGWINCH, self._handle_resize) > + self.signal_set = True > else: > - try: > - self._handle_resize(None, None) > - signal.signal(signal.SIGWINCH, self._handle_resize) > - self.signal_set = True > - except (SystemExit, KeyboardInterrupt): raise > - except Exception as e: > - print("DEBUG 5 %s" % e) > - self.term_width = self._env_size() > + self.term_width = self._env_size() This bit concerns me as we're making assumptions about fd == 1 and fd == 2. Can we always guarantee that? Nothing really explains why we need to do this. I suspect you're trying to avoid overhead each time the progress bar is setup but I don't know that and you don't say that. If that is the case, I think we need to find a better way where the result is cached rather than add in assumptions abouf fd numbering. > > self.__iterable = None > self._update_widgets() > @@ -182,7 +187,7 @@ class ProgressBar(object): > def _handle_resize(self, signum=None, frame=None): > """Tries to catch resize signals sent from the terminal.""" > > - h, w = array('h', ioctl(self.fd, termios.TIOCGWINSZ, '\0' * 8))[:2] > + h, w = array('h', ioctl(self._fd_console, termios.TIOCGWINSZ, '\0' * 8))[:2] > self.term_width = w I'd probably suggest that the patch be split up a bit. If you want to combine the writes into a single print call, fine, but that is a single change in its own right. I'd be interested to know a bit about what performance issue you're seeing, how much effect combining the writes has (alone) and if the stringIO buffer really does make much extra difference or not. I've no objection to improving this but I do want to make sure we fix the right things and don't add assumptions about fd numbering which may cause problems later. Cheers, Richard
- fd numbering: https://learning.lpi.org/en/learning-materials/101-500/103/103.4/103.4_01/ *Standard Linux processes have three communication channels opened by default: the standard input channel (most times simply called stdin), the standard output channel (stdout) and the standard error channel (stderr). The numerical file descriptors assigned to these channels are 0 to stdin, 1 to stdout and 2 to stderr.* It seems to me they are always 1 and 2, but maybe it would better to use `sys.stdout.fileno()` and `sys.stderr.fileno()` for comparing. This is need to use for checking that is `fd` is resizable or not. https://stackoverflow.com/questions/32199552/what-is-sys-stdin-fileno-in-python - `self._fd_console` needed to introduce as a dedicated `fd` for resizable `fd`s in order to be possible to set `self.fd` to a `StrinIO` object after the constructor in outside. - There are many hot Python topics about what is the best to use for printing in loops and cycles. Following are the facts: Best: `"\n".join(msgbuf_list)` or `write()` to a `StringIO`, they do not perform any I/O operation for HWs Absolute worst: `print()` in each cycle - `print()` redirect to `StringIO` It is the same as write to `StringIO` directly as a i know, but i liked to keep the old print() style to understand better. `print(content, file=self._footer_buf)` -> `self._footer_buf.write(f"{content}\n")`
diff --git a/lib/bb/ui/knotty.py b/lib/bb/ui/knotty.py index 2fff1b366..cd40e76e0 100644 --- a/lib/bb/ui/knotty.py +++ b/lib/bb/ui/knotty.py @@ -10,6 +10,7 @@ =20 from __future__ import division =20 +import io import os import sys import logging @@ -127,6 +128,10 @@ class InteractConsoleLogFilter(logging.Filter): return True =20 class TerminalFilter(object): + + # 10 Hz (FPS) -> 0.100 secs + _DEFAULT_PRINT_INTERVAL =3D 0.100 + rows =3D 25 columns =3D 80 =20 @@ -165,9 +170,12 @@ class TerminalFilter(object): self.interactive =3D sys.stdout.isatty() self.footer_present =3D False self.lastpids =3D [] - self.lasttime =3D None + self.lasttime =3D time.time() self.quiet =3D quiet =20 + self._footer_buf =3D io.StringIO() + self._footer_lines =3D None + if not self.interactive: return =20 @@ -215,11 +223,11 @@ class TerminalFilter(object): =20 def clearFooter(self): if self.footer_present: - lines =3D self.footer_present - sys.stdout.buffer.write(self.curses.tparm(self.cuu, lines)) + sys.stdout.buffer.write(self.curses.tparm(self.cuu, self._fo= oter_lines)) sys.stdout.buffer.write(self.curses.tparm(self.ed)) sys.stdout.flush() self.footer_present =3D False + self._footer_lines =3D None =20 def elapsed(self, sec): hrs =3D int(sec / 3600.0) @@ -235,28 +243,47 @@ class TerminalFilter(object): =20 def keepAlive(self, t): if not self.cuu: - print("Bitbake still alive (no events for %ds). Active tasks= :" % t) + msgbuf =3D ["Bitbake still alive (no events for %ds). Active= tasks:" % t] for t in self.helper.running_tasks: - print(t) + msgbuf.append(str(t)) + print("\n".join(msgbuf)) sys.stdout.flush() =20 def updateFooter(self): if not self.cuu: return + activetasks =3D self.helper.running_tasks failedtasks =3D self.helper.failed_tasks runningpids =3D self.helper.running_pids currenttime =3D time.time() - if not self.lasttime or (currenttime - self.lasttime > 5): + deltatime =3D currenttime - self.lasttime + + if (deltatime > 5.0): self.helper.needUpdate =3D True - self.lasttime =3D currenttime - if self.footer_present and not self.helper.needUpdate: + need_update =3D self.helper.needUpdate + else: + # Do not let to update faster then _DEFAULT_PRINT_INTERVAL + # to avoid heavy print() flooding. + need_update =3D self.helper.needUpdate and (deltatime > self= ._DEFAULT_PRINT_INTERVAL) + + if self.footer_present and (not need_update): + # Footer update is not need. return + else: + # Footer update is need and store its "lasttime" value. + self.lasttime =3D currenttime + self.helper.needUpdate =3D False if self.footer_present: self.clearFooter() if (not self.helper.tasknumber_total or self.helper.tasknumber_c= urrent =3D=3D self.helper.tasknumber_total) and not len(activetasks): return + + # Clear footer buffer. + self._footer_buf.truncate(0) + self._footer_buf.seek(0) + tasks =3D [] for t in runningpids: start_time =3D activetasks[t].get("starttime", None) @@ -275,6 +302,7 @@ class TerminalFilter(object): else: pbar =3D BBProgress("0: %s" % msg, 100, widgets=3D= [' ', progressbar.Percentage(), ' ', progressbar.Bar(), ''], extrapos=3D5= , resize_handler=3Dself.sigwinch_handle) pbar.bouncing =3D False + pbar.fd =3D self._footer_buf activetasks[t]["progressbar"] =3D pbar tasks.append((pbar, msg, progress, rate, start_time)) else: @@ -284,17 +312,17 @@ class TerminalFilter(object): content =3D pluralise("Waiting for %s running task to finish= ", "Waiting for %s running tasks to finish"= , len(activetasks)) if not self.quiet: - content +=3D ':' - print(content) + content +=3D ":" + print(content, file=3Dself._footer_buf) else: scene_tasks =3D "%s of %s" % (self.helper.setscene_current, = self.helper.setscene_total) cur_tasks =3D "%s of %s" % (self.helper.tasknumber_current, = self.helper.tasknumber_total) =20 - content =3D '' + content =3D "" if not self.quiet: msg =3D "Setscene tasks: %s" % scene_tasks - content +=3D msg + "\n" - print(msg) + content +=3D (msg + "\n") + print(msg, file=3Dself._footer_buf) =20 if self.quiet: msg =3D "Running tasks (%s, %s)" % (scene_tasks, cur_tas= ks) @@ -306,11 +334,12 @@ class TerminalFilter(object): if not self.main_progress or self.main_progress.maxval !=3D = maxtask: widgets =3D [' ', progressbar.Percentage(), ' ', progres= sbar.Bar()] self.main_progress =3D BBProgress("Running tasks", maxta= sk, widgets=3Dwidgets, resize_handler=3Dself.sigwinch_handle) + self.main_progress.fd =3D self._footer_buf self.main_progress.start(False) self.main_progress.setmessage(msg) progress =3D max(0, self.helper.tasknumber_current - 1) content +=3D self.main_progress.update(progress) - print('') + print("", file=3Dself._footer_buf) lines =3D self.getlines(content) if not self.quiet: for tasknum, task in enumerate(tasks[:(self.rows - 1 - lines= )]): @@ -326,15 +355,19 @@ class TerminalFilter(object): content =3D pbar.update(progress) else: content =3D pbar.update(1) - print('') + print("", file=3Dself._footer_buf) else: content =3D "%s: %s" % (tasknum, task) - print(content) + print(content, file=3Dself._footer_buf) lines =3D lines + self.getlines(content) - self.footer_present =3D lines + self._footer_lines =3D lines + self.footer_present =3D True self.lastpids =3D runningpids[:] self.lastcount =3D self.helper.tasknumber_current =20 + # Print footer buffer. + print(self._footer_buf.getvalue(), end=3D"") + def getlines(self, content): lines =3D 0 for line in content.split("\n"): @@ -342,35 +375,35 @@ class TerminalFilter(object): return lines =20 def finish(self): + self._footer_buf.close() if self.stdinbackup: fd =3D sys.stdin.fileno() self.termios.tcsetattr(fd, self.termios.TCSADRAIN, self.stdi= nbackup) =20 def print_event_log(event, includelogs, loglines, termfilter): - # FIXME refactor this out further logfile =3D event.logfile if logfile and os.path.exists(logfile): termfilter.clearFooter() bb.error("Logfile of failure stored in: %s" % logfile) if includelogs and not event.errprinted: - print("Log data follows:") f =3D open(logfile, "r") + msgbuf =3D ["Log data follows:"] lines =3D [] while True: l =3D f.readline() - if l =3D=3D '': + if l =3D=3D "": break l =3D l.rstrip() if loglines: - lines.append(' | %s' % l) + lines.append(" | %s" % l) if len(lines) > int(loglines): lines.pop(0) else: - print('| %s' % l) + msgbuf.append("| %s" % l) f.close() if lines: - for line in lines: - print(line) + msgbuf.extend(lines) + print("\n".join(msgbuf)) =20 def _log_settings_from_server(server, observe_only): # Get values of variables which control our output diff --git a/lib/progressbar/progressbar.py b/lib/progressbar/progressbar= .py index d4da10ab7..2aa61c015 100644 --- a/lib/progressbar/progressbar.py +++ b/lib/progressbar/progressbar.py @@ -110,19 +110,24 @@ class ProgressBar(object): self.widgets =3D widgets self.fd =3D fd self.left_justify =3D left_justify + self._fd_console =3D None + + fd_num =3D fd.fileno() =20 self.signal_set =3D False if term_width is not None: self.term_width =3D term_width + elif (fd_num =3D=3D 1) or (fd_num =3D=3D 2): + # STDOUT is the default writing descriptor with a value of 1= . + # STDERR is the default error descriptor with a value of 2. + # Sure, fd is a resizable file descriptor, let's allow to + # use for self._handle_resize(). + self._fd_console =3D fd + self._handle_resize(None, None) + signal.signal(signal.SIGWINCH, self._handle_resize) + self.signal_set =3D True else: - try: - self._handle_resize(None, None) - signal.signal(signal.SIGWINCH, self._handle_resize) - self.signal_set =3D True - except (SystemExit, KeyboardInterrupt): raise - except Exception as e: - print("DEBUG 5 %s" % e) - self.term_width =3D self._env_size() + self.term_width =3D self._env_size() =20 self.__iterable =3D None