runqueue: add cpu/io pressure regulation

Message ID 20220629201016.1948793-1-aryaman.gupta@windriver.com
State New
Headers show
Series runqueue: add cpu/io pressure regulation | expand

Commit Message

Aryaman Gupta June 29, 2022, 8:10 p.m. UTC
Stop the scheduler from starting new tasks if the
current cpu or io pressure is above a certain threshold, specified
through the "BB_MAX_{CPU|IO}_SOME_PRESSURE" variables in conf/local.conf.

If the thresholds aren't specified, the default values are 100 for both
CPU and IO, which will have no impact on build times.
Arbitary lower limit of 1.0 results in a fatal error to avoid extremely
long builds. If the percentage limits are higher than 100, then the default
values are used and warnings are issued to inform users that the
specified limit is out of bounds.

Signed-off-by: Aryaman Gupta <aryaman.gupta@windriver.com>
Signed-off-by: Randy Macleod <randy.macleod@windriver.com>
---
 bitbake/lib/bb/runqueue.py | 39 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

Comments

Jose Quaresma June 30, 2022, 8:32 a.m. UTC | #1
Hi Aryaman,

Aryaman Gupta <aryaman.gupta@windriver.com> escreveu no dia quarta,
29/06/2022 à(s) 21:10:

> Stop the scheduler from starting new tasks if the
> current cpu or io pressure is above a certain threshold, specified
> through the "BB_MAX_{CPU|IO}_SOME_PRESSURE" variables in conf/local.conf.
>
> If the thresholds aren't specified, the default values are 100 for both
> CPU and IO, which will have no impact on build times.
> Arbitary lower limit of 1.0 results in a fatal error to avoid extremely
> long builds. If the percentage limits are higher than 100, then the default
> values are used and warnings are issued to inform users that the
> specified limit is out of bounds.
>
> Signed-off-by: Aryaman Gupta <aryaman.gupta@windriver.com>
> Signed-off-by: Randy Macleod <randy.macleod@windriver.com>
> ---
>  bitbake/lib/bb/runqueue.py | 39 ++++++++++++++++++++++++++++++++++++++
>  1 file changed, 39 insertions(+)
>
> diff --git a/bitbake/lib/bb/runqueue.py b/bitbake/lib/bb/runqueue.py
> index 1e47fe70ef..9667acc11c 100644
> --- a/bitbake/lib/bb/runqueue.py
> +++ b/bitbake/lib/bb/runqueue.py
> @@ -159,6 +159,27 @@ class RunQueueScheduler(object):
>                  self.buildable.append(tid)
>
>          self.rev_prio_map = None
> +        # Some hosts like openSUSE have readable /proc/pressure files
> +        # but throw errors when these files are opened.
> +        try:
> +            subprocess.check_output(["cat", "/proc/pressure/cpu",
> "/proc/pressure/io"], \
> +                                    universal_newlines=True,
> stderr=subprocess.DEVNULL)
> +            self.readable_pressure_files = True
> +        except:
> +            if self.rq.max_cpu_pressure!=100 or
> self.rq.max_io_pressure!=100:
> +                bb.warn("The /proc/pressure files can't be read.
> Continuing build without monitoring pressure")
> +            self.readable_pressure_files = False
> +
> +    def exceeds_max_pressure(self):
> +        if self.readable_pressure_files:
>

We can jump when the user does not provide any default values or if they
provide the maximum

if self.readable_pressure_files and (self.rq.max_cpu_pressure < 100 or
self.rq.max_io_pressure < 100):

Jose

+            # extract avg10 from /proc/pressure/{cpu|io}
> +            curr_pressure_sample = subprocess.check_output(["cat",
> "/proc/pressure/cpu", "/proc/pressure/io"], \
> +                                   universal_newlines=True,
> stderr=subprocess.DEVNULL)
> +            curr_cpu_pressure =
> curr_pressure_sample.split('\n')[0].split()[1].split("=")[1]
> +            curr_io_pressure =
> curr_pressure_sample.split('\n')[2].split()[1].split("=")[1]
> +
> +            return float(curr_cpu_pressure) > self.rq.max_cpu_pressure or
> float(curr_io_pressure) > self.rq.max_io_pressure
> +        return False
>
>      def next_buildable_task(self):
>          """
> @@ -171,6 +192,8 @@ class RunQueueScheduler(object):
>          buildable.intersection_update(self.rq.tasks_covered |
> self.rq.tasks_notcovered)
>          if not buildable:
>              return None
> +        if self.exceeds_max_pressure():
> +            return None
>
>          # Filter out tasks that have a max number of threads that have
> been exceeded
>          skip_buildable = {}
> @@ -1699,6 +1722,8 @@ class RunQueueExecute:
>
>          self.number_tasks = int(self.cfgData.getVar("BB_NUMBER_THREADS")
> or 1)
>          self.scheduler = self.cfgData.getVar("BB_SCHEDULER") or "speed"
> +        self.max_cpu_pressure =
> float(self.cfgData.getVar("BB_MAX_CPU_SOME_PRESSURE") or 100.0)
> +        self.max_io_pressure =
> float(self.cfgData.getVar("BB_MAX_IO_SOME_PRESSURE") or 100.0)
>
>          self.sq_buildable = set()
>          self.sq_running = set()
> @@ -1733,6 +1758,20 @@ class RunQueueExecute:
>          if self.number_tasks <= 0:
>               bb.fatal("Invalid BB_NUMBER_THREADS %s" % self.number_tasks)
>
> +        lower_limit = 1.0
> +        upper_limit = 100.0
> +        if self.max_cpu_pressure < lower_limit:
> +            bb.fatal("Invalid BB_MAX_CPU_SOME_PRESSURE %s, minimum value
> is %s" % (self.max_cpu_pressure, lower_limit))
> +        if self.max_cpu_pressure > upper_limit:
> +            bb.warn("Percentage value of BB_MAX_CPU_SOME_PRESSURE %s
> rounded down to %s" % (self.max_cpu_pressure, upper_limit))
> +            self.max_cpu_pressure = upper_limit
> +
> +        if self.max_io_pressure < lower_limit:
> +            bb.fatal("Invalid BB_MAX_IO_SOME_PRESSURE %s, minimum value
> is %s" % (self.max_io_pressure, lower_limit))
> +        if self.max_io_pressure > upper_limit:
> +            bb.warn("Percentage value of BB_MAX_IO_SOME_PRESSURE %s
> rounded down to %s" % (self.max_io_pressure, upper_limit))
> +            self.max_io_pressure = upper_limit
> +
>          # List of setscene tasks which we've covered
>          self.scenequeue_covered = set()
>          # List of tasks which are covered (including setscene ones)
> --
> 2.35.1
>
>
> -=-=-=-=-=-=-=-=-=-=-=-
> Links: You receive all messages sent to this group.
> View/Reply Online (#167390):
> https://lists.openembedded.org/g/openembedded-core/message/167390
> Mute This Topic: https://lists.openembedded.org/mt/92073312/5052612
> Group Owner: openembedded-core+owner@lists.openembedded.org
> Unsubscribe: https://lists.openembedded.org/g/openembedded-core/unsub [
> quaresma.jose@gmail.com]
> -=-=-=-=-=-=-=-=-=-=-=-
>
>
Richard Purdie June 30, 2022, 9:53 a.m. UTC | #2
On Wed, 2022-06-29 at 16:10 -0400, Aryaman Gupta wrote:
> Stop the scheduler from starting new tasks if the
> current cpu or io pressure is above a certain threshold, specified
> through the "BB_MAX_{CPU|IO}_SOME_PRESSURE" variables in conf/local.conf.
> 
> If the thresholds aren't specified, the default values are 100 for both
> CPU and IO, which will have no impact on build times.
> Arbitary lower limit of 1.0 results in a fatal error to avoid extremely
> long builds. If the percentage limits are higher than 100, then the default
> values are used and warnings are issued to inform users that the
> specified limit is out of bounds.
> 
> Signed-off-by: Aryaman Gupta <aryaman.gupta@windriver.com>
> Signed-off-by: Randy Macleod <randy.macleod@windriver.com>
> ---
>  bitbake/lib/bb/runqueue.py | 39 ++++++++++++++++++++++++++++++++++++++
>  1 file changed, 39 insertions(+)

This looks like a good start, thanks. There are a few things which will
need cleaning up in here as this is pretty performance sensitive code
(try a "time bitbake world -n" to see what I mean).

Firstly, it is a bitbake patch so should really go to the bitbake
mailing list, not OE-Core.

> diff --git a/bitbake/lib/bb/runqueue.py b/bitbake/lib/bb/runqueue.py
> index 1e47fe70ef..9667acc11c 100644
> --- a/bitbake/lib/bb/runqueue.py
> +++ b/bitbake/lib/bb/runqueue.py
> @@ -159,6 +159,27 @@ class RunQueueScheduler(object):
>                  self.buildable.append(tid)
>  
>          self.rev_prio_map = None
> +        # Some hosts like openSUSE have readable /proc/pressure files
> +        # but throw errors when these files are opened.
> +        try:
> +            subprocess.check_output(["cat", "/proc/pressure/cpu", "/proc/pressure/io"], \
> +                                    universal_newlines=True, stderr=subprocess.DEVNULL)
> +            self.readable_pressure_files = True
>
>         except:
> +            if self.rq.max_cpu_pressure!=100 or self.rq.max_io_pressure!=100:
> +                bb.warn("The /proc/pressure files can't be read. Continuing build without monitoring pressure")
> +            self.readable_pressure_files = False
> +
> +    def exceeds_max_pressure(self):
> +        if self.readable_pressure_files:
> +            # extract avg10 from /proc/pressure/{cpu|io}
> +            curr_pressure_sample = subprocess.check_output(["cat", "/proc/pressure/cpu", "/proc/pressure/io"], \
> +                                   universal_newlines=True, stderr=subprocess.DEVNULL)
> +            curr_cpu_pressure = curr_pressure_sample.split('\n')[0].split()[1].split("=")[1]
> +            curr_io_pressure = curr_pressure_sample.split('\n')[2].split()[1].split("=")[1]

This is horrible, you're adding in a fork() call for every pass through
the scheduler code. You can just open() and read the file instead which
will have *much* lower overhead.

Even then, I'm not particularly thrilled to have this level of overhead
in this codepath, in some ways I'd prefer to rate limit how often we're
looking up this value rather than once per pass through the scheduler
path. I'm curious what the timings say.

> +
> +            return float(curr_cpu_pressure) > self.rq.max_cpu_pressure or float(curr_io_pressure) > self.rq.max_io_pressure
> +        return False
>  
>      def next_buildable_task(self):
>          """
> @@ -171,6 +192,8 @@ class RunQueueScheduler(object):
>          buildable.intersection_update(self.rq.tasks_covered | self.rq.tasks_notcovered)
>          if not buildable:
>              return None
> +        if self.exceeds_max_pressure():
> +            return None
>  
>          # Filter out tasks that have a max number of threads that have been exceeded
>          skip_buildable = {}
> @@ -1699,6 +1722,8 @@ class RunQueueExecute:
>  
>          self.number_tasks = int(self.cfgData.getVar("BB_NUMBER_THREADS") or 1)
>          self.scheduler = self.cfgData.getVar("BB_SCHEDULER") or "speed"
> +        self.max_cpu_pressure = float(self.cfgData.getVar("BB_MAX_CPU_SOME_PRESSURE") or 100.0)
> +        self.max_io_pressure = float(self.cfgData.getVar("BB_MAX_IO_SOME_PRESSURE") or 100.0)

I did wonder if this should be BB_PRESSURE_MAX_SOME_IO as the order of
the information kinds of seems backwards to me. That could just be me
though! :)

Cheers,

Richard
Aryaman Gupta June 30, 2022, 8:30 p.m. UTC | #3
See my comments in-line below:

From: Jose Quaresma <quaresma.jose@gmail.com>
Sent: Thursday, June 30, 2022 4:32 AM
To: Gupta, Aryaman <Aryaman.Gupta@windriver.com>
Cc: OE-core <openembedded-core@lists.openembedded.org>
Subject: Re: [OE-core] [PATCH] runqueue: add cpu/io pressure regulation

[Please note: This e-mail is from an EXTERNAL e-mail address]
Hi Aryaman,

Aryaman Gupta <aryaman.gupta@windriver.com<mailto:aryaman.gupta@windriver.com>> escreveu no dia quarta, 29/06/2022 à(s) 21:10:
Stop the scheduler from starting new tasks if the
current cpu or io pressure is above a certain threshold, specified
through the "BB_MAX_{CPU|IO}_SOME_PRESSURE" variables in conf/local.conf.

If the thresholds aren't specified, the default values are 100 for both
CPU and IO, which will have no impact on build times.
Arbitary lower limit of 1.0 results in a fatal error to avoid extremely
long builds. If the percentage limits are higher than 100, then the default
values are used and warnings are issued to inform users that the
specified limit is out of bounds.

Signed-off-by: Aryaman Gupta <aryaman.gupta@windriver.com<mailto:aryaman.gupta@windriver.com>>
Signed-off-by: Randy Macleod <randy.macleod@windriver.com<mailto:randy.macleod@windriver.com>>
---
 bitbake/lib/bb/runqueue.py<https://urldefense.com/v3/__http:/runqueue.py__;!!AjveYdw8EvQ!e1CI82Ofx8f4NJzunFw0NlSfrecrWNAK6pHQVh1dDg5FEpHN1-ry3t0YSFmnuF-2QXeK4VIustOeVFkhJrjkqeHEdnsT$> | 39 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/bitbake/lib/bb/runqueue.py<https://urldefense.com/v3/__http:/runqueue.py__;!!AjveYdw8EvQ!e1CI82Ofx8f4NJzunFw0NlSfrecrWNAK6pHQVh1dDg5FEpHN1-ry3t0YSFmnuF-2QXeK4VIustOeVFkhJrjkqeHEdnsT$> b/bitbake/lib/bb/runqueue.py<https://urldefense.com/v3/__http:/runqueue.py__;!!AjveYdw8EvQ!e1CI82Ofx8f4NJzunFw0NlSfrecrWNAK6pHQVh1dDg5FEpHN1-ry3t0YSFmnuF-2QXeK4VIustOeVFkhJrjkqeHEdnsT$>
index 1e47fe70ef..9667acc11c 100644
--- a/bitbake/lib/bb/runqueue.py<https://urldefense.com/v3/__http:/runqueue.py__;!!AjveYdw8EvQ!e1CI82Ofx8f4NJzunFw0NlSfrecrWNAK6pHQVh1dDg5FEpHN1-ry3t0YSFmnuF-2QXeK4VIustOeVFkhJrjkqeHEdnsT$>
+++ b/bitbake/lib/bb/runqueue.py<https://urldefense.com/v3/__http:/runqueue.py__;!!AjveYdw8EvQ!e1CI82Ofx8f4NJzunFw0NlSfrecrWNAK6pHQVh1dDg5FEpHN1-ry3t0YSFmnuF-2QXeK4VIustOeVFkhJrjkqeHEdnsT$>
@@ -159,6 +159,27 @@ class RunQueueScheduler(object):
                 self.buildable.append(tid)

         self.rev_prio_map = None
+        # Some hosts like openSUSE have readable /proc/pressure files
+        # but throw errors when these files are opened.
+        try:
+            subprocess.check_output(["cat", "/proc/pressure/cpu", "/proc/pressure/io"], \
+                                    universal_newlines=True, stderr=subprocess.DEVNULL)
+            self.readable_pressure_files = True
+        except:
+            if self.rq.max_cpu_pressure!=100 or self.rq.max_io_pressure!=100:
+                bb.warn("The /proc/pressure files can't be read. Continuing build without monitoring pressure")
+            self.readable_pressure_files = False
+
+    def exceeds_max_pressure(self):
+        if self.readable_pressure_files:

We can jump when the user does not provide any default values or if they provide the maximum

if self.readable_pressure_files and (self.rq.max_cpu_pressure < 100 or self.rq.max_io_pressure < 100):

Jose

Aryaman: Yes, this makes sense. I intend to do this in V2. Thanks for the review!

Regards,
Aryaman
+            # extract avg10 from /proc/pressure/{cpu|io}
+            curr_pressure_sample = subprocess.check_output(["cat", "/proc/pressure/cpu", "/proc/pressure/io"], \
+                                   universal_newlines=True, stderr=subprocess.DEVNULL)
+            curr_cpu_pressure = curr_pressure_sample.split('\n')[0].split()[1].split("=")[1]
+            curr_io_pressure = curr_pressure_sample.split('\n')[2].split()[1].split("=")[1]
+
+            return float(curr_cpu_pressure) > self.rq.max_cpu_pressure or float(curr_io_pressure) > self.rq.max_io_pressure
+        return False

     def next_buildable_task(self):
         """
@@ -171,6 +192,8 @@ class RunQueueScheduler(object):
         buildable.intersection_update(self.rq.tasks_covered | self.rq.tasks_notcovered)
         if not buildable:
             return None
+        if self.exceeds_max_pressure():
+            return None

         # Filter out tasks that have a max number of threads that have been exceeded
         skip_buildable = {}
@@ -1699,6 +1722,8 @@ class RunQueueExecute:

         self.number_tasks = int(self.cfgData.getVar("BB_NUMBER_THREADS") or 1)
         self.scheduler = self.cfgData.getVar("BB_SCHEDULER") or "speed"
+        self.max_cpu_pressure = float(self.cfgData.getVar("BB_MAX_CPU_SOME_PRESSURE") or 100.0)
+        self.max_io_pressure = float(self.cfgData.getVar("BB_MAX_IO_SOME_PRESSURE") or 100.0)

         self.sq_buildable = set()
         self.sq_running = set()
@@ -1733,6 +1758,20 @@ class RunQueueExecute:
         if self.number_tasks <= 0:
              bb.fatal("Invalid BB_NUMBER_THREADS %s" % self.number_tasks)

+        lower_limit = 1.0
+        upper_limit = 100.0
+        if self.max_cpu_pressure < lower_limit:
+            bb.fatal("Invalid BB_MAX_CPU_SOME_PRESSURE %s, minimum value is %s" % (self.max_cpu_pressure, lower_limit))
+        if self.max_cpu_pressure > upper_limit:
+            bb.warn("Percentage value of BB_MAX_CPU_SOME_PRESSURE %s rounded down to %s" % (self.max_cpu_pressure, upper_limit))
+            self.max_cpu_pressure = upper_limit
+
+        if self.max_io_pressure < lower_limit:
+            bb.fatal("Invalid BB_MAX_IO_SOME_PRESSURE %s, minimum value is %s" % (self.max_io_pressure, lower_limit))
+        if self.max_io_pressure > upper_limit:
+            bb.warn("Percentage value of BB_MAX_IO_SOME_PRESSURE %s rounded down to %s" % (self.max_io_pressure, upper_limit))
+            self.max_io_pressure = upper_limit
+
         # List of setscene tasks which we've covered
         self.scenequeue_covered = set()
         # List of tasks which are covered (including setscene ones)
--
2.35.1


-=-=-=-=-=-=-=-=-=-=-=-
Links: You receive all messages sent to this group.
View/Reply Online (#167390): https://lists.openembedded.org/g/openembedded-core/message/167390<https://urldefense.com/v3/__https:/lists.openembedded.org/g/openembedded-core/message/167390__;!!AjveYdw8EvQ!e1CI82Ofx8f4NJzunFw0NlSfrecrWNAK6pHQVh1dDg5FEpHN1-ry3t0YSFmnuF-2QXeK4VIustOeVFkhJrjkqWz0Vz7C$>
Mute This Topic: https://lists.openembedded.org/mt/92073312/5052612<https://urldefense.com/v3/__https:/lists.openembedded.org/mt/92073312/5052612__;!!AjveYdw8EvQ!e1CI82Ofx8f4NJzunFw0NlSfrecrWNAK6pHQVh1dDg5FEpHN1-ry3t0YSFmnuF-2QXeK4VIustOeVFkhJrjkqZmoDbO6$>
Group Owner: openembedded-core+owner@lists.openembedded.org<mailto:openembedded-core%2Bowner@lists.openembedded.org>
Unsubscribe: https://lists.openembedded.org/g/openembedded-core/unsub<https://urldefense.com/v3/__https:/lists.openembedded.org/g/openembedded-core/unsub__;!!AjveYdw8EvQ!e1CI82Ofx8f4NJzunFw0NlSfrecrWNAK6pHQVh1dDg5FEpHN1-ry3t0YSFmnuF-2QXeK4VIustOeVFkhJrjkqW20_vI8$> [quaresma.jose@gmail.com<mailto:quaresma.jose@gmail.com>]
-=-=-=-=-=-=-=-=-=-=-=-


--
Best regards,

José Quaresma
Aryaman Gupta June 30, 2022, 8:50 p.m. UTC | #4
>-----Original Message-----
>From: Richard Purdie <richard.purdie@linuxfoundation.org>
>Sent: Thursday, June 30, 2022 5:53 AM
>To: Gupta, Aryaman <Aryaman.Gupta@windriver.com>; openembedded-
>core@lists.openembedded.org
>Subject: Re: [OE-core] [PATCH] runqueue: add cpu/io pressure regulation
>
>[Please note: This e-mail is from an EXTERNAL e-mail address]
>
>On Wed, 2022-06-29 at 16:10 -0400, Aryaman Gupta wrote:
>> Stop the scheduler from starting new tasks if the current cpu or io
>> pressure is above a certain threshold, specified through the
>> "BB_MAX_{CPU|IO}_SOME_PRESSURE" variables in conf/local.conf.
>>
>> If the thresholds aren't specified, the default values are 100 for
>> both CPU and IO, which will have no impact on build times.
>> Arbitary lower limit of 1.0 results in a fatal error to avoid
>> extremely long builds. If the percentage limits are higher than 100,
>> then the default values are used and warnings are issued to inform
>> users that the specified limit is out of bounds.
>>
>> Signed-off-by: Aryaman Gupta <aryaman.gupta@windriver.com>
>> Signed-off-by: Randy Macleod <randy.macleod@windriver.com>
>> ---
>>  bitbake/lib/bb/runqueue.py | 39
>> ++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 39 insertions(+)
>
>This looks like a good start, thanks. There are a few things which will need
>cleaning up in here as this is pretty performance sensitive code (try a "time
>bitbake world -n" to see what I mean).
>
>Firstly, it is a bitbake patch so should really go to the bitbake mailing list, not OE-
>Core.
>

Right, will do!

>> diff --git a/bitbake/lib/bb/runqueue.py b/bitbake/lib/bb/runqueue.py
>> index 1e47fe70ef..9667acc11c 100644
>> --- a/bitbake/lib/bb/runqueue.py
>> +++ b/bitbake/lib/bb/runqueue.py
>> @@ -159,6 +159,27 @@ class RunQueueScheduler(object):
>>                  self.buildable.append(tid)
>>
>>          self.rev_prio_map = None
>> +        # Some hosts like openSUSE have readable /proc/pressure files
>> +        # but throw errors when these files are opened.
>> +        try:
>> +            subprocess.check_output(["cat", "/proc/pressure/cpu",
>"/proc/pressure/io"], \
>> +                                    universal_newlines=True, stderr=subprocess.DEVNULL)
>> +            self.readable_pressure_files = True
>>
>>         except:
>> +            if self.rq.max_cpu_pressure!=100 or self.rq.max_io_pressure!=100:
>> +                bb.warn("The /proc/pressure files can't be read. Continuing build
>without monitoring pressure")
>> +            self.readable_pressure_files = False
>> +
>> +    def exceeds_max_pressure(self):
>> +        if self.readable_pressure_files:
>> +            # extract avg10 from /proc/pressure/{cpu|io}
>> +            curr_pressure_sample = subprocess.check_output(["cat",
>"/proc/pressure/cpu", "/proc/pressure/io"], \
>> +                                   universal_newlines=True, stderr=subprocess.DEVNULL)
>> +            curr_cpu_pressure =
>curr_pressure_sample.split('\n')[0].split()[1].split("=")[1]
>> +            curr_io_pressure =
>> + curr_pressure_sample.split('\n')[2].split()[1].split("=")[1]
>
>This is horrible, you're adding in a fork() call for every pass through the
>scheduler code. You can just open() and read the file instead which will have
>*much* lower overhead.

Oh you're right. Randy and I tested this and turns out subprocess is significantly slower.
We also looked at another level of optimization where you open() once and re-use the file descriptor, 
similar to what's done in buildstats. While this is faster, the absolute time to run the code is around 1 or 3
microseconds. Given the added complexity to the code, the optimization does not seem worthwhile.
I'll remove the subprocess() call in v2.

>
>Even then, I'm not particularly thrilled to have this level of overhead in this
>codepath, in some ways I'd prefer to rate limit how often we're looking up this
>value rather than once per pass through the scheduler path. I'm curious what
>the timings say.
>
We wrote this simple test code:
import timeit, subprocess
file = "/proc/pressure/cpu"

def test_subprocess():
    subprocess.check_output(["cat", file])

test1 = open(file)
def test_seek():
    test1.seek(0)
    test1.read()

def test_openread():
    with open(file) as test:
        test.read()
        check_var = 1+1

repeat = 1000
#print("SUBPROCESS: %s " % timeit.Timer(test_subprocess).timeit(number=repeat))
print("SEEK: %s " % timeit.Timer(test_seek).timeit(number=repeat))
print("OPEN AND READ: %s " % timeit.Timer(test_openread).timeit(number=repeat))

Running this with python test.py gave:
SEEK: 0.012695984973106533 
OPEN AND READ: 0.0369647319894284

You can see that this is about 3x faster but only a couple of microseconds in absolute terms.
Do you think that's a reasonable overhead?

>> +
>> +            return float(curr_cpu_pressure) > self.rq.max_cpu_pressure or
>float(curr_io_pressure) > self.rq.max_io_pressure
>> +        return False
>>
>>      def next_buildable_task(self):
>>          """
>> @@ -171,6 +192,8 @@ class RunQueueScheduler(object):
>>          buildable.intersection_update(self.rq.tasks_covered |
>self.rq.tasks_notcovered)
>>          if not buildable:
>>              return None
>> +        if self.exceeds_max_pressure():
>> +            return None
>>
>>          # Filter out tasks that have a max number of threads that have been
>exceeded
>>          skip_buildable = {}
>> @@ -1699,6 +1722,8 @@ class RunQueueExecute:
>>
>>          self.number_tasks = int(self.cfgData.getVar("BB_NUMBER_THREADS") or
>1)
>>          self.scheduler = self.cfgData.getVar("BB_SCHEDULER") or "speed"
>> +        self.max_cpu_pressure =
>float(self.cfgData.getVar("BB_MAX_CPU_SOME_PRESSURE") or 100.0)
>> +        self.max_io_pressure =
>> + float(self.cfgData.getVar("BB_MAX_IO_SOME_PRESSURE") or 100.0)
>
>I did wonder if this should be BB_PRESSURE_MAX_SOME_IO as the order of the
>information kinds of seems backwards to me. That could just be me though! :)

Sure, that colour of bikeshed is perfect ;^)

Regards,
Aryaman
>
>Cheers,
>
>Richard

Patch

diff --git a/bitbake/lib/bb/runqueue.py b/bitbake/lib/bb/runqueue.py
index 1e47fe70ef..9667acc11c 100644
--- a/bitbake/lib/bb/runqueue.py
+++ b/bitbake/lib/bb/runqueue.py
@@ -159,6 +159,27 @@  class RunQueueScheduler(object):
                 self.buildable.append(tid)
 
         self.rev_prio_map = None
+        # Some hosts like openSUSE have readable /proc/pressure files
+        # but throw errors when these files are opened.
+        try:
+            subprocess.check_output(["cat", "/proc/pressure/cpu", "/proc/pressure/io"], \
+                                    universal_newlines=True, stderr=subprocess.DEVNULL)
+            self.readable_pressure_files = True
+        except:
+            if self.rq.max_cpu_pressure!=100 or self.rq.max_io_pressure!=100:
+                bb.warn("The /proc/pressure files can't be read. Continuing build without monitoring pressure")
+            self.readable_pressure_files = False
+
+    def exceeds_max_pressure(self):
+        if self.readable_pressure_files:
+            # extract avg10 from /proc/pressure/{cpu|io}
+            curr_pressure_sample = subprocess.check_output(["cat", "/proc/pressure/cpu", "/proc/pressure/io"], \
+                                   universal_newlines=True, stderr=subprocess.DEVNULL)
+            curr_cpu_pressure = curr_pressure_sample.split('\n')[0].split()[1].split("=")[1]
+            curr_io_pressure = curr_pressure_sample.split('\n')[2].split()[1].split("=")[1]
+
+            return float(curr_cpu_pressure) > self.rq.max_cpu_pressure or float(curr_io_pressure) > self.rq.max_io_pressure
+        return False
 
     def next_buildable_task(self):
         """
@@ -171,6 +192,8 @@  class RunQueueScheduler(object):
         buildable.intersection_update(self.rq.tasks_covered | self.rq.tasks_notcovered)
         if not buildable:
             return None
+        if self.exceeds_max_pressure():
+            return None
 
         # Filter out tasks that have a max number of threads that have been exceeded
         skip_buildable = {}
@@ -1699,6 +1722,8 @@  class RunQueueExecute:
 
         self.number_tasks = int(self.cfgData.getVar("BB_NUMBER_THREADS") or 1)
         self.scheduler = self.cfgData.getVar("BB_SCHEDULER") or "speed"
+        self.max_cpu_pressure = float(self.cfgData.getVar("BB_MAX_CPU_SOME_PRESSURE") or 100.0)
+        self.max_io_pressure = float(self.cfgData.getVar("BB_MAX_IO_SOME_PRESSURE") or 100.0)
 
         self.sq_buildable = set()
         self.sq_running = set()
@@ -1733,6 +1758,20 @@  class RunQueueExecute:
         if self.number_tasks <= 0:
              bb.fatal("Invalid BB_NUMBER_THREADS %s" % self.number_tasks)
 
+        lower_limit = 1.0
+        upper_limit = 100.0
+        if self.max_cpu_pressure < lower_limit:
+            bb.fatal("Invalid BB_MAX_CPU_SOME_PRESSURE %s, minimum value is %s" % (self.max_cpu_pressure, lower_limit))
+        if self.max_cpu_pressure > upper_limit:
+            bb.warn("Percentage value of BB_MAX_CPU_SOME_PRESSURE %s rounded down to %s" % (self.max_cpu_pressure, upper_limit))
+            self.max_cpu_pressure = upper_limit
+
+        if self.max_io_pressure < lower_limit:
+            bb.fatal("Invalid BB_MAX_IO_SOME_PRESSURE %s, minimum value is %s" % (self.max_io_pressure, lower_limit))
+        if self.max_io_pressure > upper_limit:
+            bb.warn("Percentage value of BB_MAX_IO_SOME_PRESSURE %s rounded down to %s" % (self.max_io_pressure, upper_limit))
+            self.max_io_pressure = upper_limit
+
         # List of setscene tasks which we've covered
         self.scenequeue_covered = set()
         # List of tasks which are covered (including setscene ones)