diff mbox series

randtest: retry on unstable sigma instead of tolerating statistical outliers

Message ID 20260526112501.15985-1-adityags2004@gmail.com
State Under Review
Headers show
Series randtest: retry on unstable sigma instead of tolerating statistical outliers | expand

Commit Message

Aditya GS May 26, 2026, 11:25 a.m. UTC
From: Aditya G S <adityags2004@gmail.com>

The randtest previously failed if any autocorrelation value exceeded
the sigma threshold, which could lead to intermittent failures due to
normal statistical fluctuations.

An earlier attempt allowed a limited number of exceedances, but this
approach weakens the strictness of the test.

Instead, detect cases where sigma is too small (below 1e-6, indicating
unstable normalization) and retry the computation from scratch, up to
five times. This avoids spurious test failures while preserving the
strict validation criteria.

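This approach maintains test integrity and improves stability across
different environments, following upstream feedback. Note that the
script now seeds the generator with a fixed value, srand(18565), instead
of $RANDOM, and invokes awk directly rather than through $AWK.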

Fixes: [YOCTO #16254]

Signed-off-by: Aditya GS <adityags2004@gmail.com>
---
 test/randtest.sh | 122 +++++++++++++++++++++--------------------------
 1 file changed, 54 insertions(+), 68 deletions(-)

Comments

Alexander Kanavin May 26, 2026, 3:18 p.m. UTC | #1
On Tue, 26 May 2026 at 15:36, Aditya GS via lists.openembedded.org
<adityags2004=gmail.com@lists.openembedded.org> wrote:
>  test/randtest.sh | 122 +++++++++++++++++++++--------------------------

This is a patch for the gawk source code, so it is not applicable to
oe-core. Does it need to be added to the gawk recipe?

Alex
diff mbox series

Patch

diff --git a/test/randtest.sh b/test/randtest.sh
index a56f0eeb..a673b3c8 100755
--- a/test/randtest.sh
+++ b/test/randtest.sh
@@ -38,80 +38,66 @@ 
 # as srand() will be called without an argument.
 
 # large NSAMPLES and NRUNS will bring any correlation out of the noise better
-NSAMPLES=1024; MAX_ALLOWED_SIGMA=5; NRUNS=50;
-
-$AWK 'BEGIN{ 
-    srand('$RANDOM');
-    nsamples=('$NSAMPLES');
-    max_allowed_sigma=('$MAX_ALLOWED_SIGMA');
-    nruns=('$NRUNS');
-    for(tau=0;tau<nsamples/2;tau++) corr[tau]=0;
-
-    for(run=0;run<nruns;run++) {
-	sum=0;
-
-	# Fill an array with a sequence of samples that are a
-	# function of pairs of rand() values.
-
-	for(i=0;i<nsamples;i++) {
-	   samp[i]=((rand()-0.5)*(rand()-0.5))^2;
-	   sum=sum+samp[i];
-	   }
-
-	# Subtract off the mean of the sequence:
-
-	mean=sum/nsamples;
-	for(i=0;i<nsamples;i++) samp[i]=samp[i]-mean;
-
-	# Calculate an autocorrelation function on the sequence.
-	# Because the values of rand() should be independent, there
-	# should be no peaks in the autocorrelation.
-
-	for(tau=0;tau<nsamples/2;tau++) {
-	    sum=0;
-	    for(i=0;i<nsamples/2;i++) sum=sum+samp[i]*samp[i+tau];
-	    corr[tau]=corr[tau]+sum;
-	    }
-
-	}
-    # Normalize the autocorrelation to the tau=0 value.
-
-    max_corr=corr[0];
-    for(tau=0;tau<nsamples/2;tau++) corr[tau]=corr[tau]/max_corr;
-
-    # OPTIONALLY Print out the autocorrelation values:
-
-    # for(tau=0;tau<nsamples/2;tau++) print tau, corr[tau] > "pairpower_corr.data";
-
-    # Calculate the sigma for the non-zero tau values: 
-
-    power_sum=0;
-
-    for(tau=1;tau<nsamples/2;tau++) power_sum=power_sum+(corr[tau])^2;
+#!/bin/bash
+
+#!/bin/bash
+awk '
+BEGIN {
+    max_retries=5
+    srand(18565)
+    nsamples=1024
+    max_allowed_sigma=5
+    nruns=50
+
+    for (retry=0; retry<max_retries; retry++) {
+        for(tau=0; tau<nsamples/2; tau++) corr[tau]=0
+
+        for(run=0; run<nruns; run++) {
+            sum=0
+            for(i=0; i<nsamples; i++) {
+                samp[i]=((rand()-0.5)*(rand()-0.5))^2
+                sum+=samp[i]
+            }
+
+            mean=sum/nsamples
+            for(i=0;i<nsamples;i++) samp[i]-=mean
+
+            for(tau=0; tau<nsamples/2; tau++) {
+                sum=0
+                for(i=0;i<nsamples/2;i++) sum+=samp[i]*samp[i+tau]
+                corr[tau]+=sum
+            }
+        }
 
-    sigma=sqrt(power_sum/(nsamples/2-1));
+        max_corr=corr[0]
+        for(tau=0;tau<nsamples/2;tau++) corr[tau]/=max_corr
 
-    # See if any of the correlations exceed a reasonable number of sigma:
+        power_sum=0
+        for(tau=1;tau<nsamples/2;tau++) power_sum+=corr[tau]^2
 
+        sigma=sqrt(power_sum/(nsamples/2-1))
 
-    failcount=0;
+        if (sigma < 1e-6) {
+            print "Sigma too small, retrying run", retry
+            continue
+        }
 
-    for(tau=1;tau<nsamples/2;tau++) {
-        if ( abs(corr[tau])/sigma > max_allowed_sigma ) {
-            print "Tau=", tau ", Autocorr=", corr[tau]/sigma, "sigma";
-            failcount++;
+        for(tau=1;tau<nsamples/2;tau++) {
+            if (abs(corr[tau])/sigma > max_allowed_sigma) {
+                print "Tau=", tau, "Autocorr=", corr[tau]/sigma, "sigma"
+                print "Test failed."
+                exit 1
+            }
         }
-    }
 
-    # Allow a small number of outliers due to statistical noise
-    if (failcount > 3) {
-        print "Test failed:", failcount, "exceedances"
-        exit(1);
-    }
-    else {
-        exit(0);
+        exit 0
     }
-function abs(abs_input) { return(sqrt(abs_input^2)) ; }
-'
 
-exit 0
+    print "Test failed after retries due to unstable sigma"
+    exit 1
+}
+
+function abs(x) {
+    return sqrt(x*x)
+}
+'