diff mbox series

[v3,1/1] scripts: Add clean-hashserver-database script

Message ID 20250312112351.781019-1-c137.marques@gmail.com
State New
Headers show
Series [v3,1/1] scripts: Add clean-hashserver-database script | expand

Commit Message

Alexandre Marques March 12, 2025, 11:23 a.m. UTC
From: Alexandre Marques <c137.marques@gmail.com>

Auxiliary script to clean the hashserver database based on the files
available in the sstate directory.

It makes use of the new "hashclient gc-mark-stream" command to mark all
sstate-relevant hashes as "alive" and removes everything else from the
database.

Usage example:
```
./scripts/clean-hashserver-database \
    --sstate-dir ~/build/sstate-cache \
    --hashclient ./bitbake/bin/bitbake-hashclient \
    --hashserver-address "ws://localhost:8688/ws" \
    --mark "alive" \
    --clean-db
```

Signed-off-by: Alexander Marques <c137.marques@gmail.com>
---
 scripts/clean-hashserver-database | 73 +++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)
 create mode 100755 scripts/clean-hashserver-database

Comments

Joshua Watt March 12, 2025, 4:27 p.m. UTC | #1
On Wed, Mar 12, 2025 at 5:24 AM Alexandre Marques via
lists.openembedded.org <c137.marques=gmail.com@lists.openembedded.org>
wrote:
>
> From: Alexandre Marques <c137.marques@gmail.com>
>
> Auxiliary script to clean the hashserver database based on the files
> available in the sstate directory.
>
> It makes used of the new "hashclient gc-mark-stream" command to mark all sstate
> relevant hashes as "alive" and removes everything else from the
> database.
>
> Usage example:
> ```
> ./scripts/clean-hashserver-database \
>     --sstate-dir ~/build/sstate-cache \
>     --hashclient ./bitbake/bin/bitabke-hashclient \
>     --hashserver-address "ws://localhost:8688/ws" \
>     --mark "alive" \
>     --clean-db
> ```
>
> Signed-off-by: Alexander Marques <c137.marques@gmail.com>
> ---
>  scripts/clean-hashserver-database | 73 +++++++++++++++++++++++++++++++
>  1 file changed, 73 insertions(+)
>  create mode 100755 scripts/clean-hashserver-database
>
> diff --git a/scripts/clean-hashserver-database b/scripts/clean-hashserver-database
> new file mode 100755
> index 0000000000..6eb006758e
> --- /dev/null
> +++ b/scripts/clean-hashserver-database
> @@ -0,0 +1,73 @@
> +#!/bin/bash
> +set -euo pipefail
> +
> +SSTATE_DIR=""
> +BB_HASHCLIENT=""
> +BB_HASHSERVER=""
> +
> +ALIVE_DB_MARK="alive"
> +CLEAN_DB="false"
> +
> +function help() {
> +    cat <<HELP_TEXT
> +Usage: $0 --sstate-dir path --hashclient path --hashserver-address address [--mark value] [--clean-db]
> +
> +Auxiliary script remove unused or no longer relevant entries from the hashequivalence database, based
> +on the files available on the sstate directory.
> +
> +    -h | --help)               Show this help message and exit
> +    -s | --sstate-dir)         Path to the sstate dir
> +    -c | --hashclient)         Path to bitbake-hashclient
> +    -a | --hashserver-adress)  bitbake-hashserver address
> +    -m | --mark)               Marker string to mark database entries
> +    --clean-db)                Remove all unmarked and unused entries from the database
> +HELP_TEXT
> +}
> +
> +function argument_parser() {
> +    while [ $# -gt 0 ]; do
> +        case "$1" in
> +            -h | --help) help; exit 0 ;;
> +            -s | --sstate-dir) SSTATE_DIR="$2"; shift ;;
> +            -c | --hashclient) BB_HASHCLIENT="$2"; shift ;;
> +            -a | --hashserver-address) BB_HASHSERVER="$2"; shift ;;
> +            -m | --mark) ALIVE_DB_MARK="$2"; shift ;;
> +            --clean-db) CLEAN_DB="true";;
> +            *)
> +                echo "Argument '$1' is not supported" >&2
> +                help >&2
> +                exit 1
> +                ;;
> +        esac
> +        shift
> +    done
> +
> +    function validate_mandatory_argument() {
> +        local var_value="$1"
> +        local error_message="$2"
> +
> +        if [ -z "$var_value" ]; then
> +            echo "$error_message"
> +            help >&2
> +            exit 1
> +        fi
> +    }
> +
> +    validate_mandatory_argument "$SSTATE_DIR" "Please provide the path to the sstate dir."
> +    validate_mandatory_argument "$BB_HASHCLIENT" "Please provide the path to bitbake-hashclient."
> +    validate_mandatory_argument "$BB_HASHSERVER" "Please provide the address of bitbake-hashserver."
> +}
> +
> +# -- main code --
> +argument_parser $@
> +
> +# Mark all db sstate hashes
> +find "$SSTATE_DIR" -name "*.tar.zst" | \
> +sed 's/.*:\([^_]*\)_.*/unihash \1/' | \
> +$BB_HASHCLIENT --address "$BB_HASHSERVER" gc-mark-stream "${ALIVE_DB_MARK}"
> +
> +# Remove unmarked and unused entries
> +if [ "$CLEAN_DB" = "true" ]; then
> +    $BB_HASHCLIENT --address "$BB_HASHSERVER" gc-sweep "${ALIVE_DB_MARK}"
> +    $BB_HASHCLIENT --address "$BB_HASHSERVER" clean-unused 0

The reason for the time is that entries can appear to be unused if
they are created while a build is in progress and you don't
necessarily want to remove them. Ideally, this is longer than your
longest build time. Either way, 0 is probably too aggressive and/or it
should be configurable on the command line.

> +fi
> --
> 2.34.1
>
>
> -=-=-=-=-=-=-=-=-=-=-=-
> Links: You receive all messages sent to this group.
> View/Reply Online (#212644): https://lists.openembedded.org/g/openembedded-core/message/212644
> Mute This Topic: https://lists.openembedded.org/mt/111657664/3616693
> Group Owner: openembedded-core+owner@lists.openembedded.org
> Unsubscribe: https://lists.openembedded.org/g/openembedded-core/unsub [JPEWhacker@gmail.com]
> -=-=-=-=-=-=-=-=-=-=-=-
>
diff mbox series

Patch

diff --git a/scripts/clean-hashserver-database b/scripts/clean-hashserver-database
new file mode 100755
index 0000000000..6eb006758e
--- /dev/null
+++ b/scripts/clean-hashserver-database
@@ -0,0 +1,96 @@
+#!/bin/bash
+#
+# Remove stale entries from a hash equivalence server database.
+#
+# Every unihash that still has an sstate archive below --sstate-dir is marked
+# through "bitbake-hashclient gc-mark-stream". With --clean-db, "gc-sweep" and
+# "clean-unused" are then run to drop the entries that were not marked.
+set -euo pipefail
+
+SSTATE_DIR=""
+BB_HASHCLIENT=""
+BB_HASHSERVER=""
+
+ALIVE_DB_MARK="alive"
+CLEAN_DB="false"
+# Age in seconds handed to "clean-unused"; 0 keeps the previous behaviour.
+UNUSED_MAX_AGE="0"
+
+# Print the usage text on stdout.
+function help() {
+    cat <<HELP_TEXT
+Usage: $0 --sstate-dir path --hashclient path --hashserver-address address [--mark value] [--clean-db] [--unused-max-age seconds]
+
+Auxiliary script to remove unused or no longer relevant entries from the hash equivalence database, based
+on the files available in the sstate directory.
+
+    -h | --help)                Show this help message and exit
+    -s | --sstate-dir)          Path to the sstate dir
+    -c | --hashclient)          Path to bitbake-hashclient
+    -a | --hashserver-address)  bitbake-hashserver address
+    -m | --mark)                Marker string to mark database entries
+    -u | --unused-max-age)      With --clean-db: only remove unused entries older than this many seconds
+                                (default: 0). Entries created by a build that is still running can look
+                                unused, so choose a value longer than your longest build.
+    --clean-db)                 Remove all unmarked and unused entries from the database
+HELP_TEXT
+}
+
+# Print the message in $1 and the usage text on stderr, then exit with status 1.
+function usage_error() {
+    echo "$1" >&2
+    help >&2
+    exit 1
+}
+
+# Abort with a usage error unless option $1 is followed by a value; $2 is the
+# number of arguments left. Without this check "set -u" would abort on "$2"
+# with an unhelpful "unbound variable" message.
+function require_value() {
+    [ "$2" -ge 2 ] || usage_error "Option '$1' requires a value."
+}
+
+# Parse the command line into the globals above and validate the result.
+function argument_parser() {
+    while [ $# -gt 0 ]; do
+        case "$1" in
+            -h | --help) help; exit 0 ;;
+            -s | --sstate-dir) require_value "$1" $#; SSTATE_DIR="$2"; shift ;;
+            -c | --hashclient) require_value "$1" $#; BB_HASHCLIENT="$2"; shift ;;
+            -a | --hashserver-address) require_value "$1" $#; BB_HASHSERVER="$2"; shift ;;
+            -m | --mark) require_value "$1" $#; ALIVE_DB_MARK="$2"; shift ;;
+            -u | --unused-max-age) require_value "$1" $#; UNUSED_MAX_AGE="$2"; shift ;;
+            --clean-db) CLEAN_DB="true" ;;
+            *) usage_error "Argument '$1' is not supported" ;;
+        esac
+        shift
+    done
+
+    [ -n "$SSTATE_DIR" ] || usage_error "Please provide the path to the sstate dir."
+    [ -n "$BB_HASHCLIENT" ] || usage_error "Please provide the path to bitbake-hashclient."
+    [ -n "$BB_HASHSERVER" ] || usage_error "Please provide the address of bitbake-hashserver."
+
+    # Fail early with a clear message rather than leaving it to find to
+    # report the problem from inside the marking pipeline.
+    [ -d "$SSTATE_DIR" ] || usage_error "sstate dir '$SSTATE_DIR' is not a directory."
+
+    case "$UNUSED_MAX_AGE" in
+        "" | *[!0-9]*) usage_error "--unused-max-age must be a non-negative integer number of seconds." ;;
+    esac
+}
+
+# -- main code --
+argument_parser "$@"
+
+# Mark all db sstate hashes. The sed expression captures the text between a
+# ':' and the next '_' of each path and emits it as "unihash <value>"; -n with
+# the p flag drops paths that do not match instead of passing them through.
+find "$SSTATE_DIR" -name "*.tar.zst" | \
+sed -n 's/.*:\([^_]*\)_.*/unihash \1/p' | \
+"$BB_HASHCLIENT" --address "$BB_HASHSERVER" gc-mark-stream "${ALIVE_DB_MARK}"
+
+# Remove unmarked and unused entries
+if [ "$CLEAN_DB" = "true" ]; then
+    "$BB_HASHCLIENT" --address "$BB_HASHSERVER" gc-sweep "${ALIVE_DB_MARK}"
+    "$BB_HASHCLIENT" --address "$BB_HASHSERVER" clean-unused "$UNUSED_MAX_AGE"
+fi