From 14e88f283c6513fff07a58b301c350a3cdcc1388 Mon Sep 17 00:00:00 2001
From: Rui Zhao <zhaorui126@gmail.com>
Date: Tue, 12 May 2026 16:35:39 +0800
Subject: [PATCH v1 2/2] Benchmark scripts for lazy snapshot distribution

Provides single-run and matrix benchmark scripts to measure the impact
of the lazy snapshot distribution patch on logical decoding spill bytes
and decoding time.

Scenario: one long-running write transaction with K=1 INSERT, coexisting
with N concurrent CREATE/DROP TABLE pairs (each its own catalog-modifying
commit), then drained via pg_logical_slot_get_changes() with the
test_decoding output plugin.  Captures spill_bytes, spill_count,
total_bytes, and end-to-end decoding wall time.

Files:

  bench/setup_cluster.sh
    Spins up a throwaway PG cluster from a specified install directory,
    configured for logical decoding (wal_level=logical, max_wal_senders,
    max_replication_slots, configurable logical_decoding_work_mem).

  bench/lazy_snapshot_bench.sh
    Runs a single (N, K) scenario against a running cluster.  The long
    transaction runs in a background psql session that does K INSERTs
    and then pg_sleep()s long enough for the concurrent DDL loop to
    complete.  DDLs are batched in a single psql session for ~100x
    throughput over per-statement connections.  Outputs one CSV row.

  bench/run_matrix.sh
    Driver that iterates over a list of N values with configurable
    repeat count (default 3).  Emits a CSV with all replicates.

  bench/aggregate.sh
    Aggregates CSVs (from one or more run_matrix.sh invocations) into
    median-per-cell summary tables, plus a side-by-side master vs
    patch comparison with computed speedup and bytes-saved ratios.

Usage:

  # Build master and patch versions of PostgreSQL
  git checkout master
  ./configure --prefix=/tmp/pg_master_install ... && make install
  git checkout lazy-snapshot-distribution
  make clean && ./configure --prefix=/tmp/pg_patch_install ... && make install

  # Bring up master cluster, run matrix
  eval "$(./bench/setup_cluster.sh /tmp/pg_master_install /tmp/pg_master_data 55432)"
  ./bench/run_matrix.sh master 500 1000 2000 5000 > master.csv
  pg_ctl -D /tmp/pg_master_data stop

  # Bring up patch cluster on a different port, run matrix in parallel
  eval "$(./bench/setup_cluster.sh /tmp/pg_patch_install /tmp/pg_patch_data 55433)"
  ./bench/run_matrix.sh patch 500 1000 2000 5000 > patch.csv

  # Aggregate
  cat master.csv patch.csv | ./bench/aggregate.sh -

These scripts produced the empirical data cited in the
"Performance impact" section of the cover letter for v1-0001.
---
 bench/aggregate.sh           |  99 +++++++++++++++++++++
 bench/lazy_snapshot_bench.sh | 161 +++++++++++++++++++++++++++++++++++
 bench/run_matrix.sh          |  57 +++++++++++++
 bench/setup_cluster.sh       |  75 ++++++++++++++++
 4 files changed, 392 insertions(+)
 create mode 100755 bench/aggregate.sh
 create mode 100755 bench/lazy_snapshot_bench.sh
 create mode 100755 bench/run_matrix.sh
 create mode 100755 bench/setup_cluster.sh

diff --git a/bench/aggregate.sh b/bench/aggregate.sh
new file mode 100755
index 0000000000..481e00c549
--- /dev/null
+++ b/bench/aggregate.sh
@@ -0,0 +1,99 @@
+#!/bin/bash
+#
+# Aggregate the CSV from run_matrix.sh into a comparison table suitable for
+# pasting into a pgsql-hackers email.
+#
+# Usage:
+#   $0 <combined.csv>
+# or
+#   cat master.csv patch.csv | $0 -
+#
+# Outputs, per (N, label) combo: median of decoding_ms / spill_bytes /
+# total_bytes across iterations. Then a side-by-side comparison of master
+# vs patch.
+
+set -euo pipefail
+
+if [[ $# -lt 1 ]]; then
+  echo "Usage: $0 <csv-file>  (or '-' for stdin)" >&2
+  exit 1
+fi
+
+INPUT="$1"
+[[ "$INPUT" == "-" ]] && INPUT=/dev/stdin
+
+awk -F, '
+$1 == "label" { next }  # skip any header row (works for concatenated CSVs)
+{
+  key = $1 "," $2  # label,N
+  decoding[key, ++cnt_dec[key]] = $5
+  spill[key, ++cnt_sp[key]]     = $9
+  total[key, ++cnt_tot[key]]    = $10
+  labels[$1] = 1
+  ns[$2 + 0] = 1
+  # remember K (assume constant) and max repeat seen
+  k = $3
+  ldwm = $4
+  if (cnt_dec[key] > rep_max) rep_max = cnt_dec[key]
+}
+END {
+  # median helper baked into END via re-sort per-key (small N, OK)
+  for (key in cnt_dec) {
+    n = cnt_dec[key]
+    # collect into arr
+    delete arr
+    for (i = 1; i <= n; i++) arr[i] = decoding[key, i]
+    asort(arr)
+    if (n % 2) m_dec[key] = arr[(n+1)/2]
+    else       m_dec[key] = (arr[n/2] + arr[n/2 + 1]) / 2
+
+    delete arr
+    for (i = 1; i <= n; i++) arr[i] = spill[key, i]
+    asort(arr)
+    if (n % 2) m_sp[key] = arr[(n+1)/2]
+    else       m_sp[key] = (arr[n/2] + arr[n/2 + 1]) / 2
+
+    delete arr
+    for (i = 1; i <= n; i++) arr[i] = total[key, i]
+    asort(arr)
+    if (n % 2) m_tot[key] = arr[(n+1)/2]
+    else       m_tot[key] = (arr[n/2] + arr[n/2 + 1]) / 2
+  }
+
+  # Sorted N values
+  n_count = 0
+  for (n in ns) sorted_ns[++n_count] = n
+  for (i = 1; i <= n_count; i++)
+    for (j = i + 1; j <= n_count; j++)
+      if (sorted_ns[i] + 0 > sorted_ns[j] + 0) {
+        t = sorted_ns[i]; sorted_ns[i] = sorted_ns[j]; sorted_ns[j] = t
+      }
+
+  printf "Config: K=%s, logical_decoding_work_mem=%s, REPEAT=%d (median)\n\n", k, ldwm, rep_max
+  printf "%-8s %-7s %-14s %-14s %-14s\n", "label", "N", "decode_ms", "spill_bytes", "total_bytes"
+  for (i = 1; i <= n_count; i++) {
+    N = sorted_ns[i]
+    for (lbl in labels) {
+      key = lbl "," N
+      if (key in m_dec)
+        printf "%-8s %-7d %-14d %-14d %-14d\n", lbl, N, m_dec[key], m_sp[key], m_tot[key]
+    }
+  }
+
+  # Side-by-side, if exactly master + patch are present
+  if ("master" in labels && "patch" in labels) {
+    printf "\n%-7s %-12s %-12s %-12s %-12s %-12s %-12s\n", \
+      "N", "master_dec", "patch_dec", "speedup", "master_spill", "patch_spill", "saved_x"
+    for (i = 1; i <= n_count; i++) {
+      N = sorted_ns[i]
+      mk = "master," N; pk = "patch," N
+      if ((mk in m_dec) && (pk in m_dec)) {
+        speedup = (m_dec[pk] > 0) ? m_dec[mk] / m_dec[pk] : 0
+        saved   = (m_sp[pk] > 0) ? m_sp[mk] / m_sp[pk] : (m_sp[mk] > 0 ? 999 : 1)
+        printf "%-7d %-12d %-12d %-12.2f %-12d %-12d %-12.2f\n", \
+          N, m_dec[mk], m_dec[pk], speedup, m_sp[mk], m_sp[pk], saved
+      }
+    }
+  }
+}
+' "$INPUT"
diff --git a/bench/lazy_snapshot_bench.sh b/bench/lazy_snapshot_bench.sh
new file mode 100755
index 0000000000..f16a1a0c1f
--- /dev/null
+++ b/bench/lazy_snapshot_bench.sh
@@ -0,0 +1,161 @@
+#!/bin/bash
+#
+# Single benchmark run for "lazy snapshot distribution" patch.
+#
+# Simulates: one long-running transaction (with K inserts) coexists with N
+# concurrent catalog-modifying commits, then drains via pg_logical_slot_get_changes.
+#
+# Output (CSV row to stdout):
+#   label,N,K,ldwm,decoding_ms,decoded_changes,spill_txns,spill_count,spill_bytes,total_bytes,ddl_loop_sec
+#
+# Assumes a running PG cluster with wal_level=logical.  To compare master vs
+# patch, point this script at two different clusters built from each binary.
+
+set -euo pipefail
+
+# -------- args --------
+N=""
+K="1"
+LABEL=""
+LDWM="64kB"
+SLOT="lazy_bench"
+VERBOSE=0
+
+usage() {
+  cat <<EOF
+Usage: $0 -N <num_ddl_commits> -l <label> [-K <inserts>] [-m <ldwm>] [-s <slot>] [-v]
+
+Required:
+  -N <N>      number of concurrent CREATE/DROP TABLE pairs during long txn
+  -l <label>  CSV tag (e.g. "master" or "patch")
+
+Options:
+  -K <K>      inserts in long txn (default 1)
+  -m <mem>   logical_decoding_work_mem (default 64kB; small to amplify spill)
+  -s <slot>  slot name (default lazy_bench)
+  -v          verbose
+
+PG connection: standard libpq env vars (PGHOST/PGPORT/PGDATABASE/PGUSER).
+EOF
+  exit 1
+}
+
+while getopts "N:K:l:m:s:vh" opt; do
+  case $opt in
+    N) N="$OPTARG" ;;
+    K) K="$OPTARG" ;;
+    l) LABEL="$OPTARG" ;;
+    m) LDWM="$OPTARG" ;;
+    s) SLOT="$OPTARG" ;;
+    v) VERBOSE=1 ;;
+    h|*) usage ;;
+  esac
+done
+
+[[ -z "$N" || -z "$LABEL" ]] && usage
+
+log() { [[ $VERBOSE -eq 1 ]] && echo "[bench] $*" >&2 || true; }
+
+PSQL="psql -X -At -q"
+
+# Quote string for SQL.
+sqlstr() { printf "'%s'" "${1//\'/\'\'}"; }
+
+# -------- preflight --------
+
+# Verify cluster reachable and wal_level=logical.
+wal_level=$($PSQL -c "SHOW wal_level;" || { echo "cannot connect to PG" >&2; exit 1; })
+if [[ "$wal_level" != "logical" ]]; then
+  echo "wal_level must be 'logical' (got '$wal_level')" >&2
+  echo "set: ALTER SYSTEM SET wal_level = logical; then restart" >&2
+  exit 1
+fi
+
+# Set logical_decoding_work_mem for this session via slot creation parameter
+# is not possible; it's a backend GUC. We rely on session-level SET inside the
+# decoding query below.
+
+# -------- setup --------
+log "setup: drop old slot/tables, create fresh slot"
+$PSQL <<SQL >/dev/null
+SELECT pg_drop_replication_slot($(sqlstr "$SLOT"))
+  WHERE EXISTS (SELECT 1 FROM pg_replication_slots
+                WHERE slot_name = $(sqlstr "$SLOT"));
+DROP TABLE IF EXISTS bench_data;
+CREATE TABLE bench_data (i int);
+SELECT pg_create_logical_replication_slot($(sqlstr "$SLOT"), 'test_decoding');
+SELECT pg_stat_reset_replication_slot($(sqlstr "$SLOT"));
+SQL
+
+# -------- long-running txn (background) --------
+# It does K inserts then sleeps long enough for the DDL loop to finish.
+# Generous sleep: 0.05s per DDL + 10s buffer. We don't poll; if DDLs finish
+# early, the long txn just sits idle (harmless for the measurement).
+# Sleep budget covers DDL loop time + safety margin.  Tuned for ~1000 DDL/s
+# in single-session mode; conservative 10x factor for slow CI machines.
+SLEEP_SEC=$(awk "BEGIN { printf \"%.2f\", $N * 0.01 + 5 }")
+log "long txn sleep budget: ${SLEEP_SEC}s"
+
+(
+  $PSQL <<SQL
+BEGIN;
+INSERT INTO bench_data SELECT generate_series(1, $K);
+SELECT pg_sleep($SLEEP_SEC);
+COMMIT;
+SQL
+) &
+LONG_PID=$!
+
+# Wait for the long txn to actually start its INSERT before kicking off DDLs.
+sleep 1
+
+# -------- concurrent DDL loop --------
+# Single psql session, one statement per line.  Each unwrapped CREATE/DROP
+# auto-commits as its own transaction, so this generates N catalog-modifying
+# commits in WAL, identical to N separate psql -c calls but ~100x faster.
+log "running $N CREATE/DROP TABLE pairs in a single psql session"
+DDL_START=$(date +%s.%N)
+{
+  echo '\set ON_ERROR_STOP on'
+  for i in $(seq 1 "$N"); do
+    printf 'CREATE TABLE bench_t%d (a int); DROP TABLE bench_t%d;\n' "$i" "$i"
+  done
+} | $PSQL -f - >/dev/null
+DDL_END=$(date +%s.%N)
+DDL_LOOP_SEC=$(awk "BEGIN { printf \"%.2f\", $DDL_END - $DDL_START }")
+log "DDL loop done in ${DDL_LOOP_SEC}s"
+
+# Wait for long txn to commit.
+wait "$LONG_PID"
+log "long txn committed"
+
+# -------- decode + measure --------
+log "draining slot via pg_logical_slot_get_changes"
+DECODE_START=$(date +%s.%N)
+DECODED=$($PSQL <<SQL
+SET logical_decoding_work_mem = '$LDWM';
+SELECT count(*) FROM pg_logical_slot_get_changes(
+  $(sqlstr "$SLOT"), NULL, NULL,
+  'include-xids', '0', 'skip-empty-xacts', '1'
+);
+SQL
+)
+DECODE_END=$(date +%s.%N)
+DECODE_MS=$(awk "BEGIN { printf \"%.0f\", ($DECODE_END - $DECODE_START) * 1000 }")
+log "decoded $DECODED changes in ${DECODE_MS}ms"
+
+# Capture stats (the slot's lifetime stats; we reset at setup, so this is just
+# the current decoding run).
+STATS=$($PSQL -c "
+SELECT spill_txns || ',' || spill_count || ',' || spill_bytes || ',' || total_bytes
+FROM pg_stat_replication_slots
+WHERE slot_name = $(sqlstr "$SLOT");")
+
+# -------- cleanup --------
+$PSQL <<SQL >/dev/null
+SELECT pg_drop_replication_slot($(sqlstr "$SLOT"));
+DROP TABLE bench_data;
+SQL
+
+# -------- output --------
+echo "$LABEL,$N,$K,$LDWM,$DECODE_MS,$DECODED,$STATS,$DDL_LOOP_SEC"
diff --git a/bench/run_matrix.sh b/bench/run_matrix.sh
new file mode 100755
index 0000000000..626941c174
--- /dev/null
+++ b/bench/run_matrix.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+#
+# Run lazy_snapshot_bench.sh across a matrix of N values and emit one CSV.
+#
+# This script runs against a SINGLE PG cluster (it does not switch binaries).
+# To compare master vs patch, run this twice with the appropriate cluster
+# running each time, then concat the two CSVs.
+#
+# Example:
+#   # Cluster is built from master code
+#   $0 master 500 1000 2000 5000 > master.csv
+#
+#   # ... rebuild with patch applied, restart cluster ...
+#   $0 patch 500 1000 2000 5000 > patch.csv
+#
+#   # combine
+#   cat master.csv patch.csv > both.csv
+#
+# Each row is run REPEAT times (default 3) and all replicates are output.
+# Aggregate (min/median/max) externally with awk/Python/pandas.
+
+set -euo pipefail
+
+REPEAT=${REPEAT:-3}
+K=${K:-1}
+LDWM=${LDWM:-64kB}
+
+HERE="$(cd "$(dirname "$0")" && pwd)"
+BENCH="$HERE/lazy_snapshot_bench.sh"
+
+if [[ $# -lt 2 ]]; then
+  cat <<EOF >&2
+Usage: $0 <label> <N> [<N> ...]
+  label: "master" | "patch" | any tag
+  N:     one or more values to test
+Environment:
+  REPEAT (default 3) — runs per (label, N) combo
+  K      (default 1) — inserts in long txn
+  LDWM   (default 64kB) — logical_decoding_work_mem (small to amplify spill)
+EOF
+  exit 1
+fi
+
+LABEL="$1"
+shift
+N_VALUES=("$@")
+
+# header
+echo "label,N,K,ldwm,decoding_ms,decoded_changes,spill_txns,spill_count,spill_bytes,total_bytes,ddl_loop_sec,iter"
+
+for N in "${N_VALUES[@]}"; do
+  for ((i = 1; i <= REPEAT; i++)); do
+    echo "==> $LABEL N=$N iter=$i/$REPEAT" >&2
+    row=$("$BENCH" -N "$N" -K "$K" -m "$LDWM" -l "$LABEL")
+    echo "$row,$i"
+  done
+done
diff --git a/bench/setup_cluster.sh b/bench/setup_cluster.sh
new file mode 100755
index 0000000000..fdc2fcf7ed
--- /dev/null
+++ b/bench/setup_cluster.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+#
+# Spin up a throwaway PG cluster from a specified install dir, configured for
+# logical decoding, and print env vars to source.
+#
+# Usage:
+#   eval "$(./setup_cluster.sh /tmp/pg_master_install /tmp/pg_master_data 55432)"
+#   # now PGPORT, PGHOST etc. point at the new cluster
+#
+# Stop with:
+#   $PGBINDIR/pg_ctl -D $PGDATA stop
+
+set -euo pipefail
+
+if [[ $# -lt 1 ]]; then
+  cat <<EOF >&2
+Usage: $0 <pg_install_dir> [datadir] [port]
+  pg_install_dir: e.g. /tmp/pg_master_install (contains bin/initdb etc.)
+  datadir:        default \$pg_install_dir/data
+  port:           default 55432
+EOF
+  exit 1
+fi
+
+PGBINDIR="$1/bin"
+PGDATA="${2:-$1/data}"
+PGPORT="${3:-55432}"
+
+[[ ! -x "$PGBINDIR/initdb" ]] && { echo "no initdb at $PGBINDIR" >&2; exit 1; }
+
+# Stop if a previous cluster is running here.
+if [[ -f "$PGDATA/postmaster.pid" ]]; then
+  echo "stopping existing cluster at $PGDATA" >&2
+  "$PGBINDIR/pg_ctl" -D "$PGDATA" stop -m fast || true
+  sleep 1
+fi
+
+if [[ ! -d "$PGDATA/base" ]]; then
+  echo "initdb at $PGDATA" >&2
+  "$PGBINDIR/initdb" -D "$PGDATA" --auth=trust >/dev/null
+fi
+
+# Apply logical-decoding-friendly settings.
+cat >> "$PGDATA/postgresql.auto.conf" <<EOF
+# bench/setup_cluster.sh
+wal_level = logical
+max_wal_senders = 10
+max_replication_slots = 10
+shared_buffers = 256MB
+logical_decoding_work_mem = 64kB
+port = $PGPORT
+unix_socket_directories = '$PGDATA'
+EOF
+
+echo "starting cluster" >&2
+"$PGBINDIR/pg_ctl" -D "$PGDATA" -l "$PGDATA/server.log" start
+
+# Wait until ready.
+for i in {1..20}; do
+  if "$PGBINDIR/pg_isready" -h "$PGDATA" -p "$PGPORT" >/dev/null 2>&1; then
+    break
+  fi
+  sleep 0.5
+done
+
+# Emit env-vars to eval.
+cat <<EOF
+export PGBINDIR=$PGBINDIR
+export PGDATA=$PGDATA
+export PGHOST=$PGDATA
+export PGPORT=$PGPORT
+export PGDATABASE=postgres
+export PGUSER=$USER
+export PATH=$PGBINDIR:\$PATH
+EOF
-- 
2.43.7

