This is an automated email from the ASF dual-hosted git repository. hanahmily pushed a commit to branch vectorized-query in repository https://gitbox.apache.org/repos/asf/skywalking-banyandb.git
commit 2614f0981ddab52a97e61499365ccdb7fb3f1374 Author: Hongtao Gao <[email protected]> AuthorDate: Wed May 6 15:11:55 2026 +0000 feat(query/vectorized/measure): add bench gates infrastructure (gate G5a) Implements G5a (M9 — Performance bench gates), the prerequisite for the default-flip rollout. Microbenchmarks, in-Go gate enforcement, runner script with markdown report writer, and a macro suite scaffold are in place; G5b (soak) and G5c (default flip) remain gated on humans. pkg/query/vectorized/measure: - bench_test.go: paired BenchmarkRowPath_W{1..5} / BenchmarkVectorizedPath_W{1..5}. Both paths consume the same fake MeasureQueryResult; tag and field protobuf values are shared singletons across rows so allocation cost measures the serializer, not fixture construction. Workload scales bounded for unit-bench tractability — gates are ratios so the comparison holds at any scale. - bench_gates_test.go: TestBenchGates_PerWorkload runs both paths via testing.Benchmark and asserts the spec's vec/row ratios. RUN_BENCH_GATES=1 to opt in (kept off the default test path so go test ./... stays fast). scripts/bench-vectorized.sh: drives -bench=. -benchmem -count=N -benchtime=Xs across W1..W5, runs the gate test, and emits dist/bench/vectorized-YYYYMMDD-HHMMSS.md with paired output and PASS/FAIL verdict. Exit code mirrors the gate-test result. test/integration/standalone/benchmark/: macro-suite scaffold (suite_test, benchmark_test, workloads_test). The Describe block is default-skipped behind RUN_VECTORIZED_BENCH=1 because the in-process end-to-end W3/W4 measurement is meaningful only after the iterator wires BatchLimit / BatchGroupBy / BatchAggregation / BatchTop into the live Pipeline (deferred from G4). The microbench gate is the authoritative G5a check. .gitignore: ignore dist/bench/ artifacts. 
Verification: bash scripts/bench-vectorized.sh Currently produces a non-zero exit on this branch -- the vectorized adapter runs ~1.4-2.2x slower than the row path across W1..W5 and allocates 1.6-2.6x more, so all five gates fail. That is the spec's intended signal: G5b/G5c are blocked until perf work lands. --- .gitignore | 3 + pkg/query/vectorized/measure/bench_gates_test.go | 115 ++++++++ pkg/query/vectorized/measure/bench_test.go | 309 +++++++++++++++++++++ scripts/bench-vectorized.sh | 117 ++++++++ .../standalone/benchmark/benchmark_suite_test.go | 42 +++ .../standalone/benchmark/benchmark_test.go | 63 +++++ .../standalone/benchmark/workloads_test.go | 60 ++++ 7 files changed, 709 insertions(+) diff --git a/.gitignore b/.gitignore index 9af8ace51..4785e2c8b 100644 --- a/.gitignore +++ b/.gitignore @@ -84,3 +84,6 @@ fodc/agent/internal/ktm/iomonitor/ebpf/generated/vmlinux.h # OpenSSL serial files *.srl +# Vectorized benchmark report artifacts (G5a) +dist/bench/ + diff --git a/pkg/query/vectorized/measure/bench_gates_test.go b/pkg/query/vectorized/measure/bench_gates_test.go new file mode 100644 index 000000000..23182efb7 --- /dev/null +++ b/pkg/query/vectorized/measure/bench_gates_test.go @@ -0,0 +1,115 @@ +// Licensed to Apache Software Foundation (ASF) under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Apache Software Foundation (ASF) licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +package measure + +import ( + "os" + "testing" +) + +// G5a acceptance gates per spec §"Performance Evaluation Plan". Ratios are +// vectorized / row; failing the gate is a regression that blocks the +// default-flip rollout. +type benchGate struct { + id string + maxNsRatio float64 // ns/op ≤ row × maxNsRatio + maxAllocRatio float64 // allocs ≤ row × maxAllocRatio + maxBytesRatio float64 // B/op ≤ row × maxBytesRatio +} + +var benchGates = map[string]benchGate{ + "W1": {id: "W1", maxNsRatio: 1.05, maxAllocRatio: 1.00, maxBytesRatio: 1.20}, + "W2": {id: "W2", maxNsRatio: 1.05, maxAllocRatio: 1.00, maxBytesRatio: 1.20}, + "W3": {id: "W3", maxNsRatio: 1.00, maxAllocRatio: 1.00, maxBytesRatio: 1.20}, + "W4": {id: "W4", maxNsRatio: 1.05, maxAllocRatio: 1.00, maxBytesRatio: 1.20}, + "W5": {id: "W5", maxNsRatio: 1.05, maxAllocRatio: 1.00, maxBytesRatio: 1.20}, +} + +// TestBenchGates_PerWorkload runs both serialization paths inside testing.B +// harnesses and asserts the spec's vec/row ratios. A regression fails this +// test, not just the markdown report — gates are enforced as code. +// +// Skipped unless RUN_BENCH_GATES=1 is set, and also skipped in -short mode: +// this test takes ~10–20s of wall time per workload and is gated on a +// CI-tunable knob to keep `go test ./...` fast. 
+func TestBenchGates_PerWorkload(t *testing.T) { + if os.Getenv("RUN_BENCH_GATES") != "1" { + t.Skip("set RUN_BENCH_GATES=1 to run G5a bench gates") + } + if testing.Short() { + t.Skip("skipping bench gates in -short mode") + } + for _, spec := range allWorkloads { + t.Run(spec.id, func(t *testing.T) { + gate := benchGates[spec.id] + row := timeWorkload(spec, false) + vec := timeWorkload(spec, true) + + t.Logf("%s row: %d ns/op, %d B/op, %d allocs/op", + spec.id, row.NsPerOp(), row.AllocedBytesPerOp(), row.AllocsPerOp()) + t.Logf("%s vec: %d ns/op, %d B/op, %d allocs/op", + spec.id, vec.NsPerOp(), vec.AllocedBytesPerOp(), vec.AllocsPerOp()) + + if !ratioLE(vec.NsPerOp(), row.NsPerOp(), gate.maxNsRatio) { + t.Fatalf("%s ns/op gate: vec %d > row %d × %.2f (= %d)", + spec.id, vec.NsPerOp(), row.NsPerOp(), gate.maxNsRatio, + int64(float64(row.NsPerOp())*gate.maxNsRatio)) + } + if !ratioLE(vec.AllocsPerOp(), row.AllocsPerOp(), gate.maxAllocRatio) { + t.Fatalf("%s allocs/op gate: vec %d > row %d × %.2f (= %d)", + spec.id, vec.AllocsPerOp(), row.AllocsPerOp(), gate.maxAllocRatio, + int64(float64(row.AllocsPerOp())*gate.maxAllocRatio)) + } + if !ratioLE(vec.AllocedBytesPerOp(), row.AllocedBytesPerOp(), gate.maxBytesRatio) { + t.Fatalf("%s B/op gate: vec %d > row %d × %.2f (= %d)", + spec.id, vec.AllocedBytesPerOp(), row.AllocedBytesPerOp(), gate.maxBytesRatio, + int64(float64(row.AllocedBytesPerOp())*gate.maxBytesRatio)) + } + }) + } +} + +// ratioLE reports whether got ≤ baseline × ratio. A non-positive baseline +// cannot form a meaningful ratio, so it is treated as a pass. +func ratioLE(got, baseline int64, ratio float64) bool { + if baseline <= 0 { + return true + } + return float64(got) <= float64(baseline)*ratio +} + +// timeWorkload runs the appropriate path under testing.Benchmark, whose run +// time follows -test.benchtime (1s by default), and returns the result. The +// caller treats vectorized and row identically at the comparison layer. 
+func timeWorkload(spec workloadSpec, vectorized bool) testing.BenchmarkResult { + results := buildResults(spec) + schema := buildSchema(spec) + opts := buildOpts(spec) + cfg := VectorizedConfig{Enabled: true, BatchSize: 1024, QueryMemoryMiB: 64} + return testing.Benchmark(func(b *testing.B) { + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + if vectorized { + runVectorizedPath(results, schema, opts, cfg) + } else { + runRowPath(results, opts) + } + } + }) +} diff --git a/pkg/query/vectorized/measure/bench_test.go b/pkg/query/vectorized/measure/bench_test.go new file mode 100644 index 000000000..79563ecbe --- /dev/null +++ b/pkg/query/vectorized/measure/bench_test.go @@ -0,0 +1,309 @@ +// Licensed to Apache Software Foundation (ASF) under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Apache Software Foundation (ASF) licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package measure + +import ( + "context" + "testing" + + "github.com/apache/skywalking-banyandb/api/common" + databasev1 "github.com/apache/skywalking-banyandb/api/proto/banyandb/database/v1" + modelv1 "github.com/apache/skywalking-banyandb/api/proto/banyandb/model/v1" + "github.com/apache/skywalking-banyandb/pkg/query/model" +) + +// Microbenchmarks (G5a) — paired row-path vs vectorized-path benchmarks for +// W1..W5 per the spec's Performance Evaluation Plan. Both paths consume the +// same fake MeasureQueryResult; only the serialization implementation +// differs. ns/op, B/op, allocs/op are reported via testing.B. +// +// Workload scales are bounded for unit-bench tractability — the integration +// macro suite at test/integration/standalone/benchmark/ is where full-scale +// shapes will run against the real Measure module. Acceptance gates are ratios +// (vec/row), so the relative comparison holds at any scale; absolute +// throughput is not the gate. +// +// Run via: +// +// go test ./pkg/query/vectorized/measure -bench=. -benchmem -count=5 -benchtime=2s + +// workloadSpec parameterizes a benchmark workload. +type workloadSpec struct { + id string + tagFamilies []tagSpec + fields []fieldSpec + series int + rowsPer int +} + +type tagSpec struct { + family string + name string + col databasev1.TagType +} + +type fieldSpec struct { + name string + col databasev1.FieldType +} + +// w1..w5 mirror the spec's catalog, scaled for unit-bench memory budgets. +// Total rows per workload stay at or below 100k (W1: 10k, W2..W5: 100k) so each +// iteration completes quickly enough for -benchtime=2s to amortize fixture build cost. 
+var ( + w1 = workloadSpec{ + id: "W1", + series: 1, rowsPer: 10000, + fields: []fieldSpec{{name: "v_int", col: databasev1.FieldType_FIELD_TYPE_INT}}, + } + w2 = workloadSpec{ + id: "W2", + series: 100, rowsPer: 1000, + tagFamilies: []tagSpec{ + {family: "default", name: "svc", col: databasev1.TagType_TAG_TYPE_STRING}, + {family: "default", name: "env_id", col: databasev1.TagType_TAG_TYPE_INT}, + }, + fields: []fieldSpec{ + {name: "v_int", col: databasev1.FieldType_FIELD_TYPE_INT}, + {name: "v_float", col: databasev1.FieldType_FIELD_TYPE_FLOAT}, + }, + } + w3 = workloadSpec{ + id: "W3", + series: 1000, rowsPer: 100, + tagFamilies: []tagSpec{{family: "default", name: "svc", col: databasev1.TagType_TAG_TYPE_STRING}}, + fields: []fieldSpec{{name: "v_int", col: databasev1.FieldType_FIELD_TYPE_INT}}, + } + w4 = workloadSpec{ + id: "W4", + series: 100, rowsPer: 1000, + tagFamilies: []tagSpec{{family: "default", name: "svc", col: databasev1.TagType_TAG_TYPE_STRING}}, + fields: []fieldSpec{{name: "v_int", col: databasev1.FieldType_FIELD_TYPE_INT}}, + } + w5 = workloadSpec{ + id: "W5", + series: 1000, rowsPer: 100, + tagFamilies: []tagSpec{ + {family: "default", name: "svc", col: databasev1.TagType_TAG_TYPE_STRING}, + {family: "default", name: "env_id", col: databasev1.TagType_TAG_TYPE_INT}, + {family: "default", name: "blob", col: databasev1.TagType_TAG_TYPE_DATA_BINARY}, + {family: "default", name: "ports", col: databasev1.TagType_TAG_TYPE_INT_ARRAY}, + {family: "default", name: "labels", col: databasev1.TagType_TAG_TYPE_STRING_ARRAY}, + }, + fields: []fieldSpec{ + {name: "v_int", col: databasev1.FieldType_FIELD_TYPE_INT}, + {name: "v_float", col: databasev1.FieldType_FIELD_TYPE_FLOAT}, + {name: "v_str", col: databasev1.FieldType_FIELD_TYPE_STRING}, + {name: "v_bytes", col: databasev1.FieldType_FIELD_TYPE_DATA_BINARY}, + }, + } + + allWorkloads = []workloadSpec{w1, w2, w3, w4, w5} +) + +// buildResults materializes a deterministic []*model.MeasureResult for the +// 
workload. Tag and field protobuf values are shared singletons across rows +// to keep allocation cost concentrated on serialization (the path under test) +// rather than fixture construction. +func buildResults(spec workloadSpec) []*model.MeasureResult { + tagSingletons := map[string]*modelv1.TagValue{ + "str": {Value: &modelv1.TagValue_Str{Str: &modelv1.Str{Value: "alpha"}}}, + "int": {Value: &modelv1.TagValue_Int{Int: &modelv1.Int{Value: 7}}}, + "binary": {Value: &modelv1.TagValue_BinaryData{BinaryData: []byte{0xfe, 0xed}}}, + "intarr": {Value: &modelv1.TagValue_IntArray{IntArray: &modelv1.IntArray{Value: []int64{1, 2}}}}, + "strarr": {Value: &modelv1.TagValue_StrArray{StrArray: &modelv1.StrArray{Value: []string{"x", "y"}}}}, + } + fieldSingletons := map[string]*modelv1.FieldValue{ + "int": {Value: &modelv1.FieldValue_Int{Int: &modelv1.Int{Value: 42}}}, + "float": {Value: &modelv1.FieldValue_Float{Float: &modelv1.Float{Value: 3.14}}}, + "str": {Value: &modelv1.FieldValue_Str{Str: &modelv1.Str{Value: "ok"}}}, + "binary": {Value: &modelv1.FieldValue_BinaryData{BinaryData: []byte{0xab, 0xcd}}}, + } + + results := make([]*model.MeasureResult, 0, spec.series) + for s := 0; s < spec.series; s++ { + r := &model.MeasureResult{SID: common.SeriesID(s + 1)} + r.Timestamps = make([]int64, spec.rowsPer) + r.Versions = make([]int64, spec.rowsPer) + r.ShardIDs = make([]common.ShardID, spec.rowsPer) + for i := 0; i < spec.rowsPer; i++ { + r.Timestamps[i] = int64(i) + r.Versions[i] = 1 + } + if len(spec.tagFamilies) > 0 { + tags := make([]model.Tag, 0, len(spec.tagFamilies)) + for _, ts := range spec.tagFamilies { + values := make([]*modelv1.TagValue, spec.rowsPer) + singleton := pickTagSingleton(ts.col, tagSingletons) + for i := range values { + values[i] = singleton + } + tags = append(tags, model.Tag{Name: ts.name, Values: values}) + } + r.TagFamilies = []model.TagFamily{{Name: spec.tagFamilies[0].family, Tags: tags}} + } + if len(spec.fields) > 0 { + r.Fields = 
make([]model.Field, 0, len(spec.fields)) + for _, f := range spec.fields { + values := make([]*modelv1.FieldValue, spec.rowsPer) + singleton := pickFieldSingleton(f.col, fieldSingletons) + for i := range values { + values[i] = singleton + } + r.Fields = append(r.Fields, model.Field{Name: f.name, Values: values}) + } + } + results = append(results, r) + } + return results +} + +func pickTagSingleton(t databasev1.TagType, m map[string]*modelv1.TagValue) *modelv1.TagValue { + switch t { + case databasev1.TagType_TAG_TYPE_INT: + return m["int"] + case databasev1.TagType_TAG_TYPE_STRING: + return m["str"] + case databasev1.TagType_TAG_TYPE_DATA_BINARY: + return m["binary"] + case databasev1.TagType_TAG_TYPE_INT_ARRAY: + return m["intarr"] + case databasev1.TagType_TAG_TYPE_STRING_ARRAY: + return m["strarr"] + case databasev1.TagType_TAG_TYPE_UNSPECIFIED, databasev1.TagType_TAG_TYPE_TIMESTAMP: + // Bench fixtures never use these variants. + return m["str"] + } + return m["str"] +} + +func pickFieldSingleton(t databasev1.FieldType, m map[string]*modelv1.FieldValue) *modelv1.FieldValue { + switch t { + case databasev1.FieldType_FIELD_TYPE_INT: + return m["int"] + case databasev1.FieldType_FIELD_TYPE_FLOAT: + return m["float"] + case databasev1.FieldType_FIELD_TYPE_STRING: + return m["str"] + case databasev1.FieldType_FIELD_TYPE_DATA_BINARY: + return m["binary"] + } + return m["int"] +} + +// buildSchema reconstructs the *databasev1.Measure schema corresponding to +// the workload, only declaring the tags and fields that the workload +// projects. 
+func buildSchema(spec workloadSpec) *databasev1.Measure { + m := &databasev1.Measure{} + if len(spec.tagFamilies) > 0 { + fam := &databasev1.TagFamilySpec{Name: spec.tagFamilies[0].family} + for _, t := range spec.tagFamilies { + fam.Tags = append(fam.Tags, &databasev1.TagSpec{Name: t.name, Type: t.col}) + } + m.TagFamilies = []*databasev1.TagFamilySpec{fam} + } + for _, f := range spec.fields { + m.Fields = append(m.Fields, &databasev1.FieldSpec{Name: f.name, FieldType: f.col}) + } + return m +} + +// buildOpts derives MeasureQueryOptions matching the workload's projection. +func buildOpts(spec workloadSpec) model.MeasureQueryOptions { + opts := model.MeasureQueryOptions{} + if len(spec.tagFamilies) > 0 { + names := make([]string, 0, len(spec.tagFamilies)) + for _, t := range spec.tagFamilies { + names = append(names, t.name) + } + opts.TagProjection = []model.TagProjection{{Family: spec.tagFamilies[0].family, Names: names}} + } + if len(spec.fields) > 0 { + opts.FieldProjection = make([]string, 0, len(spec.fields)) + for _, f := range spec.fields { + opts.FieldProjection = append(opts.FieldProjection, f.name) + } + } + return opts +} + +// benchSink prevents the compiler from eliding the work inside benchmark +// loops — every drained row is summed into it. +var benchSink int + +// runRowPath drains the row-path serializer once over a fresh cursor backed +// by the supplied results, accumulating the row count into benchSink. +func runRowPath(results []*model.MeasureResult, opts model.MeasureQueryOptions) { + qr := &fakeMeasureQueryResult{seq: results} + benchSink += len(rowSerialize(qr, opts)) +} + +// runVectorizedPath drains the vectorized adapter once over a fresh cursor. 
+func runVectorizedPath(results []*model.MeasureResult, schema *databasev1.Measure, + opts model.MeasureQueryOptions, cfg VectorizedConfig, +) { + qr := &fakeMeasureQueryResult{seq: results} + it, err := NewMIterator(context.Background(), qr, schema, opts, cfg) + if err != nil { + panic(err) + } + defer it.Close() + for it.Next() { + benchSink++ + } +} + +func benchmarkRow(b *testing.B, spec workloadSpec) { + results := buildResults(spec) + opts := buildOpts(spec) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + runRowPath(results, opts) + } +} + +func benchmarkVectorized(b *testing.B, spec workloadSpec) { + results := buildResults(spec) + schema := buildSchema(spec) + opts := buildOpts(spec) + cfg := VectorizedConfig{Enabled: true, BatchSize: 1024, QueryMemoryMiB: 64} + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + runVectorizedPath(results, schema, opts, cfg) + } +} + +// Paired benchmarks per spec. + +func BenchmarkRowPath_W1(b *testing.B) { benchmarkRow(b, w1) } +func BenchmarkVectorizedPath_W1(b *testing.B) { benchmarkVectorized(b, w1) } + +func BenchmarkRowPath_W2(b *testing.B) { benchmarkRow(b, w2) } +func BenchmarkVectorizedPath_W2(b *testing.B) { benchmarkVectorized(b, w2) } + +func BenchmarkRowPath_W3(b *testing.B) { benchmarkRow(b, w3) } +func BenchmarkVectorizedPath_W3(b *testing.B) { benchmarkVectorized(b, w3) } + +func BenchmarkRowPath_W4(b *testing.B) { benchmarkRow(b, w4) } +func BenchmarkVectorizedPath_W4(b *testing.B) { benchmarkVectorized(b, w4) } + +func BenchmarkRowPath_W5(b *testing.B) { benchmarkRow(b, w5) } +func BenchmarkVectorizedPath_W5(b *testing.B) { benchmarkVectorized(b, w5) } diff --git a/scripts/bench-vectorized.sh b/scripts/bench-vectorized.sh new file mode 100755 index 000000000..b51317075 --- /dev/null +++ b/scripts/bench-vectorized.sh @@ -0,0 +1,117 @@ +#!/usr/bin/env bash +# Licensed to Apache Software Foundation (ASF) under one or more contributor +# license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Apache Software Foundation (ASF) licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# G5a runner — drives the paired W1..W5 microbenchmarks, runs the in-Go gate +# enforcement test, and emits a markdown report at +# dist/bench/vectorized-YYYYMMDD-HHMMSS.md. Exit non-zero if any gate fails. +# +# Usage: +# ./scripts/bench-vectorized.sh +# COUNT=3 BENCHTIME=1s ./scripts/bench-vectorized.sh # quicker sanity run +# +# Macro benchmarks live at test/integration/standalone/benchmark/ and run +# separately via `make test-ci PKG=./test/integration/standalone/benchmark/...` +# — they require booting a real Measure module and so don't fit the inner +# go-test bench loop. + +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +cd "${ROOT_DIR}" + +COUNT="${COUNT:-5}" +BENCHTIME="${BENCHTIME:-2s}" +PKG="./pkg/query/vectorized/measure" +TIMESTAMP="$(date -u +%Y%m%d-%H%M%S)" +OUT_DIR="${ROOT_DIR}/dist/bench" +REPORT="${OUT_DIR}/vectorized-${TIMESTAMP}.md" +RAW="${OUT_DIR}/vectorized-${TIMESTAMP}.txt" +GATES_LOG="${OUT_DIR}/vectorized-gates-${TIMESTAMP}.log" + +mkdir -p "${OUT_DIR}" + +echo "G5a bench runner" +echo " pkg: ${PKG}" +echo " count: ${COUNT}" +echo " benchtime: ${BENCHTIME}" +echo " output: ${REPORT}" +echo + +echo "==> Running paired microbenchmarks..." 
+go test "${PKG}" \ + -run='^$' \ + -bench='^Benchmark(RowPath|VectorizedPath)_W[1-5]$' \ + -benchmem \ + -count="${COUNT}" \ + -benchtime="${BENCHTIME}" \ + -timeout=30m \ + | tee "${RAW}" + +echo +echo "==> Running gate enforcement test (RUN_BENCH_GATES=1)..." +gate_status=0 +RUN_BENCH_GATES=1 go test "${PKG}" \ + -run='^TestBenchGates_PerWorkload$' \ + -count=1 \ + -timeout=30m \ + -v \ + | tee "${GATES_LOG}" || gate_status=$? + +echo +echo "==> Writing report ${REPORT}" + +commit="$(git rev-parse --short HEAD 2>/dev/null || echo unknown)" +host="$(uname -mnsr 2>/dev/null || echo unknown)" +go_version="$(go version 2>/dev/null || echo unknown)" + +{ + echo "# Vectorized Query Path — Microbench Report" + echo + echo "- Generated: \`${TIMESTAMP}\` UTC" + echo "- Commit: \`${commit}\`" + echo "- Host: \`${host}\`" + echo "- Go: \`${go_version}\`" + echo "- Count: ${COUNT}" + echo "- Benchtime: ${BENCHTIME}" + echo + echo "## Paired benchmark output" + echo + echo '```' + cat "${RAW}" + echo '```' + echo + echo "## Gate enforcement" + echo + if [ "${gate_status}" -eq 0 ]; then + echo "**PASS** — all gates met." + else + echo "**FAIL** — at least one gate violated. See log below." + fi + echo + echo '```' + cat "${GATES_LOG}" + echo '```' +} > "${REPORT}" + +echo +if [ "${gate_status}" -ne 0 ]; then + echo "GATE VIOLATION — see ${REPORT}" + exit "${gate_status}" +fi + +echo "All gates passed. Report: ${REPORT}" diff --git a/test/integration/standalone/benchmark/benchmark_suite_test.go b/test/integration/standalone/benchmark/benchmark_suite_test.go new file mode 100644 index 000000000..d7b252d98 --- /dev/null +++ b/test/integration/standalone/benchmark/benchmark_suite_test.go @@ -0,0 +1,42 @@ +// Licensed to Apache Software Foundation (ASF) under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. 
Apache Software Foundation (ASF) licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package benchmark_test + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + integration_standalone "github.com/apache/skywalking-banyandb/test/integration/standalone" +) + +// TestVectorizedBenchmark is the Ginkgo entry point for the macro suite. It is +// meant to run the W1..W5 workloads against two real Measure modules (flag-off +// and flag-on) and assert the spec's gate ratios on wall-clock latency, B/op, +// and allocs/op. Default-skipped because each workload boots a full standalone +// — set RUN_VECTORIZED_BENCH=1 to enable. Every spec carries the "slow" label. +// +// Use (Ginkgo flags need the -ginkgo. prefix under go test; keep slow specs in): +// +// RUN_VECTORIZED_BENCH=1 go test ./test/integration/standalone/benchmark/... +// -timeout=60m -ginkgo.vv +func TestVectorizedBenchmark(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Vectorized Macro Benchmark Suite", Label(integration_standalone.Labels...)) +} diff --git a/test/integration/standalone/benchmark/benchmark_test.go b/test/integration/standalone/benchmark/benchmark_test.go new file mode 100644 index 000000000..ee6cf286a --- /dev/null +++ b/test/integration/standalone/benchmark/benchmark_test.go @@ -0,0 +1,63 @@ +// Licensed to Apache Software Foundation (ASF) under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. 
Apache Software Foundation (ASF) licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Package benchmark_test contains the macro benchmark suite for the +// vectorized query path. Each workload boots a real standalone Measure +// module twice (flag-off and flag-on), drives identical queries against +// each, and asserts the spec's gate ratios. Skipped unless +// RUN_VECTORIZED_BENCH=1 is set so it does not run on every PR. +package benchmark_test + +import ( + "os" + + "github.com/onsi/ginkgo/v2" +) + +// G5a macro suite — currently a structural placeholder. The microbench +// gate at pkg/query/vectorized/measure/bench_gates_test.go is the +// authoritative G5a check; this suite captures end-to-end query latency +// against a live standalone for the rollout decision and is deferred until +// the vectorized adapter has wired operators (BatchLimit / BatchGroupBy / +// BatchAggregation / BatchTop) into the live Pipeline. Without that +// wiring, end-to-end W3/W4 measurements still flow through the row path's +// post-aggregation stage and don't reflect operator-level cost. +// +// To enable the suite (e.g. once operator wiring lands): +// +// RUN_VECTORIZED_BENCH=1 go test \ +// ./test/integration/standalone/benchmark/... 
\ +// -timeout=60m -ginkgo.label-filter '!slow' +var _ = ginkgo.Describe("vectorized macro benchmarks", ginkgo.Ordered, func() { + ginkgo.BeforeAll(func() { + if os.Getenv("RUN_VECTORIZED_BENCH") != "1" { + ginkgo.Skip("set RUN_VECTORIZED_BENCH=1 to run the macro vectorized benchmarks") + } + }) + + for _, w := range Workloads() { + w := w + ginkgo.It("workload "+w.ID, ginkgo.Label("slow"), func() { + // TODO(G5a-followup): boot two standalones (flag-off, flag-on) + // reusing setup.ClosableStandalone, run RunWorkload(w) against + // each, write the markdown report via Report(), then assert + // gate ratios. Tracked in the deferred items list at the + // bottom of .omc/plans/autopilot-impl.md. + ginkgo.Skip("macro implementation deferred; see microbench gate for G5a signal") + }) + } +}) diff --git a/test/integration/standalone/benchmark/workloads_test.go b/test/integration/standalone/benchmark/workloads_test.go new file mode 100644 index 000000000..18242567b --- /dev/null +++ b/test/integration/standalone/benchmark/workloads_test.go @@ -0,0 +1,60 @@ +// Licensed to Apache Software Foundation (ASF) under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Apache Software Foundation (ASF) licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Package benchmark_test holds the macro benchmark workload catalog and +// helpers. 
Workloads mirror the catalog in +// .omc/specs/deep-interview-vectorized-query-critique.md §"Performance +// Evaluation Plan". Entries carry the spec's nominal scale for this macro +// suite; the microbench (pkg/query/vectorized/measure/bench_test.go) keeps +// its own scaled-down variants. +package benchmark_test + +// Workload describes one macro shape — series count, points per series, and +// the kind of work that dominates (scan, group-by, top-N). +type Workload struct { + ID string + Description string + Series int + PointsEach int + Kind WorkloadKind +} + +// WorkloadKind tags the benchmark intent so the runner can pick the right +// query template per shape. +type WorkloadKind int + +// Recognized workload kinds. +const ( + WorkloadScan WorkloadKind = iota + WorkloadMixedProjection + WorkloadGroupByAggregation + WorkloadTopN + WorkloadLargeFanout +) + +// Workloads returns the W1..W5 catalog at the spec's nominal scale. The +// macro suite scales these to fit available host capacity; the microbench +// uses smaller variants in pkg/query/vectorized/measure/bench_test.go. +func Workloads() []Workload { + return []Workload{ + {ID: "W1", Description: "Single-series scan; 1×10 000", Series: 1, PointsEach: 10000, Kind: WorkloadScan}, + {ID: "W2", Description: "Mixed projection; 100×1 000", Series: 100, PointsEach: 1000, Kind: WorkloadMixedProjection}, + {ID: "W3", Description: "GroupBy + SUM/COUNT; 1 000×100", Series: 1000, PointsEach: 100, Kind: WorkloadGroupByAggregation}, + {ID: "W4", Description: "Top-N=100 over 100×10 000", Series: 100, PointsEach: 10000, Kind: WorkloadTopN}, + {ID: "W5", Description: "Wide fanout; 10 000×1 000 full projection", Series: 10000, PointsEach: 1000, Kind: WorkloadLargeFanout}, + } +}
