Copilot commented on code in PR #1134:
URL:
https://github.com/apache/skywalking-banyandb/pull/1134#discussion_r3254065587
##########
pkg/query/vectorized/measure/top.go:
##########
@@ -224,24 +238,63 @@ func (t *BatchTop) Close() error {
return nil
}
-// materialize copies row rowIdx of b into a new topRow, reading the sort key
-// from the configured field column.
-func (t *BatchTop) materialize(b *vectorized.RecordBatch, rowIdx int, isFloat
bool) *topRow {
+// extractKey reads only the sort key for row rowIdx from the configured
+// field column into row — no per-column deep copy. This is the cheap
+// half of the old materialize: it runs for every input row, while the
+// expensive materializeCols runs only for rows admitted to the heap.
+//
+// The key column may be a native typed column (ColumnTypeInt64 /
+// ColumnTypeFloat64 — promoted when an Agg reduces over the field) or a
+// passthrough *modelv1.FieldValue column (ColumnTypeFieldValue — the
+// non-Agg Scan→Top→Limit path, where BuildBatchSchema leaves projected
+// fields as passthrough). Both are handled so the float/int decision is
+// per-column-shape, matching the row path's schema-field-type dispatch
+// (pkg/query/logical/measure.topOp.Execute).
+func (t *BatchTop) extractKey(b *vectorized.RecordBatch, rowIdx int, row
*topRow) {
+ keyCol := b.Columns[t.fieldCol]
+ switch c := keyCol.(type) {
+ case *vectorized.TypedColumn[float64]:
+ row.isFloat = true
+ if c.IsNull(rowIdx) {
+ row.isNull = true
+ return
+ }
+ row.floatVal = c.Data()[rowIdx]
+ case *vectorized.TypedColumn[int64]:
+ if c.IsNull(rowIdx) {
+ row.isNull = true
+ return
+ }
+ row.intVal = c.Data()[rowIdx]
+ case *vectorized.TypedColumn[*modelv1.FieldValue]:
+ fv := c.Data()[rowIdx]
+ switch v := fv.GetValue().(type) {
+ case *modelv1.FieldValue_Float:
+ row.isFloat = true
+ row.floatVal = v.Float.GetValue()
+ case *modelv1.FieldValue_Int:
+ row.intVal = v.Int.GetValue()
Review Comment:
BatchTop.extractKey can panic when the key column is a passthrough
*modelv1.FieldValue column containing a nil pointer (TypedColumn[*FieldValue]
can hold nil values without marking validity). Please guard fv==nil (and/or
c.IsNull(rowIdx)) before calling fv.GetValue(), treating it as null/lowest like
the other cases.
##########
pkg/query/vectorized/measure/plan/dispatch.go:
##########
@@ -86,13 +88,16 @@ func FellThroughCount() int64 { return
fellThroughCount.Load() }
//
// Eligibility gate (v1):
// - cfg.Enabled must be true
-// - request may carry GroupBy+Agg as a pair (scalar reduce + raw
-// GroupBy are deferred); aggProjectionCoverage must also hold
-// - request must NOT carry Top (BatchTop's single-heap semantic differs
-// from the row path's per-timestamp top-N)
+// - request may carry GroupBy and/or Agg in any combination (group+agg,
+// scalar reduce, raw GroupBy); plan.Analyze auto-extends the
+// projection so the keys / agg field always resolve
+// - request may carry Top: the analyzer emits Scan → Top → Limit
+// (or Scan → GroupByAgg → Top → Limit) and BatchTop reproduces the
+// row path's top-N (G9a)
// - request must carry TimeRange (storage requires a bounded window)
Review Comment:
The eligibility-gate comment still says the request "must carry TimeRange",
but Dispatch now intentionally handles a nil TimeRange (epoch→epoch) for
row-path parity. Please update/remove that bullet to match the actual behavior
to avoid misleading future readers.
##########
pkg/query/vectorized/measure/bench_test.go:
##########
@@ -255,9 +376,13 @@ func buildSchema(spec workloadSpec) *databasev1.Measure {
return m
}
-// buildOpts derives MeasureQueryOptions matching the workload's projection.
+// buildOpts derives MeasureQueryOptions matching the workload's projection,
+// including GroupBy/Agg so BuildBatchSchema promotes the referenced columns
+// to native typed columns exactly as the production planner does. For the
+// hidden-tags shape the criteria tag IS materialized (storage-side filter
+// input) but is removed from the *visible* projection by buildVisibleOpts.
Review Comment:
This comment mentions "buildVisibleOpts", but no such helper exists in this
file/package and the visible-vs-hidden projection handling is done via
hiddenSet()+StripHiddenTags. Please either remove the reference or replace it
with the actual mechanism used here to keep the benchmark documentation
accurate.
##########
cmd/soak-driver/main.go:
##########
@@ -116,15 +127,29 @@ func dialInsecure(addr string) (*grpc.ClientConn, error) {
return conn, nil
}
-// loadCatalog reads the JSON catalog file and returns its entries.
+// loadCatalog reads the JSON catalog file and returns its entries. The
+// catalog is a JSON array of QueryRequest objects; each element is parsed
+// with protojson so proto oneofs/enums round-trip. stdlib json splits the
+// array, protojson decodes each element.
Review Comment:
loadCatalog's doc comment says the catalog is a JSON array of QueryRequest
objects, but the actual on-disk format is an array of {id, request} objects
(rawCatalogEntry). Please update the comment to match the real schema to avoid
confusion when editing the catalog JSON.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]