Re: [PR] Fix empty tag of SIDX [skywalking-banyandb]

via GitHub Wed, 19 Nov 2025 21:49:58 -0800


Copilot commented on code in PR #854:
URL: 
https://github.com/apache/skywalking-banyandb/pull/854#discussion_r2544432311



##########
banyand/internal/sidx/scan_query.go:
##########
@@ -0,0 +1,203 @@
+// Licensed to Apache Software Foundation (ASF) under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Apache Software Foundation (ASF) licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package sidx
+
+import (
+       "context"
+       "math"
+
+       "github.com/apache/skywalking-banyandb/api/common"
+)
+
+// ScanQuery executes a synchronous full-scan query.
+func (s *sidx) ScanQuery(ctx context.Context, req ScanQueryRequest) 
([]*QueryResponse, error) {
+       if err := req.Validate(); err != nil {
+               return nil, err
+       }
+
+       snap := s.currentSnapshot()
+       if snap == nil {
+               return nil, nil
+       }
+       defer snap.decRef()
+
+       // Set default batch size
+       maxBatchSize := req.MaxBatchSize
+       if maxBatchSize <= 0 {
+               maxBatchSize = 1000
+       }
+
+       // Set key range
+       minKey := int64(math.MinInt64)
+       maxKey := int64(math.MaxInt64)
+       if req.MinKey != nil {
+               minKey = *req.MinKey
+       }
+       if req.MaxKey != nil {
+               maxKey = *req.MaxKey
+       }
+
+       var results []*QueryResponse
+
+       // Prepare Tags map if projection is specified
+       var tagsMap map[string][]string
+       if len(req.TagProjection) > 0 {
+               tagsMap = make(map[string][]string)
+               for _, proj := range req.TagProjection {
+                       for _, tagName := range proj.Names {
+                               tagsMap[tagName] = make([]string, 0, 
maxBatchSize)
+                       }
+               }
+       }
+
+       currentBatch := &QueryResponse{
+               Keys:    make([]int64, 0, maxBatchSize),
+               Data:    make([][]byte, 0, maxBatchSize),
+               SIDs:    make([]common.SeriesID, 0, maxBatchSize),
+               PartIDs: make([]uint64, 0, maxBatchSize),
+               Tags:    tagsMap,
+       }
+
+       // Scan all parts
+       totalParts := len(snap.parts)
+       for partIdx, pw := range snap.parts {
+               rowsBefore := 0
+               for _, res := range results {
+                       rowsBefore += res.Len()
+               }
+               rowsBefore += currentBatch.Len()
+
+               if err := s.scanPart(ctx, pw, req, minKey, maxKey, &results, 
&currentBatch, maxBatchSize); err != nil {
+                       return nil, err
+               }
+
+               // Count total rows found so far
+               totalRowsFound := 0
+               for _, res := range results {
+                       totalRowsFound += res.Len()
+               }
+               totalRowsFound += currentBatch.Len()
+
+               // Report progress if callback is provided
+               if req.OnProgress != nil {
+                       req.OnProgress(partIdx+1, totalParts, totalRowsFound)
+               }
+       }
+
+       // Add remaining batch if not empty
+       if currentBatch.Len() > 0 {
+               results = append(results, currentBatch)
+       }
+
+       return results, nil
+}
+
+func (s *sidx) scanPart(ctx context.Context, pw *partWrapper, req 
ScanQueryRequest,
+       minKey, maxKey int64, results *[]*QueryResponse, currentBatch 
**QueryResponse,
+       maxBatchSize int,
+) error {

Review Comment:
   [nitpick] The function signature passes `currentBatch` as a double pointer 
(`**QueryResponse`), which is unusual and can be confusing. Consider 
refactoring to return the updated batch as a return value alongside the error, 
making the API clearer: `func scanPart(...) (*QueryResponse, error)` and have 
the caller handle appending to results.



##########
banyand/internal/sidx/sidx.go:
##########
@@ -496,9 +497,37 @@ func (bc *blockCursor) copyTo(result *QueryResponse) bool {
        result.SIDs = append(result.SIDs, bc.seriesID)
        result.PartIDs = append(result.PartIDs, bc.p.ID())
 
+       // Copy projected tags if specified
+       if len(bc.request.TagProjection) > 0 && len(result.Tags) > 0 {
+               for _, proj := range bc.request.TagProjection {
+                       for _, tagName := range proj.Names {
+                               if tagData, exists := bc.tags[tagName]; exists 
&& bc.idx < len(tagData) {
+                                       tagValue := 
formatTagValue(tagData[bc.idx])
+                                       result.Tags[tagName] = 
append(result.Tags[tagName], tagValue)
+                               } else {
+                                       // Tag not present for this row
+                                       result.Tags[tagName] = 
append(result.Tags[tagName], "")
+                               }
+                       }
+               }
+       }
+
        return true
 }
 
+// formatTagValue converts a Tag to a string representation.
+func formatTagValue(tag Tag) string {
+       if len(tag.ValueArr) > 0 {
+               // Array of values
+               values := make([]string, len(tag.ValueArr))
+               for i, v := range tag.ValueArr {
+                       values[i] = string(v)
+               }
+               return "[" + strings.Join(values, ",") + "]"
+       }
+       return string(tag.Value)
+}

Review Comment:
   The formatTagValue function lacks documentation. Add a comment describing 
that this function converts a Tag to a string representation, with arrays 
formatted as comma-separated values in brackets. Also clarify what happens when 
both Value and ValueArr are empty.



##########
banyand/internal/sidx/scan_query.go:
##########
@@ -0,0 +1,203 @@
+// Licensed to Apache Software Foundation (ASF) under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Apache Software Foundation (ASF) licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package sidx
+
+import (
+       "context"
+       "math"
+
+       "github.com/apache/skywalking-banyandb/api/common"
+)
+
+// ScanQuery executes a synchronous full-scan query.
+func (s *sidx) ScanQuery(ctx context.Context, req ScanQueryRequest) 
([]*QueryResponse, error) {
+       if err := req.Validate(); err != nil {
+               return nil, err
+       }
+
+       snap := s.currentSnapshot()
+       if snap == nil {
+               return nil, nil
+       }
+       defer snap.decRef()
+
+       // Set default batch size
+       maxBatchSize := req.MaxBatchSize
+       if maxBatchSize <= 0 {
+               maxBatchSize = 1000
+       }
+
+       // Set key range
+       minKey := int64(math.MinInt64)
+       maxKey := int64(math.MaxInt64)
+       if req.MinKey != nil {
+               minKey = *req.MinKey
+       }
+       if req.MaxKey != nil {
+               maxKey = *req.MaxKey
+       }
+
+       var results []*QueryResponse
+
+       // Prepare Tags map if projection is specified
+       var tagsMap map[string][]string
+       if len(req.TagProjection) > 0 {
+               tagsMap = make(map[string][]string)
+               for _, proj := range req.TagProjection {
+                       for _, tagName := range proj.Names {
+                               tagsMap[tagName] = make([]string, 0, 
maxBatchSize)
+                       }
+               }
+       }
+
+       currentBatch := &QueryResponse{
+               Keys:    make([]int64, 0, maxBatchSize),
+               Data:    make([][]byte, 0, maxBatchSize),
+               SIDs:    make([]common.SeriesID, 0, maxBatchSize),
+               PartIDs: make([]uint64, 0, maxBatchSize),
+               Tags:    tagsMap,
+       }
+
+       // Scan all parts
+       totalParts := len(snap.parts)
+       for partIdx, pw := range snap.parts {
+               rowsBefore := 0
+               for _, res := range results {
+                       rowsBefore += res.Len()
+               }
+               rowsBefore += currentBatch.Len()
+
+               if err := s.scanPart(ctx, pw, req, minKey, maxKey, &results, 
&currentBatch, maxBatchSize); err != nil {
+                       return nil, err
+               }
+
+               // Count total rows found so far
+               totalRowsFound := 0
+               for _, res := range results {
+                       totalRowsFound += res.Len()
+               }
+               totalRowsFound += currentBatch.Len()
+
+               // Report progress if callback is provided
+               if req.OnProgress != nil {
+                       req.OnProgress(partIdx+1, totalParts, totalRowsFound)
+               }
+       }
+
+       // Add remaining batch if not empty
+       if currentBatch.Len() > 0 {
+               results = append(results, currentBatch)
+       }
+
+       return results, nil
+}
+
+func (s *sidx) scanPart(ctx context.Context, pw *partWrapper, req 
ScanQueryRequest,
+       minKey, maxKey int64, results *[]*QueryResponse, currentBatch 
**QueryResponse,
+       maxBatchSize int,
+) error {
+       p := pw.p
+       bma := generateBlockMetadataArray()
+       defer releaseBlockMetadataArray(bma)
+
+       // Create partIter on stack
+       pi := &partIter{}
+
+       // Initialize iterator for full scan
+       pi.init(bma, p, minKey, maxKey)
+
+       // Iterate through all blocks
+       for pi.nextBlock() {
+               if err := ctx.Err(); err != nil {
+                       return err
+               }
+
+               // Create a temporary QueryRequest to reuse existing 
blockCursor infrastructure
+               tmpReq := QueryRequest{
+                       TagFilter:     req.TagFilter,
+                       MinKey:        &minKey,
+                       MaxKey:        &maxKey,
+                       TagProjection: req.TagProjection,
+                       MaxBatchSize:  maxBatchSize,
+               }
+
+               bc := generateBlockCursor()
+               bc.init(p, pi.curBlock, tmpReq)
+
+               // Load block data
+               tmpBlock := generateBlock()
+
+               // When we have a TagFilter, we need to load ALL tags so the 
filter can check them.
+               // Pass nil to loadBlockCursor to load all available tags.
+               var tagsToLoad map[string]struct{}
+               if req.TagFilter != nil {
+                       // Load all tags when filtering (nil/empty map triggers 
loading all tags)
+                       tagsToLoad = nil
+               } else if len(req.TagProjection) > 0 {

Review Comment:
   When TagFilter is present, all tags are loaded regardless of which tags are 
actually used in the filter. This could be inefficient for wide tables with 
many tags. Consider analyzing the TagFilter to determine which specific tags it 
references and only load those tags plus any projected tags.



##########
banyand/trace/query.go:
##########
@@ -510,10 +531,19 @@ func mustDecodeTagValueAndArray(valueType pbv1.ValueType, 
value []byte, valueArr
        }
        switch valueType {
        case pbv1.ValueTypeInt64:
+               if value == nil {
+                       return pbv1.NullTagValue
+               }

Review Comment:
   The nil check for `value` is repeated across multiple case branches (lines 
534, 539, 544, 580). Consider extracting this check before the switch statement 
to reduce code duplication: check once at the beginning and return NullTagValue 
if value is nil for non-array types.



##########
banyand/internal/sidx/scan_query.go:
##########
@@ -0,0 +1,203 @@
+// Licensed to Apache Software Foundation (ASF) under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Apache Software Foundation (ASF) licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package sidx
+
+import (
+       "context"
+       "math"
+
+       "github.com/apache/skywalking-banyandb/api/common"
+)
+
+// ScanQuery executes a synchronous full-scan query.
+func (s *sidx) ScanQuery(ctx context.Context, req ScanQueryRequest) 
([]*QueryResponse, error) {
+       if err := req.Validate(); err != nil {
+               return nil, err
+       }
+
+       snap := s.currentSnapshot()
+       if snap == nil {
+               return nil, nil
+       }
+       defer snap.decRef()
+
+       // Set default batch size
+       maxBatchSize := req.MaxBatchSize
+       if maxBatchSize <= 0 {
+               maxBatchSize = 1000
+       }
+
+       // Set key range
+       minKey := int64(math.MinInt64)
+       maxKey := int64(math.MaxInt64)
+       if req.MinKey != nil {
+               minKey = *req.MinKey
+       }
+       if req.MaxKey != nil {
+               maxKey = *req.MaxKey
+       }
+
+       var results []*QueryResponse
+
+       // Prepare Tags map if projection is specified
+       var tagsMap map[string][]string
+       if len(req.TagProjection) > 0 {
+               tagsMap = make(map[string][]string)
+               for _, proj := range req.TagProjection {
+                       for _, tagName := range proj.Names {
+                               tagsMap[tagName] = make([]string, 0, 
maxBatchSize)
+                       }
+               }
+       }
+
+       currentBatch := &QueryResponse{
+               Keys:    make([]int64, 0, maxBatchSize),
+               Data:    make([][]byte, 0, maxBatchSize),
+               SIDs:    make([]common.SeriesID, 0, maxBatchSize),
+               PartIDs: make([]uint64, 0, maxBatchSize),
+               Tags:    tagsMap,
+       }
+
+       // Scan all parts
+       totalParts := len(snap.parts)
+       for partIdx, pw := range snap.parts {
+               rowsBefore := 0
+               for _, res := range results {
+                       rowsBefore += res.Len()
+               }
+               rowsBefore += currentBatch.Len()

Review Comment:
   This definition of rowsBefore is never used.
   ```suggestion
   
   ```



##########
banyand/cmd/dump/trace.go:
##########
@@ -877,3 +772,462 @@ func unmarshalTagMetadata(tm *tagMetadata, src []byte) 
error {
 
        return nil
 }
+
+// Helper functions for new shard-level dump.
+
+func discoverTracePartIDs(shardPath string) ([]uint64, error) {
+       entries, err := os.ReadDir(shardPath)
+       if err != nil {
+               return nil, fmt.Errorf("failed to read shard directory: %w", 
err)
+       }
+
+       var partIDs []uint64
+       for _, entry := range entries {
+               if !entry.IsDir() {
+                       continue
+               }
+               // Skip special directories
+               name := entry.Name()
+               if name == "sidx" || name == "meta" {
+                       continue
+               }
+               // Try to parse as hex part ID
+               partID, err := strconv.ParseUint(name, 16, 64)
+               if err == nil {
+                       partIDs = append(partIDs, partID)
+               }
+       }
+
+       sort.Slice(partIDs, func(i, j int) bool {
+               return partIDs[i] < partIDs[j]
+       })
+
+       return partIDs, nil
+}
+
+func loadTraceSeriesMap(segmentPath string) (map[common.SeriesID]string, 
error) {
+       seriesIndexPath := filepath.Join(segmentPath, "sidx")
+
+       l := logger.GetLogger("dump-trace")
+
+       // Create inverted index store
+       store, err := inverted.NewStore(inverted.StoreOpts{
+               Path:   seriesIndexPath,
+               Logger: l,
+       })
+       if err != nil {
+               return nil, fmt.Errorf("failed to open series index: %w", err)
+       }
+       defer store.Close()
+
+       // Get series iterator
+       ctx := context.Background()
+       iter, err := store.SeriesIterator(ctx)
+       if err != nil {
+               return nil, fmt.Errorf("failed to create series iterator: %w", 
err)
+       }
+       defer iter.Close()
+
+       // Build map of SeriesID -> text representation
+       seriesMap := make(map[common.SeriesID]string)
+       for iter.Next() {
+               series := iter.Val()
+               if len(series.EntityValues) > 0 {
+                       seriesID := 
common.SeriesID(convert.Hash(series.EntityValues))
+                       // Convert EntityValues bytes to readable string
+                       seriesText := string(series.EntityValues)
+                       seriesMap[seriesID] = seriesText
+               }
+       }
+
+       return seriesMap, nil
+}
+
+func parseTraceCriteriaJSON(criteriaJSON string) (*modelv1.Criteria, error) {
+       criteria := &modelv1.Criteria{}
+       err := protojson.Unmarshal([]byte(criteriaJSON), criteria)
+       if err != nil {
+               return nil, fmt.Errorf("invalid criteria JSON: %w", err)
+       }
+       return criteria, nil
+}
+
+func parseTraceProjectionTags(projectionStr string) []string {
+       if projectionStr == "" {
+               return nil
+       }
+
+       tags := strings.Split(projectionStr, ",")
+       result := make([]string, 0, len(tags))
+       for _, tag := range tags {
+               tag = strings.TrimSpace(tag)
+               if tag != "" {
+                       result = append(result, tag)
+               }
+       }
+       return result
+}
+
+func calculateSeriesIDFromTags(tags map[string][]byte) common.SeriesID {
+       // Extract entity values from tags to calculate series ID
+       // Entity tags typically follow naming conventions like "service_id", 
"instance_id", etc.
+       // We'll collect all tag key-value pairs and hash them

Review Comment:
   The comment states 'Entity tags typically follow naming conventions' but the 
implementation hashes ALL tags, not just entity tags. This could produce 
incorrect series IDs if non-entity tags are included. Either update the 
implementation to filter for entity tags only, or update the comment to 
accurately reflect that all tags are hashed.
   ```suggestion
        // Extract values from all tags to calculate series ID.
        // All tag key-value pairs are collected and hashed to produce the 
series ID.
        // If only entity tags should be used, filter them before calling this 
function.
   ```



##########
banyand/cmd/dump/sidx.go:
##########
@@ -0,0 +1,1079 @@
+// Licensed to Apache Software Foundation (ASF) under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Apache Software Foundation (ASF) licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package main
+
+import (
+       "context"
+       "encoding/csv"
+       "encoding/json"
+       "fmt"
+       "math"
+       "os"
+       "path/filepath"
+       "sort"
+       "strconv"
+       "strings"
+       "time"
+
+       "github.com/spf13/cobra"
+       "google.golang.org/protobuf/encoding/protojson"
+
+       "github.com/apache/skywalking-banyandb/api/common"
+       databasev1 
"github.com/apache/skywalking-banyandb/api/proto/banyandb/database/v1"
+       modelv1 
"github.com/apache/skywalking-banyandb/api/proto/banyandb/model/v1"
+       "github.com/apache/skywalking-banyandb/banyand/internal/sidx"
+       "github.com/apache/skywalking-banyandb/banyand/protector"
+       "github.com/apache/skywalking-banyandb/pkg/convert"
+       "github.com/apache/skywalking-banyandb/pkg/fs"
+       "github.com/apache/skywalking-banyandb/pkg/index/inverted"
+       "github.com/apache/skywalking-banyandb/pkg/logger"
+       pbv1 "github.com/apache/skywalking-banyandb/pkg/pb/v1"
+       "github.com/apache/skywalking-banyandb/pkg/query/logical"
+       "github.com/apache/skywalking-banyandb/pkg/query/model"
+)
+
+func newSidxCmd() *cobra.Command {
+       var sidxPath string
+       var segmentPath string
+       var criteriaJSON string
+       var csvOutput bool
+       var timeBegin string
+       var timeEnd string
+       var fullScan bool
+       var projectionTags string
+       var dataFilter string
+
+       cmd := &cobra.Command{
+               Use:   "sidx",
+               Short: "Dump sidx (secondary index) data",
+               Long: `Dump and display contents of a sidx directory with 
filtering.
+Query results include which part each row belongs to.
+
+The tool automatically discovers series IDs from the segment's series index.`,
+               Example: `  # Display sidx data in text format
+  dump sidx -sidx-path /path/to/sidx/timestamp_millis -segment-path 
/path/to/segment
+
+  # Query with time range filter
+  dump sidx -sidx-path /path/to/sidx/timestamp_millis -segment-path 
/path/to/segment \
+    --time-begin "2025-11-23T05:05:00Z" --time-end "2025-11-23T05:20:00Z"
+
+  # Query with criteria filter
+  dump sidx -sidx-path /path/to/sidx/timestamp_millis -segment-path 
/path/to/segment \
+    -criteria 
'{"condition":{"name":"query","op":"BINARY_OP_HAVING","value":{"strArray":{"value":["val1","val2"]}}}}'
+
+  # Full scan mode (scans all series without requiring series ID discovery)
+  dump sidx -sidx-path /path/to/sidx/timestamp_millis -segment-path 
/path/to/segment --full-scan
+
+  # Project specific tags as columns
+  dump sidx -sidx-path /path/to/sidx/timestamp_millis -segment-path 
/path/to/segment \
+    --projection "tag1,tag2,tag3"
+
+  # Filter by data content
+  dump sidx -sidx-path /path/to/sidx/timestamp_millis -segment-path 
/path/to/segment \
+    --data-filter "2dd1624d5719fdfedc526f3f24d00342-4423100"
+
+  # Output as CSV
+  dump sidx -sidx-path /path/to/sidx/timestamp_millis -segment-path 
/path/to/segment -csv`,
+               RunE: func(_ *cobra.Command, _ []string) error {
+                       if sidxPath == "" {
+                               return fmt.Errorf("-sidx-path flag is required")
+                       }
+                       if segmentPath == "" {
+                               return fmt.Errorf("-segment-path flag is 
required")
+                       }
+                       return dumpSidx(sidxPath, segmentPath, criteriaJSON, 
csvOutput, timeBegin, timeEnd, fullScan, projectionTags, dataFilter)
+               },
+       }
+
+       cmd.Flags().StringVarP(&sidxPath, "sidx-path", "s", "", "Path to the 
sidx directory (required)")
+       cmd.Flags().StringVarP(&segmentPath, "segment-path", "g", "", "Path to 
the segment directory (required)")
+       cmd.Flags().StringVarP(&criteriaJSON, "criteria", "c", "", "Criteria 
filter as JSON string")
+       cmd.Flags().BoolVar(&csvOutput, "csv", false, "Output as CSV format")
+       cmd.Flags().StringVar(&timeBegin, "time-begin", "", "Begin time in 
RFC3339 format (e.g., 2025-11-23T05:05:00Z)")
+       cmd.Flags().StringVar(&timeEnd, "time-end", "", "End time in RFC3339 
format (e.g., 2025-11-23T05:20:00Z)")
+       cmd.Flags().BoolVar(&fullScan, "full-scan", false, "Scan all series 
without requiring series ID discovery")
+       cmd.Flags().StringVarP(&projectionTags, "projection", "p", "", 
"Comma-separated list of tags to include as columns (e.g., tag1,tag2,tag3)")
+       cmd.Flags().StringVarP(&dataFilter, "data-filter", "d", "", "Filter 
rows by data content (substring match)")
+       _ = cmd.MarkFlagRequired("sidx-path")
+       _ = cmd.MarkFlagRequired("segment-path")
+
+       return cmd
+}
+
+func dumpSidx(sidxPath, segmentPath, criteriaJSON string, csvOutput bool, 
timeBegin, timeEnd string, fullScan bool, projectionTags string, dataFilter 
string) error {

Review Comment:
   [nitpick] Function has 9 parameters which exceeds recommended limits and 
makes the function signature hard to read and maintain. Consider using an 
options struct pattern to group related parameters.



##########
banyand/cmd/dump/trace.go:
##########
@@ -30,329 +31,269 @@ import (
        "time"
 
        "github.com/spf13/cobra"
+       "google.golang.org/protobuf/encoding/protojson"
 
+       "github.com/apache/skywalking-banyandb/api/common"
+       databasev1 
"github.com/apache/skywalking-banyandb/api/proto/banyandb/database/v1"
+       modelv1 
"github.com/apache/skywalking-banyandb/api/proto/banyandb/model/v1"
        internalencoding 
"github.com/apache/skywalking-banyandb/banyand/internal/encoding"
        "github.com/apache/skywalking-banyandb/pkg/bytes"
        "github.com/apache/skywalking-banyandb/pkg/compress/zstd"
        "github.com/apache/skywalking-banyandb/pkg/convert"
        "github.com/apache/skywalking-banyandb/pkg/encoding"
        "github.com/apache/skywalking-banyandb/pkg/fs"
+       "github.com/apache/skywalking-banyandb/pkg/index/inverted"
+       "github.com/apache/skywalking-banyandb/pkg/logger"
        pbv1 "github.com/apache/skywalking-banyandb/pkg/pb/v1"
+       "github.com/apache/skywalking-banyandb/pkg/query/logical"
 )
 
 func newTraceCmd() *cobra.Command {
-       var partPath string
+       var shardPath string
+       var segmentPath string
        var verbose bool
        var csvOutput bool
+       var criteriaJSON string
+       var projectionTags string
 
        cmd := &cobra.Command{
                Use:   "trace",
-               Short: "Dump trace part data",
-               Long: `Dump and display contents of a trace part directory.
-Outputs trace data in human-readable format or CSV.`,
-               Example: `  # Display trace data in text format
-  dump trace -path /path/to/part/0000000000004db4
+               Short: "Dump trace shard data",
+               Long: `Dump and display contents of a trace shard directory 
(containing multiple parts).
+Outputs trace data in human-readable format or CSV.
+
+Supports filtering by criteria and projecting specific tags.`,
+               Example: `  # Display trace data from shard in text format
+  dump trace --shard-path /path/to/shard-0 --segment-path /path/to/segment
 
   # Display with verbose hex dumps
-  dump trace -path /path/to/part/0000000000004db4 -v
+  dump trace --shard-path /path/to/shard-0 --segment-path /path/to/segment -v
+
+  # Filter by criteria
+  dump trace --shard-path /path/to/shard-0 --segment-path /path/to/segment \
+    --criteria 
'{"condition":{"name":"query","op":"BINARY_OP_HAVING","value":{"strArray":{"value":["tag1=value1","tag2=value2"]}}}}'
+
+  # Project specific tags
+  dump trace --shard-path /path/to/shard-0 --segment-path /path/to/segment \
+    --projection "tag1,tag2,tag3"
 
   # Output as CSV
-  dump trace -path /path/to/part/0000000000004db4 -csv
+  dump trace --shard-path /path/to/shard-0 --segment-path /path/to/segment 
--csv
 
   # Save CSV to file
-  dump trace -path /path/to/part/0000000000004db4 -csv > output.csv`,
+  dump trace --shard-path /path/to/shard-0 --segment-path /path/to/segment 
--csv > output.csv`,
                RunE: func(_ *cobra.Command, _ []string) error {
-                       if partPath == "" {
-                               return fmt.Errorf("-path flag is required")
+                       if shardPath == "" {
+                               return fmt.Errorf("--shard-path flag is 
required")
                        }
-                       return dumpTracePart(partPath, verbose, csvOutput)
+                       if segmentPath == "" {
+                               return fmt.Errorf("--segment-path flag is 
required")
+                       }
+                       return dumpTraceShard(shardPath, segmentPath, verbose, 
csvOutput, criteriaJSON, projectionTags)
                },
        }
 
-       cmd.Flags().StringVarP(&partPath, "path", "p", "", "Path to the trace 
part directory (required)")
+       cmd.Flags().StringVar(&shardPath, "shard-path", "", "Path to the shard 
directory (required)")
+       cmd.Flags().StringVarP(&segmentPath, "segment-path", "g", "", "Path to 
the segment directory (required)")
        cmd.Flags().BoolVarP(&verbose, "verbose", "v", false, "Verbose output 
(show raw data)")
-       cmd.Flags().BoolVarP(&csvOutput, "csv", "c", false, "Output as CSV 
format")
-       _ = cmd.MarkFlagRequired("path")
+       cmd.Flags().BoolVar(&csvOutput, "csv", false, "Output as CSV format")
+       cmd.Flags().StringVarP(&criteriaJSON, "criteria", "c", "", "Criteria 
filter as JSON string")
+       cmd.Flags().StringVarP(&projectionTags, "projection", "p", "", 
"Comma-separated list of tags to include as columns (e.g., tag1,tag2,tag3)")
+       _ = cmd.MarkFlagRequired("shard-path")
+       _ = cmd.MarkFlagRequired("segment-path")
 
        return cmd
 }
 
-func dumpTracePart(partPath string, verbose bool, csvOutput bool) error {
-       // Get the part ID from the directory name
-       partName := filepath.Base(partPath)
-       partID, err := strconv.ParseUint(partName, 16, 64)
+//nolint:gocyclo // dumpTraceShard has high complexity due to multiple output 
formats and filtering options
+func dumpTraceShard(shardPath, segmentPath string, verbose bool, csvOutput 
bool, criteriaJSON, projectionTagsStr string) error {

Review Comment:
   [nitpick] Function has 6 parameters including multiple boolean flags. 
Consider using an options struct or config object to make the function 
signature more maintainable and extensible. For example: `type DumpOptions 
struct { Verbose bool; CSVOutput bool; Criteria string; ProjectionTags string }`



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] Fix empty tag of SIDX [skywalking-banyandb]

Reply via email to