Copilot commented on code in PR #854: URL: https://github.com/apache/skywalking-banyandb/pull/854#discussion_r2544432311
########## banyand/internal/sidx/scan_query.go: ########## @@ -0,0 +1,203 @@ +// Licensed to Apache Software Foundation (ASF) under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Apache Software Foundation (ASF) licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package sidx + +import ( + "context" + "math" + + "github.com/apache/skywalking-banyandb/api/common" +) + +// ScanQuery executes a synchronous full-scan query. +func (s *sidx) ScanQuery(ctx context.Context, req ScanQueryRequest) ([]*QueryResponse, error) { + if err := req.Validate(); err != nil { + return nil, err + } + + snap := s.currentSnapshot() + if snap == nil { + return nil, nil + } + defer snap.decRef() + + // Set default batch size + maxBatchSize := req.MaxBatchSize + if maxBatchSize <= 0 { + maxBatchSize = 1000 + } + + // Set key range + minKey := int64(math.MinInt64) + maxKey := int64(math.MaxInt64) + if req.MinKey != nil { + minKey = *req.MinKey + } + if req.MaxKey != nil { + maxKey = *req.MaxKey + } + + var results []*QueryResponse + + // Prepare Tags map if projection is specified + var tagsMap map[string][]string + if len(req.TagProjection) > 0 { + tagsMap = make(map[string][]string) + for _, proj := range req.TagProjection { + for _, tagName := range proj.Names { + tagsMap[tagName] = make([]string, 0, maxBatchSize) + } + } + } + + currentBatch := &QueryResponse{ + Keys: make([]int64, 0, maxBatchSize), + Data: make([][]byte, 0, maxBatchSize), + SIDs: make([]common.SeriesID, 0, maxBatchSize), + PartIDs: make([]uint64, 0, maxBatchSize), + Tags: tagsMap, + } + + // Scan all parts + totalParts := len(snap.parts) + for partIdx, pw := range snap.parts { + rowsBefore := 0 + for _, res := range results { + rowsBefore += res.Len() + } + rowsBefore += currentBatch.Len() + + if err := s.scanPart(ctx, pw, req, minKey, maxKey, &results, ¤tBatch, maxBatchSize); err != nil { + return nil, err + } + + // Count total rows found so far + totalRowsFound := 0 + for _, res := range results { + totalRowsFound += res.Len() + } + totalRowsFound += currentBatch.Len() + + // Report progress if callback is provided + if req.OnProgress != nil { + req.OnProgress(partIdx+1, totalParts, totalRowsFound) + } + } + + // Add remaining batch if not empty + if currentBatch.Len() > 0 { + results = append(results, currentBatch) + } + + return results, nil +} + +func (s *sidx) scanPart(ctx context.Context, pw *partWrapper, req ScanQueryRequest, + minKey, maxKey int64, results *[]*QueryResponse, currentBatch **QueryResponse, + maxBatchSize int, +) error { Review Comment: [nitpick] The function signature passes `currentBatch` as a double pointer (`**QueryResponse`), which is unusual and can be confusing. Consider refactoring to return the updated batch as a return value alongside the error, making the API clearer: `func scanPart(...) (*QueryResponse, error)` and have the caller handle appending to results. ########## banyand/internal/sidx/sidx.go: ########## @@ -496,9 +497,37 @@ func (bc *blockCursor) copyTo(result *QueryResponse) bool { result.SIDs = append(result.SIDs, bc.seriesID) result.PartIDs = append(result.PartIDs, bc.p.ID()) + // Copy projected tags if specified + if len(bc.request.TagProjection) > 0 && len(result.Tags) > 0 { + for _, proj := range bc.request.TagProjection { + for _, tagName := range proj.Names { + if tagData, exists := bc.tags[tagName]; exists && bc.idx < len(tagData) { + tagValue := formatTagValue(tagData[bc.idx]) + result.Tags[tagName] = append(result.Tags[tagName], tagValue) + } else { + // Tag not present for this row + result.Tags[tagName] = append(result.Tags[tagName], "") + } + } + } + } + return true } +// formatTagValue converts a Tag to a string representation. +func formatTagValue(tag Tag) string { + if len(tag.ValueArr) > 0 { + // Array of values + values := make([]string, len(tag.ValueArr)) + for i, v := range tag.ValueArr { + values[i] = string(v) + } + return "[" + strings.Join(values, ",") + "]" + } + return string(tag.Value) +} Review Comment: The formatTagValue function lacks documentation. Add a comment describing that this function converts a Tag to a string representation, with arrays formatted as comma-separated values in brackets. Also clarify what happens when both Value and ValueArr are empty. ########## banyand/internal/sidx/scan_query.go: ########## @@ -0,0 +1,203 @@ +// Licensed to Apache Software Foundation (ASF) under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Apache Software Foundation (ASF) licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package sidx + +import ( + "context" + "math" + + "github.com/apache/skywalking-banyandb/api/common" +) + +// ScanQuery executes a synchronous full-scan query. +func (s *sidx) ScanQuery(ctx context.Context, req ScanQueryRequest) ([]*QueryResponse, error) { + if err := req.Validate(); err != nil { + return nil, err + } + + snap := s.currentSnapshot() + if snap == nil { + return nil, nil + } + defer snap.decRef() + + // Set default batch size + maxBatchSize := req.MaxBatchSize + if maxBatchSize <= 0 { + maxBatchSize = 1000 + } + + // Set key range + minKey := int64(math.MinInt64) + maxKey := int64(math.MaxInt64) + if req.MinKey != nil { + minKey = *req.MinKey + } + if req.MaxKey != nil { + maxKey = *req.MaxKey + } + + var results []*QueryResponse + + // Prepare Tags map if projection is specified + var tagsMap map[string][]string + if len(req.TagProjection) > 0 { + tagsMap = make(map[string][]string) + for _, proj := range req.TagProjection { + for _, tagName := range proj.Names { + tagsMap[tagName] = make([]string, 0, maxBatchSize) + } + } + } + + currentBatch := &QueryResponse{ + Keys: make([]int64, 0, maxBatchSize), + Data: make([][]byte, 0, maxBatchSize), + SIDs: make([]common.SeriesID, 0, maxBatchSize), + PartIDs: make([]uint64, 0, maxBatchSize), + Tags: tagsMap, + } + + // Scan all parts + totalParts := len(snap.parts) + for partIdx, pw := range snap.parts { + rowsBefore := 0 + for _, res := range results { + rowsBefore += res.Len() + } + rowsBefore += currentBatch.Len() + + if err := s.scanPart(ctx, pw, req, minKey, maxKey, &results, ¤tBatch, maxBatchSize); err != nil { + return nil, err + } + + // Count total rows found so far + totalRowsFound := 0 + for _, res := range results { + totalRowsFound += res.Len() + } + totalRowsFound += currentBatch.Len() + + // Report progress if callback is provided + if req.OnProgress != nil { + req.OnProgress(partIdx+1, totalParts, totalRowsFound) + } + } + + // Add remaining batch if not empty + if currentBatch.Len() > 0 { + results = append(results, currentBatch) + } + + return results, nil +} + +func (s *sidx) scanPart(ctx context.Context, pw *partWrapper, req ScanQueryRequest, + minKey, maxKey int64, results *[]*QueryResponse, currentBatch **QueryResponse, + maxBatchSize int, +) error { + p := pw.p + bma := generateBlockMetadataArray() + defer releaseBlockMetadataArray(bma) + + // Create partIter on stack + pi := &partIter{} + + // Initialize iterator for full scan + pi.init(bma, p, minKey, maxKey) + + // Iterate through all blocks + for pi.nextBlock() { + if err := ctx.Err(); err != nil { + return err + } + + // Create a temporary QueryRequest to reuse existing blockCursor infrastructure + tmpReq := QueryRequest{ + TagFilter: req.TagFilter, + MinKey: &minKey, + MaxKey: &maxKey, + TagProjection: req.TagProjection, + MaxBatchSize: maxBatchSize, + } + + bc := generateBlockCursor() + bc.init(p, pi.curBlock, tmpReq) + + // Load block data + tmpBlock := generateBlock() + + // When we have a TagFilter, we need to load ALL tags so the filter can check them. + // Pass nil to loadBlockCursor to load all available tags. + var tagsToLoad map[string]struct{} + if req.TagFilter != nil { + // Load all tags when filtering (nil/empty map triggers loading all tags) + tagsToLoad = nil + } else if len(req.TagProjection) > 0 { Review Comment: When TagFilter is present, all tags are loaded regardless of which tags are actually used in the filter. This could be inefficient for wide tables with many tags. Consider analyzing the TagFilter to determine which specific tags it references and only load those tags plus any projected tags. ########## banyand/trace/query.go: ########## @@ -510,10 +531,19 @@ func mustDecodeTagValueAndArray(valueType pbv1.ValueType, value []byte, valueArr } switch valueType { case pbv1.ValueTypeInt64: + if value == nil { + return pbv1.NullTagValue + } Review Comment: The nil check for `value` is repeated across multiple case branches (lines 534, 539, 544, 580). Consider extracting this check before the switch statement to reduce code duplication: check once at the beginning and return NullTagValue if value is nil for non-array types. ########## banyand/internal/sidx/scan_query.go: ########## @@ -0,0 +1,203 @@ +// Licensed to Apache Software Foundation (ASF) under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Apache Software Foundation (ASF) licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package sidx + +import ( + "context" + "math" + + "github.com/apache/skywalking-banyandb/api/common" +) + +// ScanQuery executes a synchronous full-scan query. +func (s *sidx) ScanQuery(ctx context.Context, req ScanQueryRequest) ([]*QueryResponse, error) { + if err := req.Validate(); err != nil { + return nil, err + } + + snap := s.currentSnapshot() + if snap == nil { + return nil, nil + } + defer snap.decRef() + + // Set default batch size + maxBatchSize := req.MaxBatchSize + if maxBatchSize <= 0 { + maxBatchSize = 1000 + } + + // Set key range + minKey := int64(math.MinInt64) + maxKey := int64(math.MaxInt64) + if req.MinKey != nil { + minKey = *req.MinKey + } + if req.MaxKey != nil { + maxKey = *req.MaxKey + } + + var results []*QueryResponse + + // Prepare Tags map if projection is specified + var tagsMap map[string][]string + if len(req.TagProjection) > 0 { + tagsMap = make(map[string][]string) + for _, proj := range req.TagProjection { + for _, tagName := range proj.Names { + tagsMap[tagName] = make([]string, 0, maxBatchSize) + } + } + } + + currentBatch := &QueryResponse{ + Keys: make([]int64, 0, maxBatchSize), + Data: make([][]byte, 0, maxBatchSize), + SIDs: make([]common.SeriesID, 0, maxBatchSize), + PartIDs: make([]uint64, 0, maxBatchSize), + Tags: tagsMap, + } + + // Scan all parts + totalParts := len(snap.parts) + for partIdx, pw := range snap.parts { + rowsBefore := 0 + for _, res := range results { + rowsBefore += res.Len() + } + rowsBefore += currentBatch.Len() Review Comment: This definition of rowsBefore is never used. ```suggestion ``` ########## banyand/cmd/dump/trace.go: ########## @@ -877,3 +772,462 @@ func unmarshalTagMetadata(tm *tagMetadata, src []byte) error { return nil } + +// Helper functions for new shard-level dump. + +func discoverTracePartIDs(shardPath string) ([]uint64, error) { + entries, err := os.ReadDir(shardPath) + if err != nil { + return nil, fmt.Errorf("failed to read shard directory: %w", err) + } + + var partIDs []uint64 + for _, entry := range entries { + if !entry.IsDir() { + continue + } + // Skip special directories + name := entry.Name() + if name == "sidx" || name == "meta" { + continue + } + // Try to parse as hex part ID + partID, err := strconv.ParseUint(name, 16, 64) + if err == nil { + partIDs = append(partIDs, partID) + } + } + + sort.Slice(partIDs, func(i, j int) bool { + return partIDs[i] < partIDs[j] + }) + + return partIDs, nil +} + +func loadTraceSeriesMap(segmentPath string) (map[common.SeriesID]string, error) { + seriesIndexPath := filepath.Join(segmentPath, "sidx") + + l := logger.GetLogger("dump-trace") + + // Create inverted index store + store, err := inverted.NewStore(inverted.StoreOpts{ + Path: seriesIndexPath, + Logger: l, + }) + if err != nil { + return nil, fmt.Errorf("failed to open series index: %w", err) + } + defer store.Close() + + // Get series iterator + ctx := context.Background() + iter, err := store.SeriesIterator(ctx) + if err != nil { + return nil, fmt.Errorf("failed to create series iterator: %w", err) + } + defer iter.Close() + + // Build map of SeriesID -> text representation + seriesMap := make(map[common.SeriesID]string) + for iter.Next() { + series := iter.Val() + if len(series.EntityValues) > 0 { + seriesID := common.SeriesID(convert.Hash(series.EntityValues)) + // Convert EntityValues bytes to readable string + seriesText := string(series.EntityValues) + seriesMap[seriesID] = seriesText + } + } + + return seriesMap, nil +} + +func parseTraceCriteriaJSON(criteriaJSON string) (*modelv1.Criteria, error) { + criteria := &modelv1.Criteria{} + err := protojson.Unmarshal([]byte(criteriaJSON), criteria) + if err != nil { + return nil, fmt.Errorf("invalid criteria JSON: %w", err) + } + return criteria, nil +} + +func parseTraceProjectionTags(projectionStr string) []string { + if projectionStr == "" { + return nil + } + + tags := strings.Split(projectionStr, ",") + result := make([]string, 0, len(tags)) + for _, tag := range tags { + tag = strings.TrimSpace(tag) + if tag != "" { + result = append(result, tag) + } + } + return result +} + +func calculateSeriesIDFromTags(tags map[string][]byte) common.SeriesID { + // Extract entity values from tags to calculate series ID + // Entity tags typically follow naming conventions like "service_id", "instance_id", etc. + // We'll collect all tag key-value pairs and hash them Review Comment: The comment states 'Entity tags typically follow naming conventions' but the implementation hashes ALL tags, not just entity tags. This could produce incorrect series IDs if non-entity tags are included. Either update the implementation to filter for entity tags only, or update the comment to accurately reflect that all tags are hashed. ```suggestion // Extract values from all tags to calculate series ID. // All tag key-value pairs are collected and hashed to produce the series ID. // If only entity tags should be used, filter them before calling this function. ``` ########## banyand/cmd/dump/sidx.go: ########## @@ -0,0 +1,1079 @@ +// Licensed to Apache Software Foundation (ASF) under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Apache Software Foundation (ASF) licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package main + +import ( + "context" + "encoding/csv" + "encoding/json" + "fmt" + "math" + "os" + "path/filepath" + "sort" + "strconv" + "strings" + "time" + + "github.com/spf13/cobra" + "google.golang.org/protobuf/encoding/protojson" + + "github.com/apache/skywalking-banyandb/api/common" + databasev1 "github.com/apache/skywalking-banyandb/api/proto/banyandb/database/v1" + modelv1 "github.com/apache/skywalking-banyandb/api/proto/banyandb/model/v1" + "github.com/apache/skywalking-banyandb/banyand/internal/sidx" + "github.com/apache/skywalking-banyandb/banyand/protector" + "github.com/apache/skywalking-banyandb/pkg/convert" + "github.com/apache/skywalking-banyandb/pkg/fs" + "github.com/apache/skywalking-banyandb/pkg/index/inverted" + "github.com/apache/skywalking-banyandb/pkg/logger" + pbv1 "github.com/apache/skywalking-banyandb/pkg/pb/v1" + "github.com/apache/skywalking-banyandb/pkg/query/logical" + "github.com/apache/skywalking-banyandb/pkg/query/model" +) + +func newSidxCmd() *cobra.Command { + var sidxPath string + var segmentPath string + var criteriaJSON string + var csvOutput bool + var timeBegin string + var timeEnd string + var fullScan bool + var projectionTags string + var dataFilter string + + cmd := &cobra.Command{ + Use: "sidx", + Short: "Dump sidx (secondary index) data", + Long: `Dump and display contents of a sidx directory with filtering. +Query results include which part each row belongs to. + +The tool automatically discovers series IDs from the segment's series index.`, + Example: ` # Display sidx data in text format + dump sidx -sidx-path /path/to/sidx/timestamp_millis -segment-path /path/to/segment + + # Query with time range filter + dump sidx -sidx-path /path/to/sidx/timestamp_millis -segment-path /path/to/segment \ + --time-begin "2025-11-23T05:05:00Z" --time-end "2025-11-23T05:20:00Z" + + # Query with criteria filter + dump sidx -sidx-path /path/to/sidx/timestamp_millis -segment-path /path/to/segment \ + -criteria '{"condition":{"name":"query","op":"BINARY_OP_HAVING","value":{"strArray":{"value":["val1","val2"]}}}}' + + # Full scan mode (scans all series without requiring series ID discovery) + dump sidx -sidx-path /path/to/sidx/timestamp_millis -segment-path /path/to/segment --full-scan + + # Project specific tags as columns + dump sidx -sidx-path /path/to/sidx/timestamp_millis -segment-path /path/to/segment \ + --projection "tag1,tag2,tag3" + + # Filter by data content + dump sidx -sidx-path /path/to/sidx/timestamp_millis -segment-path /path/to/segment \ + --data-filter "2dd1624d5719fdfedc526f3f24d00342-4423100" + + # Output as CSV + dump sidx -sidx-path /path/to/sidx/timestamp_millis -segment-path /path/to/segment -csv`, + RunE: func(_ *cobra.Command, _ []string) error { + if sidxPath == "" { + return fmt.Errorf("-sidx-path flag is required") + } + if segmentPath == "" { + return fmt.Errorf("-segment-path flag is required") + } + return dumpSidx(sidxPath, segmentPath, criteriaJSON, csvOutput, timeBegin, timeEnd, fullScan, projectionTags, dataFilter) + }, + } + + cmd.Flags().StringVarP(&sidxPath, "sidx-path", "s", "", "Path to the sidx directory (required)") + cmd.Flags().StringVarP(&segmentPath, "segment-path", "g", "", "Path to the segment directory (required)") + cmd.Flags().StringVarP(&criteriaJSON, "criteria", "c", "", "Criteria filter as JSON string") + cmd.Flags().BoolVar(&csvOutput, "csv", false, "Output as CSV format") + cmd.Flags().StringVar(&timeBegin, "time-begin", "", "Begin time in RFC3339 format (e.g., 2025-11-23T05:05:00Z)") + cmd.Flags().StringVar(&timeEnd, "time-end", "", "End time in RFC3339 format (e.g., 2025-11-23T05:20:00Z)") + cmd.Flags().BoolVar(&fullScan, "full-scan", false, "Scan all series without requiring series ID discovery") + cmd.Flags().StringVarP(&projectionTags, "projection", "p", "", "Comma-separated list of tags to include as columns (e.g., tag1,tag2,tag3)") + cmd.Flags().StringVarP(&dataFilter, "data-filter", "d", "", "Filter rows by data content (substring match)") + _ = cmd.MarkFlagRequired("sidx-path") + _ = cmd.MarkFlagRequired("segment-path") + + return cmd +} + +func dumpSidx(sidxPath, segmentPath, criteriaJSON string, csvOutput bool, timeBegin, timeEnd string, fullScan bool, projectionTags string, dataFilter string) error { Review Comment: [nitpick] Function has 9 parameters which exceeds recommended limits and makes the function signature hard to read and maintain. Consider using an options struct pattern to group related parameters. ########## banyand/cmd/dump/trace.go: ########## @@ -30,329 +31,269 @@ import ( "time" "github.com/spf13/cobra" + "google.golang.org/protobuf/encoding/protojson" + "github.com/apache/skywalking-banyandb/api/common" + databasev1 "github.com/apache/skywalking-banyandb/api/proto/banyandb/database/v1" + modelv1 "github.com/apache/skywalking-banyandb/api/proto/banyandb/model/v1" internalencoding "github.com/apache/skywalking-banyandb/banyand/internal/encoding" "github.com/apache/skywalking-banyandb/pkg/bytes" "github.com/apache/skywalking-banyandb/pkg/compress/zstd" "github.com/apache/skywalking-banyandb/pkg/convert" "github.com/apache/skywalking-banyandb/pkg/encoding" "github.com/apache/skywalking-banyandb/pkg/fs" + "github.com/apache/skywalking-banyandb/pkg/index/inverted" + "github.com/apache/skywalking-banyandb/pkg/logger" pbv1 "github.com/apache/skywalking-banyandb/pkg/pb/v1" + "github.com/apache/skywalking-banyandb/pkg/query/logical" ) func newTraceCmd() *cobra.Command { - var partPath string + var shardPath string + var segmentPath string var verbose bool var csvOutput bool + var criteriaJSON string + var projectionTags string cmd := &cobra.Command{ Use: "trace", - Short: "Dump trace part data", - Long: `Dump and display contents of a trace part directory. -Outputs trace data in human-readable format or CSV.`, - Example: ` # Display trace data in text format - dump trace -path /path/to/part/0000000000004db4 + Short: "Dump trace shard data", + Long: `Dump and display contents of a trace shard directory (containing multiple parts). +Outputs trace data in human-readable format or CSV. + +Supports filtering by criteria and projecting specific tags.`, + Example: ` # Display trace data from shard in text format + dump trace --shard-path /path/to/shard-0 --segment-path /path/to/segment # Display with verbose hex dumps - dump trace -path /path/to/part/0000000000004db4 -v + dump trace --shard-path /path/to/shard-0 --segment-path /path/to/segment -v + + # Filter by criteria + dump trace --shard-path /path/to/shard-0 --segment-path /path/to/segment \ + --criteria '{"condition":{"name":"query","op":"BINARY_OP_HAVING","value":{"strArray":{"value":["tag1=value1","tag2=value2"]}}}}' + + # Project specific tags + dump trace --shard-path /path/to/shard-0 --segment-path /path/to/segment \ + --projection "tag1,tag2,tag3" # Output as CSV - dump trace -path /path/to/part/0000000000004db4 -csv + dump trace --shard-path /path/to/shard-0 --segment-path /path/to/segment --csv # Save CSV to file - dump trace -path /path/to/part/0000000000004db4 -csv > output.csv`, + dump trace --shard-path /path/to/shard-0 --segment-path /path/to/segment --csv > output.csv`, RunE: func(_ *cobra.Command, _ []string) error { - if partPath == "" { - return fmt.Errorf("-path flag is required") + if shardPath == "" { + return fmt.Errorf("--shard-path flag is required") } - return dumpTracePart(partPath, verbose, csvOutput) + if segmentPath == "" { + return fmt.Errorf("--segment-path flag is required") + } + return dumpTraceShard(shardPath, segmentPath, verbose, csvOutput, criteriaJSON, projectionTags) }, } - cmd.Flags().StringVarP(&partPath, "path", "p", "", "Path to the trace part directory (required)") + cmd.Flags().StringVar(&shardPath, "shard-path", "", "Path to the shard directory (required)") + cmd.Flags().StringVarP(&segmentPath, "segment-path", "g", "", "Path to the segment directory (required)") cmd.Flags().BoolVarP(&verbose, "verbose", "v", false, "Verbose output (show raw data)") - cmd.Flags().BoolVarP(&csvOutput, "csv", "c", false, "Output as CSV format") - _ = cmd.MarkFlagRequired("path") + cmd.Flags().BoolVar(&csvOutput, "csv", false, "Output as CSV format") + cmd.Flags().StringVarP(&criteriaJSON, "criteria", "c", "", "Criteria filter as JSON string") + cmd.Flags().StringVarP(&projectionTags, "projection", "p", "", "Comma-separated list of tags to include as columns (e.g., tag1,tag2,tag3)") + _ = cmd.MarkFlagRequired("shard-path") + _ = cmd.MarkFlagRequired("segment-path") return cmd } -func dumpTracePart(partPath string, verbose bool, csvOutput bool) error { - // Get the part ID from the directory name - partName := filepath.Base(partPath) - partID, err := strconv.ParseUint(partName, 16, 64) +//nolint:gocyclo // dumpTraceShard has high complexity due to multiple output formats and filtering options +func dumpTraceShard(shardPath, segmentPath string, verbose bool, csvOutput bool, criteriaJSON, projectionTagsStr string) error { Review Comment: [nitpick] Function has 6 parameters including multiple boolean flags. Consider using an options struct or config object to make the function signature more maintainable and extensible. For example: `type DumpOptions struct { Verbose bool; CSVOutput bool; Criteria string; ProjectionTags string }` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
