mrproliu commented on code in PR #1138:
URL: 
https://github.com/apache/skywalking-banyandb/pull/1138#discussion_r3278227443


##########
banyand/measure/migration_verify.go:
##########
@@ -0,0 +1,405 @@
+// Licensed to Apache Software Foundation (ASF) under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Apache Software Foundation (ASF) licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package measure
+
+import (
+       "context"
+       "encoding/json"
+       "errors"
+       "fmt"
+       "os"
+       "path/filepath"
+       "sort"
+       "strconv"
+       "strings"
+       "time"
+
+       "github.com/blugelabs/bluge"
+
+       "github.com/apache/skywalking-banyandb/banyand/internal/storage"
+       "github.com/apache/skywalking-banyandb/pkg/fs"
+)
+
+// SegmentReport summarizes one `seg-*` directory under a measure
+// group root. Populated by EnumerateGroupTarget for the migration
+// verify CLI.
+//
+// Aligned is true only when ALL of these hold:
+//   - the dir name parses cleanly into a start time;
+//   - start = IntervalRule.Standard(start) (start is on the grid);
+//   - <seg>/metadata exists, is well-formed JSON, and carries a non-empty
+//     endTime;
+//   - end = IntervalRule.NextTime(start) (the segment spans exactly one
+//     bucket);
+//   - the inclusive last instant (end - 1ns) standardizes back to start (start
+//     and end fall in the same IntervalRule bucket).
+type SegmentReport struct {
+       StartTime    time.Time
+       EndTime      time.Time
+       Seg          string
+       Rows         uint64
+       SidxDocCount uint64
+       Shards       int
+       Parts        int
+       Aligned      bool
+       SidxOpened   bool
+}
+
+// EntryGroupReport aggregates source row count + target per-seg report
+// for one (entry, group) pair. Both source and target are read-only;
+// the verify command never mutates the dataset.
+type EntryGroupReport struct {
+       Group       string
+       EntryStage  string
+       EntryTarget string
+       TargetGroup string
+       EntryNodes  []string
+       SrcRoots    []string
+       TargetSegs  []SegmentReport
+       SrcRows     uint64
+       SrcParts    int
+}
+
+// VerifyShardParts reads <shardDir>'s newest `.snp`, confirms every
+// listed partID has an on-disk dir, opens each part, and returns the
+// sum of partMetadata.TotalCount plus the part count.
+//
+// Errors (instead of t.Fatalf) so this helper is usable from a CLI.
+func VerifyShardParts(shardDir string, fileSystem fs.FileSystem) (uint64, int, 
error) {
+       entries, err := os.ReadDir(shardDir)
+       if err != nil {
+               return 0, 0, fmt.Errorf("read shard: %w", err)
+       }
+       var snpPath string
+       for _, e := range entries {
+               if !e.IsDir() && strings.HasSuffix(e.Name(), 
directCopySnpSuffix) {
+                       candidate := filepath.Join(shardDir, e.Name())
+                       if snpPath == "" || candidate > snpPath {
+                               snpPath = candidate
+                       }
+               }
+       }
+       if snpPath == "" {
+               return 0, 0, fmt.Errorf("no .snp file under %s", shardDir)
+       }
+       snpRaw, err := os.ReadFile(snpPath)
+       if err != nil {
+               return 0, 0, fmt.Errorf("read .snp: %w", err)
+       }
+       var partNames []string
+       if err := json.Unmarshal(snpRaw, &partNames); err != nil {
+               return 0, 0, fmt.Errorf("parse .snp: %w", err)
+       }

Review Comment:
   Please update the comments about this: `os.ReadDir` is used rather than 
`fs.FileSystem.ReadDir` because `fs.FileSystem.ReadDir` will panic if the 
directory does not exist. Verify is just a read-only tool, so there is no need 
to panic about this. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to