mrproliu commented on code in PR #1138:
URL: https://github.com/apache/skywalking-banyandb/pull/1138#discussion_r3278237215


##########
banyand/cmd/migration/verify.go:
##########
@@ -0,0 +1,329 @@
+// Licensed to Apache Software Foundation (ASF) under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Apache Software Foundation (ASF) licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package main
+
+import (
+       "context"
+       "fmt"
+       "os"
+       "os/signal"
+       "strings"
+       "syscall"
+       "text/tabwriter"
+
+       "github.com/spf13/cobra"
+
+       "github.com/apache/skywalking-banyandb/banyand/measure"
+)
+
+func newVerifyCmd() *cobra.Command {
+       var configPath string
+
+       cmd := &cobra.Command{
+               Use:   "verify",
+               Short: "Inspect a copy run: per-(entry, group) source vs target row counts, target segment grid alignment, sidx doc counts",
+               Long: `verify reads the same plan.yaml that the copy run consumed and walks
+each (entry, group) read-only:
+
+  - sums source row count by opening every src/seg-*/shard-*/<partID>
+    and totalling partMetadata.TotalCount
+  - enumerates target/seg-*/, opens each part and the per-seg union sidx
+  - flags whether each target seg's start time aligns to the entry
+    stage's SegmentInterval grid (IntervalRule.Standard)
+  - prints all numbers (source rows, target rows per seg, sidxDocs,
+    aligned y/n) to stdout for the operator to inspect
+
+verify never fails on mismatch — it just reports. Use it after a copy
+run, optionally after the data-copy ↔ data swap, to confirm the
+migration result.`,
+               RunE: func(_ *cobra.Command, _ []string) error {
+                       plan, err := LoadCopyPlan(configPath)
+                       if err != nil {
+                               return err
+                       }
+                       // staging dir is unused by verify (read-only), pass "".
+                       cfg := plan.ToDirectCopyConfig("")
+
+                       ctx, stop := signal.NotifyContext(context.Background(),
+                               os.Interrupt, syscall.SIGTERM)
+                       defer stop()
+
+                       tally := &verifyTally{}
+                       runErr := measure.MigrationVerify(ctx, cfg, func(r measure.EntryGroupReport) {
+                               printOneReport(r)
+                               tally.absorb(r)
+                       })
+                       tally.printSummary()
+                       return runErr
+               },
+       }
+
+       cmd.Flags().StringVar(&configPath, "copy-config", "",
+               "path to the YAML migration copy plan that was used for `migration copy` (required)")
+       _ = cmd.MarkFlagRequired("copy-config")
+       return cmd
+}
+
+// verifyTally accumulates the per-(node, group) findings the callback
+// stream emits so we can print a single roll-up SUMMARY block at the
+// end of the run — the per-report stream prints itself.
+type verifyTally struct {
+       mismatches     []verifyMismatch
+       coverage       map[string]map[string]coverageState // node → group → (src/tgt presence)
+       nodeOrder      []string                            // first-seen ordering
+       groupOrder     []string                            // first-seen ordering
+       srcRowsTotal   uint64
+       tgtRowsTotal   uint64
+       segsTotal      int
+       segsMisaligned int
+}
+
+// coverageState records whether SOURCE and TARGET independently hold
+// any rows for one (node, group) pair. Four combinations exist, each
+// surfaced by a distinct token in the coverage table:
+//
+//     src=true, tgt=true   → "✓"  (both present, normal copy success)
+//     src=true, tgt=false  → "S"  (src has data but target is empty — copy lost this group)
+//     src=false, tgt=true  → "T"  (target has data without a source — orphan / leftover)
+//     src=false, tgt=false → "--" (neither — PVC hash sharding excluded this group, normal)
+type coverageState struct {
+       src bool
+       tgt bool
+}
+
+// verifyMismatch records one (node, group) where source row count
+// did NOT equal target row count — surfaced in the SUMMARY block so
+// the operator immediately sees which PVCs / groups need follow-up.
+type verifyMismatch struct {
+       Stage    string
+       NodeName string
+       Group    string
+       SrcRows  uint64
+       TgtRows  uint64
+}
+
+// entryNodeName picks the first node from EntryNodes (entries usually
+// reference exactly one node); falls back to the entry stage when the
+// list is empty (e.g. backup-mode plans without explicit nodes).
+func entryNodeName(r measure.EntryGroupReport) string {
+       if len(r.EntryNodes) > 0 {
+               return r.EntryNodes[0]
+       }
+       return r.EntryStage
+}

Review Comment:
   Please rewrite this code to use a `switch` statement.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to