Copilot commented on code in PR #1060:
URL:
https://github.com/apache/skywalking-banyandb/pull/1060#discussion_r3064214202
##
banyand/internal/sidx/sync.go:
##
@@ -84,11 +84,17 @@ func (s *sidx) StreamingParts(partIDsToSync
map[uint64]struct{}, group string, s
UncompressedSizeBytes:
part.partMetadata.UncompressedSizeBytes,
TotalCount:
part.partMetadata.TotalCount,
BlocksCount:
part.partMetadata.BlocksCount,
- MinTimestamp:
part.partMetadata.SegmentID,
MinKey:part.partMetadata.MinKey,
MaxKey:part.partMetadata.MaxKey,
PartType: name,
- })
+ }
+ if part.partMetadata.MinTimestamp != nil {
+ spd.MinTimestamp =
*part.partMetadata.MinTimestamp
+ }
+ if part.partMetadata.MaxTimestamp != nil {
+ spd.MaxTimestamp =
*part.partMetadata.MaxTimestamp
+ }
Review Comment:
When partMetadata.MinTimestamp/MaxTimestamp are nil, this leaves
StreamingPartData.MinTimestamp/MaxTimestamp at the zero value. The trace
chunked-sync receiver rejects ctx.MinTimestamp <= 0 (see
banyand/trace/write_data.go:248-250), so syncing older/back-compat SIDX parts
that omit timestamps will still fail. Consider falling back to
part.partMetadata.SegmentID (and maybe MaxTimestamp = MinTimestamp) when the
pointer fields are nil, or explicitly skipping/flagging such parts so sync
can’t produce invalid timestamps.
##
banyand/internal/sidx/sync_test.go:
##
@@ -0,0 +1,165 @@
+// Licensed to Apache Software Foundation (ASF) under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Apache Software Foundation (ASF) licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package sidx
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+
+ "github.com/apache/skywalking-banyandb/api/data"
+ "github.com/apache/skywalking-banyandb/banyand/observability"
+ "github.com/apache/skywalking-banyandb/banyand/protector"
+ "github.com/apache/skywalking-banyandb/pkg/fs"
+)
+
+// TestStreamingParts_Timestamps verifies that StreamingParts propagates
+// MinTimestamp and MaxTimestamp from partMetadata (not SegmentID) to
+// StreamingPartData. This prevents zero-timestamp segments from being
+// created on the receiving node during distributed sync.
+func TestStreamingParts_Timestamps(t *testing.T) {
+ reqs := []WriteRequest{
+ createTestWriteRequest(1, 100, "data1"),
+ createTestWriteRequest(1, 200, "data2"),
+ }
+
+ t.Run("with_timestamps_set", func(t *testing.T) {
+ sidxIface := createTestSIDX(t)
+ raw := sidxIface.(*sidx)
+ defer func() {
+ assert.NoError(t, raw.Close())
+ }()
+
+ minTS := int64(17)
+ maxTS := int64(171000)
+ writeTestDataWithTimeRange(t, raw, reqs, 1, 1, &minTS, &maxTS)
+
+ flushIntro, err := raw.Flush(map[uint64]struct{}{1: {}})
+ require.NoError(t, err)
+ raw.IntroduceFlushed(flushIntro)
+ flushIntro.Release()
+
+ partIDs := map[uint64]struct{}{1: {}}
+ parts, releaseFuncs := raw.StreamingParts(partIDs,
"test-group", 0, "test-sidx")
+ defer func() {
+ for _, release := range releaseFuncs {
+ release()
+ }
+ }()
+
+ require.Len(t, parts, 1)
+ assert.Equal(t, uint64(1), parts[0].ID)
+ assert.Equal(t, int64(17), parts[0].MinTimestamp,
+ "MinTimestamp should come from
partMetadata.MinTimestamp, not SegmentID")
+ assert.Equal(t, int64(171000), parts[0].MaxTimestamp,
+ "MaxTimestamp should come from
partMetadata.MaxTimestamp")