Copilot commented on code in PR #1060:
URL:
https://github.com/apache/skywalking-banyandb/pull/1060#discussion_r3064214202
##########
banyand/internal/sidx/sync.go:
##########
@@ -84,11 +84,17 @@ func (s *sidx) StreamingParts(partIDsToSync
map[uint64]struct{}, group string, s
UncompressedSizeBytes:
part.partMetadata.UncompressedSizeBytes,
TotalCount:
part.partMetadata.TotalCount,
BlocksCount:
part.partMetadata.BlocksCount,
- MinTimestamp:
part.partMetadata.SegmentID,
MinKey: part.partMetadata.MinKey,
MaxKey: part.partMetadata.MaxKey,
PartType: name,
- })
+ }
+ if part.partMetadata.MinTimestamp != nil {
+ spd.MinTimestamp =
*part.partMetadata.MinTimestamp
+ }
+ if part.partMetadata.MaxTimestamp != nil {
+ spd.MaxTimestamp =
*part.partMetadata.MaxTimestamp
+ }
Review Comment:
When partMetadata.MinTimestamp/MaxTimestamp are nil, this leaves
StreamingPartData.MinTimestamp/MaxTimestamp at the zero value. The trace
chunked-sync receiver rejects ctx.MinTimestamp <= 0 (see
banyand/trace/write_data.go:248-250), so syncing older/back-compat SIDX parts
that omit timestamps will still fail. Consider falling back to
part.partMetadata.SegmentID (and maybe MaxTimestamp = MinTimestamp) when the
pointer fields are nil, or explicitly skipping/flagging such parts so sync
can’t produce invalid timestamps.
##########
banyand/internal/sidx/sync_test.go:
##########
@@ -0,0 +1,165 @@
+// Licensed to Apache Software Foundation (ASF) under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Apache Software Foundation (ASF) licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package sidx
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+
+ "github.com/apache/skywalking-banyandb/api/data"
+ "github.com/apache/skywalking-banyandb/banyand/observability"
+ "github.com/apache/skywalking-banyandb/banyand/protector"
+ "github.com/apache/skywalking-banyandb/pkg/fs"
+)
+
+// TestStreamingParts_Timestamps verifies that StreamingParts propagates
+// MinTimestamp and MaxTimestamp from partMetadata (not SegmentID) to
+// StreamingPartData. This prevents zero-timestamp segments from being
+// created on the receiving node during distributed sync.
+func TestStreamingParts_Timestamps(t *testing.T) {
+ reqs := []WriteRequest{
+ createTestWriteRequest(1, 100, "data1"),
+ createTestWriteRequest(1, 200, "data2"),
+ }
+
+ t.Run("with_timestamps_set", func(t *testing.T) {
+ sidxIface := createTestSIDX(t)
+ raw := sidxIface.(*sidx)
+ defer func() {
+ assert.NoError(t, raw.Close())
+ }()
+
+ minTS := int64(1700000000)
+ maxTS := int64(1700001000)
+ writeTestDataWithTimeRange(t, raw, reqs, 1, 1, &minTS, &maxTS)
+
+ flushIntro, err := raw.Flush(map[uint64]struct{}{1: {}})
+ require.NoError(t, err)
+ raw.IntroduceFlushed(flushIntro)
+ flushIntro.Release()
+
+ partIDs := map[uint64]struct{}{1: {}}
+ parts, releaseFuncs := raw.StreamingParts(partIDs,
"test-group", 0, "test-sidx")
+ defer func() {
+ for _, release := range releaseFuncs {
+ release()
+ }
+ }()
+
+ require.Len(t, parts, 1)
+ assert.Equal(t, uint64(1), parts[0].ID)
+ assert.Equal(t, int64(1700000000), parts[0].MinTimestamp,
+ "MinTimestamp should come from
partMetadata.MinTimestamp, not SegmentID")
+ assert.Equal(t, int64(1700001000), parts[0].MaxTimestamp,
+ "MaxTimestamp should come from
partMetadata.MaxTimestamp")
+ assert.Equal(t, "test-group", parts[0].Group)
+ assert.Equal(t, uint32(0), parts[0].ShardID)
+ assert.Equal(t, data.TopicTracePartSync.String(),
parts[0].Topic)
+ assert.Equal(t, "test-sidx", parts[0].PartType)
+ })
+
+ t.Run("nil_timestamps_default_to_zero", func(t *testing.T) {
+ sidxIface := createTestSIDX(t)
+ raw := sidxIface.(*sidx)
+ defer func() {
+ assert.NoError(t, raw.Close())
+ }()
+
+ writeTestDataWithTimeRange(t, raw, reqs, 1, 1, nil, nil)
+
+ flushIntro, err := raw.Flush(map[uint64]struct{}{1: {}})
+ require.NoError(t, err)
+ raw.IntroduceFlushed(flushIntro)
+ flushIntro.Release()
+
+ partIDs := map[uint64]struct{}{1: {}}
+ parts, releaseFuncs := raw.StreamingParts(partIDs,
"test-group", 0, "test-sidx")
+ defer func() {
+ for _, release := range releaseFuncs {
+ release()
+ }
+ }()
+
+ require.Len(t, parts, 1)
+ assert.Equal(t, int64(0), parts[0].MinTimestamp,
+ "MinTimestamp should default to 0 when
partMetadata.MinTimestamp is nil")
+ assert.Equal(t, int64(0), parts[0].MaxTimestamp,
+ "MaxTimestamp should default to 0 when
partMetadata.MaxTimestamp is nil")
+ })
Review Comment:
This subtest asserts that nil partMetadata timestamps should default to 0,
but the receiving chunk-sync handlers validate MinTimestamp > 0 and will reject
parts with 0 (e.g., banyand/trace/write_data.go:248-250). If nil timestamps are
expected for back-compat parts, the test should reflect the intended non-zero
fallback (e.g., SegmentID or MinTimestamp=MaxTimestamp), otherwise it’s
encoding behavior that still breaks sync.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]