This is an automated email from the ASF dual-hosted git repository. hanahmily pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/skywalking-banyandb.git
The following commit(s) were added to refs/heads/main by this push: new 9ead45d7 Fix boundary issue in findRange (#380) 9ead45d7 is described below commit 9ead45d7446a08f5a051ab5b92bb227fe0312fe8 Author: Huang Youliang <52878305+butterbri...@users.noreply.github.com> AuthorDate: Wed Jan 24 15:05:05 2024 +0800 Fix boundary issue in findRange (#380) * Fix boundary issue in findrange * Remove unnecessary query fields --------- Co-authored-by: 吴晟 Wu Sheng <wu.sh...@foxmail.com> --- banyand/measure/block.go | 46 ++--------------- banyand/measure/block_test.go | 85 ------------------------------- banyand/stream/block.go | 56 +++++--------------- banyand/stream/block_test.go | 93 --------------------------------- banyand/stream/query.go | 4 -- pkg/timestamp/range.go | 28 ++++++++++ pkg/timestamp/range_test.go | 116 ++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 161 insertions(+), 267 deletions(-) diff --git a/banyand/measure/block.go b/banyand/measure/block.go index 0bdcdea4..707a1ae4 100644 --- a/banyand/measure/block.go +++ b/banyand/measure/block.go @@ -27,6 +27,7 @@ import ( "github.com/apache/skywalking-banyandb/pkg/fs" "github.com/apache/skywalking-banyandb/pkg/logger" pbv1 "github.com/apache/skywalking-banyandb/pkg/pb/v1" + "github.com/apache/skywalking-banyandb/pkg/timestamp" ) type block struct { @@ -542,11 +543,11 @@ func (bc *blockCursor) loadData(tmpBlock *block) bool { bc.bm.tagFamilies = tf tmpBlock.mustReadFrom(&bc.columnValuesDecoder, bc.p, bc.bm) - start, end, ok := findRange(tmpBlock.timestamps, bc.minTimestamp, bc.maxTimestamp) + start, end, ok := timestamp.FindRange(tmpBlock.timestamps, bc.minTimestamp, bc.maxTimestamp) if !ok { return false } - bc.timestamps = append(bc.timestamps, tmpBlock.timestamps[start:end]...) + bc.timestamps = append(bc.timestamps, tmpBlock.timestamps[start:end+1]...) for _, cf := range tmpBlock.tagFamilies { tf := columnFamily{ @@ -563,7 +564,7 @@ func (bc *blockCursor) loadData(tmpBlock *block) bool { if len(cf.columns[i].values) != len(tmpBlock.timestamps) { logger.Panicf("unexpected number of values for tags %q: got %d; want %d", cf.columns[i].name, len(cf.columns[i].values), len(tmpBlock.timestamps)) } - column.values = append(column.values, cf.columns[i].values[start:end]...) + column.values = append(column.values, cf.columns[i].values[start:end+1]...) tf.columns = append(tf.columns, column) } bc.tagFamilies = append(bc.tagFamilies, tf) @@ -582,49 +583,12 @@ func (bc *blockCursor) loadData(tmpBlock *block) bool { valueType: tmpBlock.field.columns[i].valueType, } - c.values = append(c.values, tmpBlock.field.columns[i].values[start:end]...) + c.values = append(c.values, tmpBlock.field.columns[i].values[start:end+1]...) bc.fields.columns = append(bc.fields.columns, c) } return true } -func findRange(timestamps []int64, min int64, max int64) (int, int, bool) { - l := len(timestamps) - start, end := -1, -1 - - for i := 0; i < l; i++ { - if timestamps[i] > max || timestamps[l-i-1] < min { - break - } - if timestamps[i] >= min && start == -1 { - start = i - } - if timestamps[l-i-1] <= max && end == -1 { - end = l - i - } - if start != -1 && end != -1 { - break - } - } - - if start == -1 && end == -1 { - return 0, 0, false - } - - if start == -1 { - start = 0 - } - - if end == -1 { - end = l - } - - if start >= end { - return 0, 0, false - } - return start, end, true -} - var blockCursorPool sync.Pool func generateBlockCursor() *blockCursor { diff --git a/banyand/measure/block_test.go b/banyand/measure/block_test.go index 2431b735..655960dc 100644 --- a/banyand/measure/block_test.go +++ b/banyand/measure/block_test.go @@ -417,91 +417,6 @@ func Test_marshalAndUnmarshalBlock(t *testing.T) { } } -func Test_findRange(t *testing.T) { - type args struct { - timestamps []int64 - min int64 - max int64 - } - tests := []struct { - name string - args args - wantStart int - wantEnd int - wantExist bool - }{ - { - name: "Test with empty timestamps", - args: args{ - timestamps: []int64{}, - min: 1, - max: 10, - }, - wantStart: 0, - wantEnd: 0, - wantExist: false, - }, - { - name: "Test with single timestamp", - args: args{ - timestamps: []int64{1}, - min: 1, - max: 1, - }, - wantStart: 0, - wantEnd: 1, - wantExist: true, - }, - { - name: "Test with range not in timestamps", - args: args{ - timestamps: []int64{1, 2, 3, 4, 5}, - min: 6, - max: 10, - }, - wantStart: 0, - wantEnd: 0, - wantExist: false, - }, - { - name: "Test with range in timestamps", - args: args{ - timestamps: []int64{1, 2, 3, 4, 5}, - min: 2, - max: 4, - }, - wantStart: 1, - wantEnd: 4, - wantExist: true, - }, - { - name: "Test with range as timestamps", - args: args{ - timestamps: []int64{1, 2, 3, 4, 5}, - min: 1, - max: 5, - }, - wantStart: 0, - wantEnd: 5, - wantExist: true, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - gotStart, gotEnd, gotExist := findRange(tt.args.timestamps, tt.args.min, tt.args.max) - if gotStart != tt.wantStart { - t.Errorf("findRange() gotStart = %v, want %v", gotStart, tt.wantStart) - } - if gotEnd != tt.wantEnd { - t.Errorf("findRange() gotEnd = %v, want %v", gotEnd, tt.wantEnd) - } - if gotExist != tt.wantExist { - t.Errorf("findRange() gotExist = %v, want %v", gotExist, tt.wantExist) - } - }) - } -} - func Test_blockPointer_append(t *testing.T) { type fields struct { timestamps []int64 diff --git a/banyand/stream/block.go b/banyand/stream/block.go index a3002b70..1ef9d05f 100644 --- a/banyand/stream/block.go +++ b/banyand/stream/block.go @@ -27,6 +27,7 @@ import ( "github.com/apache/skywalking-banyandb/pkg/fs" "github.com/apache/skywalking-banyandb/pkg/logger" pbv1 "github.com/apache/skywalking-banyandb/pkg/pb/v1" + "github.com/apache/skywalking-banyandb/pkg/timestamp" ) type block struct { @@ -410,18 +411,16 @@ func releaseBlock(b *block) { var blockPool sync.Pool type blockCursor struct { - p *part - timestamps []int64 - elementIDs []string - tagFamilies []tagFamily - tagValuesDecoder encoding.BytesBlockDecoder - tagProjection []pbv1.TagProjection - bm blockMetadata - idx int - minTimestamp int64 - maxTimestamp int64 - includeMinTimestamp bool - includeMaxTimestamp bool + p *part + timestamps []int64 + elementIDs []string + tagFamilies []tagFamily + tagValuesDecoder encoding.BytesBlockDecoder + tagProjection []pbv1.TagProjection + bm blockMetadata + idx int + minTimestamp int64 + maxTimestamp int64 } func (bc *blockCursor) reset() { @@ -430,8 +429,6 @@ func (bc *blockCursor) reset() { bc.bm = blockMetadata{} bc.minTimestamp = 0 bc.maxTimestamp = 0 - bc.includeMinTimestamp = false - bc.includeMaxTimestamp = false bc.tagProjection = bc.tagProjection[:0] bc.timestamps = bc.timestamps[:0] @@ -450,8 +447,6 @@ func (bc *blockCursor) init(p *part, bm blockMetadata, queryOpts queryOptions) { bc.bm = bm bc.minTimestamp = queryOpts.minTimestamp bc.maxTimestamp = queryOpts.maxTimestamp - bc.includeMinTimestamp = queryOpts.includeMin - bc.includeMaxTimestamp = queryOpts.includeMax bc.tagProjection = queryOpts.TagProjection } @@ -536,7 +531,7 @@ func (bc *blockCursor) loadData(tmpBlock *block) bool { bc.bm.tagFamilies = tf tmpBlock.mustReadFrom(&bc.tagValuesDecoder, bc.p, bc.bm) - start, end, ok := findRange(tmpBlock.timestamps, bc.minTimestamp, bc.maxTimestamp, bc.includeMinTimestamp, bc.includeMaxTimestamp) + start, end, ok := timestamp.FindRange(tmpBlock.timestamps, bc.minTimestamp, bc.maxTimestamp) if !ok { return false } @@ -568,33 +563,6 @@ func (bc *blockCursor) loadData(tmpBlock *block) bool { return true } -func findRange(timestamps []int64, min, max int64, includeMin, includeMax bool) (int, int, bool) { - if len(timestamps) == 0 { - return -1, -1, false - } - if timestamps[0] > max || !includeMin && timestamps[0] == max { - return -1, -1, false - } - if timestamps[len(timestamps)-1] < min || !includeMax && timestamps[len(timestamps)-1] == min { - return -1, -1, false - } - - start, end := -1, len(timestamps) - for start < len(timestamps)-1 { - start++ - if timestamps[start] > min || (includeMin && timestamps[start] == min) { - break - } - } - for end > 0 { - end-- - if timestamps[end] < max || (includeMax && timestamps[end] == max) { - break - } - } - return start, end, start <= end -} - var blockCursorPool sync.Pool func generateBlockCursor() *blockCursor { diff --git a/banyand/stream/block_test.go b/banyand/stream/block_test.go index 0b30d706..88f3d90e 100644 --- a/banyand/stream/block_test.go +++ b/banyand/stream/block_test.go @@ -427,99 +427,6 @@ func Test_marshalAndUnmarshalBlock(t *testing.T) { } } -func Test_findRange(t *testing.T) { - type args struct { - timestamps []int64 - min int64 - max int64 - includeMin bool - includeMax bool - } - tests := []struct { - name string - args args - wantStart int - wantEnd int - wantExist bool - }{ - { - name: "Test with empty timestamps", - args: args{ - timestamps: []int64{}, - min: 1, - max: 10, - }, - wantStart: -1, - wantEnd: -1, - wantExist: false, - }, - { - name: "Test with single timestamp", - args: args{ - timestamps: []int64{1}, - min: 1, - max: 1, - includeMin: true, - includeMax: true, - }, - wantStart: 0, - wantEnd: 0, - wantExist: true, - }, - { - name: "Test with range not in timestamps", - args: args{ - timestamps: []int64{1, 2, 3, 4, 5}, - min: 6, - max: 10, - }, - wantStart: -1, - wantEnd: -1, - wantExist: false, - }, - { - name: "Test with range in timestamps", - args: args{ - timestamps: []int64{1, 2, 3, 4, 5}, - min: 2, - max: 4, - includeMin: true, - includeMax: true, - }, - wantStart: 1, - wantEnd: 3, - wantExist: true, - }, - { - name: "Test with range as timestamps", - args: args{ - timestamps: []int64{1, 2, 3, 4, 5}, - min: 1, - max: 5, - includeMin: true, - includeMax: true, - }, - wantStart: 0, - wantEnd: 4, - wantExist: true, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - gotStart, gotEnd, gotExist := findRange(tt.args.timestamps, tt.args.min, tt.args.max, tt.args.includeMin, tt.args.includeMax) - if gotStart != tt.wantStart { - t.Errorf("findRange() gotStart = %v, want %v", gotStart, tt.wantStart) - } - if gotEnd != tt.wantEnd { - t.Errorf("findRange() gotEnd = %v, want %v", gotEnd, tt.wantEnd) - } - if gotExist != tt.wantExist { - t.Errorf("findRange() gotExist = %v, want %v", gotExist, tt.wantExist) - } - }) - } -} - func Test_blockPointer_append(t *testing.T) { type fields struct { timestamps []int64 diff --git a/banyand/stream/query.go b/banyand/stream/query.go index d4580633..7ac8e65d 100644 --- a/banyand/stream/query.go +++ b/banyand/stream/query.go @@ -39,8 +39,6 @@ type queryOptions struct { pbv1.StreamQueryOptions minTimestamp int64 maxTimestamp int64 - includeMin bool - includeMax bool } func mustDecodeTagValue(valueType pbv1.ValueType, value []byte) *modelv1.TagValue { @@ -407,8 +405,6 @@ func (s *stream) Query(ctx context.Context, sqo pbv1.StreamQueryOptions) (pbv1.S StreamQueryOptions: sqo, minTimestamp: sqo.TimeRange.Start.UnixNano(), maxTimestamp: sqo.TimeRange.End.UnixNano(), - includeMin: sqo.TimeRange.IncludeStart, - includeMax: sqo.TimeRange.IncludeEnd, } var n int for i := range tabWrappers { diff --git a/pkg/timestamp/range.go b/pkg/timestamp/range.go index 0376187f..f6d3e2e5 100644 --- a/pkg/timestamp/range.go +++ b/pkg/timestamp/range.go @@ -112,3 +112,31 @@ func NewTimeRange(start, end time.Time, includeStart, includeEnd bool) TimeRange func NewTimeRangeDuration(start time.Time, duration time.Duration, includeStart, includeEnd bool) TimeRange { return NewTimeRange(start, start.Add(duration), includeStart, includeEnd) } + +// FindRange returns the indices of the first and last elements in a sorted 'timestamps' slice that are within the min and max range. +func FindRange(timestamps []int64, min, max int64) (int, int, bool) { + if len(timestamps) == 0 { + return -1, -1, false + } + if timestamps[0] > max { + return -1, -1, false + } + if timestamps[len(timestamps)-1] < min { + return -1, -1, false + } + + start, end := -1, len(timestamps) + for start < len(timestamps)-1 { + start++ + if timestamps[start] >= min { + break + } + } + for end > 0 { + end-- + if timestamps[end] <= max { + break + } + } + return start, end, start <= end +} diff --git a/pkg/timestamp/range_test.go b/pkg/timestamp/range_test.go new file mode 100644 index 00000000..c5746dd3 --- /dev/null +++ b/pkg/timestamp/range_test.go @@ -0,0 +1,116 @@ +// Licensed to Apache Software Foundation (ASF) under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Apache Software Foundation (ASF) licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package timestamp + +import "testing" + +func Test_findRange(t *testing.T) { + type args struct { + timestamps []int64 + min int64 + max int64 + } + tests := []struct { + name string + args args + wantStart int + wantEnd int + wantExist bool + }{ + { + name: "Test with empty timestamps", + args: args{ + timestamps: []int64{}, + min: 1, + max: 10, + }, + wantStart: -1, + wantEnd: -1, + wantExist: false, + }, + { + name: "Test with single timestamp", + args: args{ + timestamps: []int64{1}, + min: 1, + max: 1, + }, + wantStart: 0, + wantEnd: 0, + wantExist: true, + }, + { + name: "Test with range not in timestamps", + args: args{ + timestamps: []int64{1, 2, 3, 4, 5}, + min: 6, + max: 10, + }, + wantStart: -1, + wantEnd: -1, + wantExist: false, + }, + { + name: "Test with range in timestamps", + args: args{ + timestamps: []int64{1, 2, 3, 4, 5}, + min: 2, + max: 4, + }, + wantStart: 1, + wantEnd: 3, + wantExist: true, + }, + { + name: "Test with range as timestamps", + args: args{ + timestamps: []int64{1, 2, 3, 4, 5}, + min: 1, + max: 5, + }, + wantStart: 0, + wantEnd: 4, + wantExist: true, + }, + { + name: "Test with max equals to the first timestamp", + args: args{ + timestamps: []int64{1, 2, 3, 4, 5}, + min: 1, + max: 1, + }, + wantStart: 0, + wantEnd: 0, + wantExist: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotStart, gotEnd, gotExist := FindRange(tt.args.timestamps, tt.args.min, tt.args.max) + if gotStart != tt.wantStart { + t.Errorf("findRange() gotStart = %v, want %v", gotStart, tt.wantStart) + } + if gotEnd != tt.wantEnd { + t.Errorf("findRange() gotEnd = %v, want %v", gotEnd, tt.wantEnd) + } + if gotExist != tt.wantExist { + t.Errorf("findRange() gotExist = %v, want %v", gotExist, tt.wantExist) + } + }) + } +}