Copilot commented on code in PR #874:
URL:
https://github.com/apache/skywalking-banyandb/pull/874#discussion_r2573654199
##########
banyand/stream/tag_filter.go:
##########
@@ -141,9 +141,11 @@ func (tff tagFamilyFilter)
unmarshal(tagFamilyMetadataBlock *dataBlock, metaRead
}
for _, tm := range tfm.tagMetadata {
tf := generateTagFilter()
+ hasMinMax := false
if tm.valueType == pbv1.ValueTypeInt64 {
Review Comment:
The `hasMinMax` flag is set to `true` based solely on the value type being
`Int64`, without checking if `tm.min` and `tm.max` are actually populated
(non-empty). This could cause tags with `Int64` type but empty min/max values
to be incorrectly retained in the map even though they have no useful filtering
data.
Consider checking if the min/max slices are non-empty:
```go
hasMinMax := false
if tm.valueType == pbv1.ValueTypeInt64 && len(tm.min) > 0 && len(tm.max) > 0
{
tf.min = tm.min
tf.max = tm.max
hasMinMax = true
}
```
```suggestion
if tm.valueType == pbv1.ValueTypeInt64 && len(tm.min) > 0 &&
len(tm.max) > 0 {
```
##########
banyand/stream/tag_filter_test.go:
##########
@@ -468,3 +470,255 @@ func BenchmarkTagFamilyFiltersUnmarshal(b *testing.B) {
})
}
}
+
+func generateMetaAndFilterWithScenarios(scenarios []struct {
+ name string
+ valueType pbv1.ValueType
+ hasFilter bool
+ hasMinMax bool
+ encodeType encoding.EncodeType
Review Comment:
[nitpick] The `hasFilter` field in the scenario might be clearer if renamed
to `hasBloomFilter` or `hasFilterBlock`. Currently, `hasFilter: false` means
"no bloom filter block," but a `DictionaryFilter` can still be created from the
tag value data when `encodeType == encoding.EncodeTypeDictionary`. This could
be confusing to readers who see `hasFilter: false` but expect a filter to be
created in the test expectations (line 640: `hasFilter: true`).
##########
banyand/stream/tag_filter_test.go:
##########
@@ -468,3 +470,255 @@ func BenchmarkTagFamilyFiltersUnmarshal(b *testing.B) {
})
}
}
+
+func generateMetaAndFilterWithScenarios(scenarios []struct {
+ name string
+ valueType pbv1.ValueType
+ hasFilter bool
+ hasMinMax bool
+ encodeType encoding.EncodeType
+},
+) ([]byte, []byte, []byte) {
+ tfm := generateTagFamilyMetadata()
+ defer releaseTagFamilyMetadata(tfm)
+ filterBuf := bytes.Buffer{}
+ tagValueBuf := bytes.Buffer{}
+
+ for _, scenario := range scenarios {
+ tm := &tagMetadata{
+ name: scenario.name,
+ valueType: scenario.valueType,
+ }
+
+ if scenario.hasMinMax && scenario.valueType ==
pbv1.ValueTypeInt64 {
+ tm.min = make([]byte, 8)
+ tm.max = make([]byte, 8)
+ binary.BigEndian.PutUint64(tm.min, 100)
+ binary.BigEndian.PutUint64(tm.max, 200)
+ }
+
+ if scenario.hasFilter {
+ // Create bloom filter
+ bf := filter.NewBloomFilter(10)
+ bf.Add([]byte("test-value"))
+ buf := make([]byte, 0)
+ buf = encodeBloomFilter(buf, bf)
+ tm.filterBlock.offset = uint64(filterBuf.Len())
+ tm.filterBlock.size = uint64(len(buf))
+ filterBuf.Write(buf)
+ } else {
+ // Set up tag value data with encode type
+ tm.offset = uint64(tagValueBuf.Len())
+ encodeTypeByte := byte(scenario.encodeType)
+ tagValueBuf.WriteByte(encodeTypeByte)
+ if scenario.encodeType == encoding.EncodeTypeDictionary
{
+ // Write dictionary data: encode type byte +
dictionary values
+ // Dictionary format: VarUint64(count) +
EncodeBytesBlock(values)
+ dictValues := [][]byte{
+ []byte("dict-value-1"),
+ []byte("dict-value-2"),
+ }
+ dictBuf := encoding.VarUint64ToBytes(nil,
uint64(len(dictValues)))
+ dictBuf = encoding.EncodeBytesBlock(dictBuf,
dictValues)
+ tagValueBuf.Write(dictBuf)
+ tm.size = uint64(1 + len(dictBuf))
+ } else {
+ // Non-dictionary encoding, just the encode
type byte
+ tm.size = 1
+ }
+ }
+
+ tfm.tagMetadata = append(tfm.tagMetadata, *tm)
+ }
+
+ metaBuf := make([]byte, 0)
+ metaBuf = tfm.marshal(metaBuf)
+ return metaBuf, filterBuf.Bytes(), tagValueBuf.Bytes()
+}
+
+func TestTagFamilyFiltersNPEAndUnmarshalBehavior(t *testing.T) {
+ // Test all scenarios together to verify NPE fix and unmarshal behavior
+ scenarios := []struct {
+ name string
+ valueType pbv1.ValueType
+ hasFilter bool
+ hasMinMax bool
+ encodeType encoding.EncodeType
+ }{
+ {
+ name: "bloom-tag",
+ valueType: pbv1.ValueTypeStr,
+ hasFilter: true,
+ hasMinMax: false,
+ encodeType: encoding.EncodeTypePlain,
+ },
+ {
+ name: "dict-tag",
+ valueType: pbv1.ValueTypeStr,
+ hasFilter: false,
+ hasMinMax: false,
+ encodeType: encoding.EncodeTypeDictionary,
+ },
+ {
+ name: "numeric-tag",
+ valueType: pbv1.ValueTypeInt64,
+ hasFilter: false,
+ hasMinMax: true,
+ encodeType: encoding.EncodeTypePlain,
+ },
+ {
+ name: "useless-tag",
+ valueType: pbv1.ValueTypeStr,
+ hasFilter: false,
+ hasMinMax: false,
+ encodeType: encoding.EncodeTypePlain,
+ },
+ }
Review Comment:
The test scenarios don't cover the edge case where a tag has `valueType ==
pbv1.ValueTypeInt64` but empty min/max values. This scenario could expose the
bug where `hasMinMax` is set to `true` based solely on the value type (line
148) without checking if min/max are actually populated.
Consider adding a test scenario:
```go
{
name: "int64-no-minmax",
valueType: pbv1.ValueTypeInt64,
hasFilter: false,
hasMinMax: false, // Important: no min/max despite Int64 type
encodeType: encoding.EncodeTypePlain,
}
```
This tag should NOT be kept in the map since it has no useful filtering data.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]