hanahmily commented on code in PR #688:
URL: 
https://github.com/apache/skywalking-banyandb/pull/688#discussion_r2160534930


##########
banyand/measure/column.go:
##########
@@ -18,13 +18,33 @@
 package measure
 
 import (
+       stdbytes "bytes"
+       "math"
+       "sync"
+
        "github.com/apache/skywalking-banyandb/pkg/bytes"
+       "github.com/apache/skywalking-banyandb/pkg/convert"
        "github.com/apache/skywalking-banyandb/pkg/encoding"
        "github.com/apache/skywalking-banyandb/pkg/fs"
        "github.com/apache/skywalking-banyandb/pkg/logger"
        pbv1 "github.com/apache/skywalking-banyandb/pkg/pb/v1"
 )
 
+var (
+       int64SlicePool = sync.Pool{
+               New: func() interface{} {
+                       s := make([]int64, 0, 1024)

Review Comment:
   You should avoid pre-allocating the size of the slice. Instead, resize the 
slice as needed.



##########
banyand/measure/column.go:
##########
@@ -18,13 +18,33 @@
 package measure
 
 import (
+       stdbytes "bytes"
+       "math"
+       "sync"
+
        "github.com/apache/skywalking-banyandb/pkg/bytes"
+       "github.com/apache/skywalking-banyandb/pkg/convert"
        "github.com/apache/skywalking-banyandb/pkg/encoding"
        "github.com/apache/skywalking-banyandb/pkg/fs"
        "github.com/apache/skywalking-banyandb/pkg/logger"
        pbv1 "github.com/apache/skywalking-banyandb/pkg/pb/v1"
 )
 
+var (
+       int64SlicePool = sync.Pool{

Review Comment:
   You should utilize the pool in our library to monitor the pool leak in the 
test case.



##########
banyand/measure/column_metadata.go:
##########
@@ -28,24 +28,63 @@ import (
 
 type columnMetadata struct {
        name string
+       encodeBlock
        dataBlock
        valueType pbv1.ValueType
 }
 
+type encodeBlock struct {
+       encodeType encoding.EncodeType
+       firstValue int64

Review Comment:
   The first value should be at the column instead of the meta. It can not help 
us search the block more than a overhead.



##########
banyand/measure/column.go:
##########
@@ -60,11 +80,88 @@ func (c *column) mustWriteTo(cm *columnMetadata, 
columnWriter *writer) {
        bb := bigValuePool.Generate()
        defer bigValuePool.Release(bb)
 
-       // marshal values
-       bb.Buf = encoding.EncodeBytesBlock(bb.Buf[:0], c.values)
+       encodeDefault := func() {
+               bb.Buf = encoding.EncodeBytesBlock(bb.Buf[:0], c.values)
+       }
+
+       // select encoding based on data type
+encodeSwitch:
+       switch c.valueType {
+       case pbv1.ValueTypeInt64:

Review Comment:
   Encapsulate the encoding logic in a function. We will have several encoders 
later on.



##########
banyand/measure/column.go:
##########
@@ -60,11 +80,88 @@ func (c *column) mustWriteTo(cm *columnMetadata, 
columnWriter *writer) {
        bb := bigValuePool.Generate()
        defer bigValuePool.Release(bb)
 
-       // marshal values
-       bb.Buf = encoding.EncodeBytesBlock(bb.Buf[:0], c.values)
+       encodeDefault := func() {
+               bb.Buf = encoding.EncodeBytesBlock(bb.Buf[:0], c.values)
+       }
+
+       // select encoding based on data type
+encodeSwitch:
+       switch c.valueType {
+       case pbv1.ValueTypeInt64:
+               // convert byte array to int64 array
+               intValuesPtr := int64SlicePool.Get().(*[]int64)
+               intValues := *intValuesPtr
+               intValues = intValues[:0]
+               if cap(intValues) < len(c.values) {
+                       intValues = make([]int64, len(c.values))
+               } else {
+                       intValues = intValues[:len(c.values)]
+               }
+               defer func() {
+                       *intValuesPtr = intValues[:0]
+                       int64SlicePool.Put(intValuesPtr)
+               }()
+
+               for i, v := range c.values {
+                       if len(v) != 8 {
+                               if v == nil || string(v) == "null" {
+                                       // TODO c.valueType = pbv1.ValueTypeStr
+                                       cm.valueType = pbv1.ValueTypeStr
+                                       encodeDefault()
+                                       break encodeSwitch // skip to final part
+                               }
+                               var val int64
+                               for j := 0; j < len(v); j++ {
+                                       val = (val << 8) | int64(v[j])
+                               }
+                               intValues[i] = val
+                       } else {
+                               intValues[i] = convert.BytesToInt64(v)
+                       }
+               }
+               // use delta encoding for integer column
+               var encodeType encoding.EncodeType
+               var firstValue int64
+               bb.Buf, encodeType, firstValue = 
encoding.Int64ListToBytes(bb.Buf[:0], intValues)
+               cm.encodeType = encodeType
+               cm.firstValue = firstValue
+               if cm.encodeType == encoding.EncodeTypeUnknown {
+                       logger.Panicf("invalid encode type for int64 values")
+               }
+       case pbv1.ValueTypeFloat64:
+               // convert byte array to float64 array
+               floatValuesPtr := float64SlicePool.Get().(*[]float64)
+               floatValues := *floatValuesPtr
+               floatValues = floatValues[:0]
+               if cap(floatValues) < len(c.values) {
+                       floatValues = make([]float64, len(c.values))
+               } else {
+                       floatValues = floatValues[:len(c.values)]
+               }
+               defer func() {
+                       *floatValuesPtr = floatValues[:0]
+                       float64SlicePool.Put(floatValuesPtr)
+               }()
+
+               for i, v := range c.values {
+                       floatValues[i] = convert.BytesToFloat64(v)
+               }
+               // use XOR encoding for float column
+               bb.Buf = bb.Buf[:0]
+               writer := encoding.NewWriter()

Review Comment:
   To prevent unnecessary allocation, please pool this writer.



##########
banyand/measure/column.go:
##########
@@ -60,11 +80,88 @@ func (c *column) mustWriteTo(cm *columnMetadata, 
columnWriter *writer) {
        bb := bigValuePool.Generate()
        defer bigValuePool.Release(bb)
 
-       // marshal values
-       bb.Buf = encoding.EncodeBytesBlock(bb.Buf[:0], c.values)
+       encodeDefault := func() {
+               bb.Buf = encoding.EncodeBytesBlock(bb.Buf[:0], c.values)
+       }
+
+       // select encoding based on data type
+encodeSwitch:
+       switch c.valueType {
+       case pbv1.ValueTypeInt64:
+               // convert byte array to int64 array
+               intValuesPtr := int64SlicePool.Get().(*[]int64)
+               intValues := *intValuesPtr
+               intValues = intValues[:0]
+               if cap(intValues) < len(c.values) {
+                       intValues = make([]int64, len(c.values))
+               } else {
+                       intValues = intValues[:len(c.values)]
+               }
+               defer func() {
+                       *intValuesPtr = intValues[:0]
+                       int64SlicePool.Put(intValuesPtr)
+               }()
+
+               for i, v := range c.values {
+                       if len(v) != 8 {

Review Comment:
   If the value is malformed, use logger.Panic to notify this bug.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscr...@skywalking.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to