OleksiienkoMykyta commented on code in PR #1828: URL: https://github.com/apache/cassandra-gocql-driver/pull/1828#discussion_r2034742664
########## marshal.go: ########## @@ -1716,6 +1726,169 @@ func unmarshalList(info TypeInfo, data []byte, value interface{}) error { return unmarshalErrorf("can not unmarshal %s into %T. Accepted types: *slice, *array.", info, value) } +func marshalVector(info VectorType, value interface{}) ([]byte, error) { + if value == nil { + return nil, nil + } else if _, ok := value.(unsetColumn); ok { + return nil, nil + } + + rv := reflect.ValueOf(value) + t := rv.Type() + k := t.Kind() + if k == reflect.Slice && rv.IsNil() { + return nil, nil + } + + switch k { + case reflect.Slice, reflect.Array: + buf := &bytes.Buffer{} + n := rv.Len() + if n != info.Dimensions { + return nil, marshalErrorf("expected vector with %d dimensions, received %d", info.Dimensions, n) + } + + for i := 0; i < n; i++ { + item, err := Marshal(info.SubType, rv.Index(i).Interface()) + if err != nil { + return nil, err + } + if isVectorVariableLengthType(info.SubType) { + writeUnsignedVInt(buf, uint64(len(item))) + } + buf.Write(item) + } + return buf.Bytes(), nil + } + return nil, marshalErrorf("can not marshal %T into %s", value, info) +} + +func unmarshalVector(info VectorType, data []byte, value interface{}) error { + rv := reflect.ValueOf(value) + if rv.Kind() != reflect.Ptr { + return unmarshalErrorf("can not unmarshal into non-pointer %T", value) + } + rv = rv.Elem() + t := rv.Type() + k := t.Kind() + switch k { + case reflect.Slice, reflect.Array: + if data == nil { + if k == reflect.Array { + return unmarshalErrorf("unmarshal vector: can not store nil in array value") + } + if rv.IsNil() { + return nil + } + rv.Set(reflect.Zero(t)) + return nil + } + if k == reflect.Array { + if rv.Len() != info.Dimensions { + return unmarshalErrorf("unmarshal vector: array of size %d cannot store vector of %d dimensions", rv.Len(), info.Dimensions) + } + } else { + rv.Set(reflect.MakeSlice(t, info.Dimensions, info.Dimensions)) + } + elemSize := len(data) / info.Dimensions + for i := 0; i < info.Dimensions; i++ { + offset := 0 + if isVectorVariableLengthType(info.SubType) { + m, p, err := readUnsignedVint(data, 0) + if err != nil { + return err + } + elemSize = int(m) + offset = p + } + if offset > 0 { + data = data[offset:] + } + var unmarshalData []byte + if elemSize >= 0 { + if len(data) < elemSize { + return unmarshalErrorf("unmarshal vector: unexpected eof") + } + unmarshalData = data[:elemSize] + data = data[elemSize:] + } + err := Unmarshal(info.SubType, unmarshalData, rv.Index(i).Addr().Interface()) + if err != nil { + return unmarshalErrorf("failed to unmarshal %s into %T: %s", info.SubType, unmarshalData, err.Error()) + } + } + return nil + } + return unmarshalErrorf("can not unmarshal %s into %T", info, value) +} + +func isVectorVariableLengthType(elemType TypeInfo) bool { + switch elemType.Type() { + case TypeVarchar, TypeAscii, TypeBlob, TypeText: + return true + case TypeCounter: + return true + case TypeDuration, TypeDate, TypeTime: + return true + case TypeDecimal, TypeSmallInt, TypeTinyInt, TypeVarint: + return true + case TypeInet: + return true + case TypeList, TypeSet, TypeMap, TypeUDT, TypeTuple: + return true + case TypeCustom: + switch elemType.(type) { + case VectorType: + vecType := elemType.(VectorType) + return isVectorVariableLengthType(vecType.SubType) + } + return true + } + return false +} + +func writeUnsignedVInt(buf *bytes.Buffer, v uint64) { + numBytes := computeUnsignedVIntSize(v) + if numBytes <= 1 { + buf.WriteByte(byte(v)) + return + } + + extraBytes := numBytes - 1 + var tmp = make([]byte, numBytes) + for i := extraBytes; i >= 0; i-- { + tmp[i] = byte(v) + v >>= 8 + } + tmp[0] |= byte(^(0xff >> uint(extraBytes))) + buf.Write(tmp) +} + +func readUnsignedVint(data []byte, start int) (uint64, int, error) { Review Comment: `readUnsignedVint` -> `readUnsignedVInt`, it would be a bit more consistent. ########## marshal.go: ########## @@ -172,6 +172,11 @@ func Marshal(info TypeInfo, value interface{}) ([]byte, error) { return marshalDate(info, value) case TypeDuration: return marshalDuration(info, value) + case TypeCustom: + switch info.(type) { + case VectorType: + return marshalVector(info.(VectorType), value) + } Review Comment: The nested switch looks redundant. Maybe something like this would be more suitable? ``` case TypeCustom: if vector, ok := info.(VectorType); ok { return marshalVector(vector, value) } ``` ########## marshal.go: ########## @@ -1716,6 +1726,169 @@ func unmarshalList(info TypeInfo, data []byte, value interface{}) error { return unmarshalErrorf("can not unmarshal %s into %T. Accepted types: *slice, *array.", info, value) } +func marshalVector(info VectorType, value interface{}) ([]byte, error) { + if value == nil { + return nil, nil + } else if _, ok := value.(unsetColumn); ok { + return nil, nil + } + + rv := reflect.ValueOf(value) + t := rv.Type() + k := t.Kind() + if k == reflect.Slice && rv.IsNil() { + return nil, nil + } + + switch k { + case reflect.Slice, reflect.Array: + buf := &bytes.Buffer{} + n := rv.Len() + if n != info.Dimensions { + return nil, marshalErrorf("expected vector with %d dimensions, received %d", info.Dimensions, n) + } + + for i := 0; i < n; i++ { + item, err := Marshal(info.SubType, rv.Index(i).Interface()) + if err != nil { + return nil, err + } + if isVectorVariableLengthType(info.SubType) { + writeUnsignedVInt(buf, uint64(len(item))) + } + buf.Write(item) + } + return buf.Bytes(), nil + } + return nil, marshalErrorf("can not marshal %T into %s", value, info) +} + +func unmarshalVector(info VectorType, data []byte, value interface{}) error { + rv := reflect.ValueOf(value) + if rv.Kind() != reflect.Ptr { + return unmarshalErrorf("can not unmarshal into non-pointer %T", value) + } + rv = rv.Elem() + t := rv.Type() + k := t.Kind() + switch k { + case reflect.Slice, reflect.Array: + if data == nil { + if k == reflect.Array { + return unmarshalErrorf("unmarshal vector: can not store nil in array value") + } + if rv.IsNil() { + return nil + } + rv.Set(reflect.Zero(t)) + return nil + } + if k == reflect.Array { + if rv.Len() != info.Dimensions { + return unmarshalErrorf("unmarshal vector: array of size %d cannot store vector of %d dimensions", rv.Len(), info.Dimensions) + } + } else { + rv.Set(reflect.MakeSlice(t, info.Dimensions, info.Dimensions)) + } + elemSize := len(data) / info.Dimensions + for i := 0; i < info.Dimensions; i++ { + offset := 0 + if isVectorVariableLengthType(info.SubType) { + m, p, err := readUnsignedVint(data, 0) + if err != nil { + return err + } + elemSize = int(m) + offset = p + } + if offset > 0 { + data = data[offset:] + } + var unmarshalData []byte + if elemSize >= 0 { + if len(data) < elemSize { + return unmarshalErrorf("unmarshal vector: unexpected eof") + } + unmarshalData = data[:elemSize] + data = data[elemSize:] + } + err := Unmarshal(info.SubType, unmarshalData, rv.Index(i).Addr().Interface()) + if err != nil { + return unmarshalErrorf("failed to unmarshal %s into %T: %s", info.SubType, unmarshalData, err.Error()) + } + } + return nil + } + return unmarshalErrorf("can not unmarshal %s into %T", info, value) +} + +func isVectorVariableLengthType(elemType TypeInfo) bool { + switch elemType.Type() { + case TypeVarchar, TypeAscii, TypeBlob, TypeText: + return true + case TypeCounter: + return true + case TypeDuration, TypeDate, TypeTime: + return true + case TypeDecimal, TypeSmallInt, TypeTinyInt, TypeVarint: + return true + case TypeInet: + return true + case TypeList, TypeSet, TypeMap, TypeUDT, TypeTuple: + return true + case TypeCustom: + switch elemType.(type) { + case VectorType: + vecType := elemType.(VectorType) + return isVectorVariableLengthType(vecType.SubType) + } + return true + } + return false +} + +func writeUnsignedVInt(buf *bytes.Buffer, v uint64) { + numBytes := computeUnsignedVIntSize(v) + if numBytes <= 1 { + buf.WriteByte(byte(v)) + return + } + + extraBytes := numBytes - 1 + var tmp = make([]byte, numBytes) + for i := extraBytes; i >= 0; i-- { + tmp[i] = byte(v) + v >>= 8 + } + tmp[0] |= byte(^(0xff >> uint(extraBytes))) + buf.Write(tmp) +} + +func readUnsignedVint(data []byte, start int) (uint64, int, error) { Review Comment: Also, maybe it should be provided with unit tests, maybe even with fuzzing, to make sure that everything works properly. Also, I think that it should be along with some documentation, to help developers dive into it faster. Wdyt? ########## marshal.go: ########## @@ -1716,6 +1726,169 @@ func unmarshalList(info TypeInfo, data []byte, value interface{}) error { return unmarshalErrorf("can not unmarshal %s into %T. Accepted types: *slice, *array.", info, value) } +func marshalVector(info VectorType, value interface{}) ([]byte, error) { + if value == nil { + return nil, nil + } else if _, ok := value.(unsetColumn); ok { + return nil, nil + } + + rv := reflect.ValueOf(value) + t := rv.Type() + k := t.Kind() + if k == reflect.Slice && rv.IsNil() { + return nil, nil + } + + switch k { + case reflect.Slice, reflect.Array: + buf := &bytes.Buffer{} + n := rv.Len() + if n != info.Dimensions { + return nil, marshalErrorf("expected vector with %d dimensions, received %d", info.Dimensions, n) + } + + for i := 0; i < n; i++ { + item, err := Marshal(info.SubType, rv.Index(i).Interface()) + if err != nil { + return nil, err + } + if isVectorVariableLengthType(info.SubType) { + writeUnsignedVInt(buf, uint64(len(item))) + } + buf.Write(item) + } + return buf.Bytes(), nil + } + return nil, marshalErrorf("can not marshal %T into %s", value, info) +} + +func unmarshalVector(info VectorType, data []byte, value interface{}) error { + rv := reflect.ValueOf(value) + if rv.Kind() != reflect.Ptr { + return unmarshalErrorf("can not unmarshal into non-pointer %T", value) + } + rv = rv.Elem() + t := rv.Type() + k := t.Kind() + switch k { + case reflect.Slice, reflect.Array: + if data == nil { + if k == reflect.Array { + return unmarshalErrorf("unmarshal vector: can not store nil in array value") + } + if rv.IsNil() { + return nil + } + rv.Set(reflect.Zero(t)) + return nil + } + if k == reflect.Array { + if rv.Len() != info.Dimensions { + return unmarshalErrorf("unmarshal vector: array of size %d cannot store vector of %d dimensions", rv.Len(), info.Dimensions) + } + } else { + rv.Set(reflect.MakeSlice(t, info.Dimensions, info.Dimensions)) + } + elemSize := len(data) / info.Dimensions + for i := 0; i < info.Dimensions; i++ { + offset := 0 + if isVectorVariableLengthType(info.SubType) { + m, p, err := readUnsignedVint(data, 0) + if err != nil { + return err + } + elemSize = int(m) + offset = p + } + if offset > 0 { + data = data[offset:] + } + var unmarshalData []byte + if elemSize >= 0 { + if len(data) < elemSize { + return unmarshalErrorf("unmarshal vector: unexpected eof") + } + unmarshalData = data[:elemSize] + data = data[elemSize:] + } + err := Unmarshal(info.SubType, unmarshalData, rv.Index(i).Addr().Interface()) + if err != nil { + return unmarshalErrorf("failed to unmarshal %s into %T: %s", info.SubType, unmarshalData, err.Error()) + } + } + return nil + } + return unmarshalErrorf("can not unmarshal %s into %T", info, value) +} + +func isVectorVariableLengthType(elemType TypeInfo) bool { + switch elemType.Type() { + case TypeVarchar, TypeAscii, TypeBlob, TypeText: + return true + case TypeCounter: + return true + case TypeDuration, TypeDate, TypeTime: + return true + case TypeDecimal, TypeSmallInt, TypeTinyInt, TypeVarint: + return true + case TypeInet: + return true + case TypeList, TypeSet, TypeMap, TypeUDT, TypeTuple: + return true Review Comment: You mean that instead all of this cases could be single "if", or one case for all suitable types? As I can see, it's structured by similar types for better readability, not sure that it should be merged into one. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: pr-unsubscr...@cassandra.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: pr-unsubscr...@cassandra.apache.org For additional commands, e-mail: pr-h...@cassandra.apache.org