This is an automated email from the ASF dual-hosted git repository. raulcd pushed a commit to branch maint-12.0.x in repository https://gitbox.apache.org/repos/asf/arrow.git
commit cfdb8cce04a6ff42702318f79204c014606960ca Author: Yevgeny Pats <[email protected]> AuthorDate: Thu Apr 13 14:11:56 2023 -0400 GH-34657: [Go] Add ValueString(i int) string to array (#34986) ### Rationale for this change ### What changes are included in this PR? ### Are these changes tested? ### Are there any user-facing changes? * Closes: #34657 Authored-by: Yevgeny Pats <[email protected]> Signed-off-by: Matt Topol <[email protected]> --- go/arrow/array.go | 2 + go/arrow/array/array.go | 1 + go/arrow/array/binary.go | 17 +- go/arrow/array/binary_test.go | 8 + go/arrow/array/binarybuilder.go | 20 +++ go/arrow/array/binarybuilder_test.go | 5 + go/arrow/array/boolean.go | 9 + go/arrow/array/boolean_test.go | 4 + go/arrow/array/booleanbuilder.go | 13 ++ go/arrow/array/booleanbuilder_test.go | 8 +- go/arrow/array/builder.go | 3 + go/arrow/array/decimal128.go | 21 +++ go/arrow/array/decimal128_test.go | 2 + go/arrow/array/decimal256.go | 21 +++ go/arrow/array/decimal256_test.go | 2 + go/arrow/array/dictionary.go | 8 + go/arrow/array/dictionary_test.go | 2 +- go/arrow/array/encoded.go | 15 ++ go/arrow/array/encoded_test.go | 1 + go/arrow/array/extension.go | 8 +- go/arrow/array/fixed_size_list.go | 12 ++ go/arrow/array/fixed_size_list_test.go | 3 + go/arrow/array/fixedsize_binary.go | 7 + go/arrow/array/fixedsize_binary_test.go | 5 +- go/arrow/array/fixedsize_binarybuilder.go | 15 ++ go/arrow/array/float16.go | 1 + go/arrow/array/float16_builder.go | 14 ++ go/arrow/array/float16_builder_test.go | 11 +- go/arrow/array/interval.go | 61 +++++++ go/arrow/array/list.go | 18 ++ go/arrow/array/list_test.go | 2 + go/arrow/array/map.go | 4 + go/arrow/array/null.go | 11 ++ go/arrow/array/numeric.gen.go | 115 +++++++++++++ go/arrow/array/numeric.gen.go.tmpl | 28 +++- go/arrow/array/numericbuilder.gen.go | 218 +++++++++++++++++++++++++ go/arrow/array/numericbuilder.gen.go.tmpl | 73 ++++++++- go/arrow/array/numericbuilder.gen_test.go | 7 +- go/arrow/array/numericbuilder.gen_test.go.tmpl | 5 +- go/arrow/array/string.go | 8 + go/arrow/array/string_test.go | 5 +- go/arrow/array/struct.go | 18 ++ go/arrow/array/struct_test.go | 6 +- go/arrow/array/union.go | 27 +++ go/arrow/csv/transformer.go | 2 +- go/internal/types/extension_types.go | 12 +- 46 files changed, 825 insertions(+), 33 deletions(-) diff --git a/go/arrow/array.go b/go/arrow/array.go index 9aad42b87c..ffcd20ccba 100644 --- a/go/arrow/array.go +++ b/go/arrow/array.go @@ -105,6 +105,8 @@ type Array interface { // IsValid returns true if value at index is not null. // NOTE: IsValid will panic if NullBitmapBytes is not empty and 0 > i ≥ Len. IsValid(i int) bool + // ValueStr returns the value at index as a string. + ValueStr(i int) string // Get single value to be marshalled with `json.Marshal` GetOneForMarshal(i int) interface{} diff --git a/go/arrow/array/array.go b/go/arrow/array/array.go index 25c245f346..6231ae9a3d 100644 --- a/go/arrow/array/array.go +++ b/go/arrow/array/array.go @@ -27,6 +27,7 @@ import ( const ( // UnknownNullCount specifies the NullN should be calculated from the null bitmap buffer. UnknownNullCount = -1 + NullValueStr = "(null)" ) type array struct { diff --git a/go/arrow/array/binary.go b/go/arrow/array/binary.go index 4a760d5720..0cc8e21653 100644 --- a/go/arrow/array/binary.go +++ b/go/arrow/array/binary.go @@ -18,6 +18,7 @@ package array import ( "bytes" + "encoding/base64" "fmt" "strings" "unsafe" @@ -56,7 +57,15 @@ func (a *Binary) Value(i int) []byte { return a.valueBytes[a.valueOffsets[idx]:a.valueOffsets[idx+1]] } -// ValueString returns the string at index i without performing additional allocations. +// ValueString returns the string at index i +func (a *Binary) ValueStr(i int) string { + if a.IsNull(i) { + return NullValueStr + } + return base64.StdEncoding.EncodeToString(a.Value(i)) +} + +// ValueStr returns the string at index i without performing additional allocations. // The string is only valid for the lifetime of the Binary array. func (a *Binary) ValueString(i int) string { b := a.Value(i) @@ -191,6 +200,12 @@ func (a *LargeBinary) Value(i int) []byte { return a.valueBytes[a.valueOffsets[idx]:a.valueOffsets[idx+1]] } +func (a *LargeBinary) ValueStr(i int) string { + if a.IsNull(i) { + return NullValueStr + } + return base64.StdEncoding.EncodeToString(a.Value(i)) +} func (a *LargeBinary) ValueString(i int) string { b := a.Value(i) return *(*string)(unsafe.Pointer(&b)) diff --git a/go/arrow/array/binary_test.go b/go/arrow/array/binary_test.go index f999ab0fb4..bf90972688 100644 --- a/go/arrow/array/binary_test.go +++ b/go/arrow/array/binary_test.go @@ -50,6 +50,8 @@ func TestBinary(t *testing.T) { assert.Equal(t, []byte("AAA"), a.Value(0)) assert.Equal(t, []byte{}, a.Value(1)) assert.Equal(t, []byte("BBBB"), a.Value(2)) + assert.Equal(t, "QUFB", a.ValueStr(0)) + assert.Equal(t, "(null)", a.ValueStr(1)) a.Release() // Test builder reset and NewArray API. @@ -60,6 +62,8 @@ func TestBinary(t *testing.T) { assert.Equal(t, []byte("AAA"), a.Value(0)) assert.Equal(t, []byte{}, a.Value(1)) assert.Equal(t, []byte("BBBB"), a.Value(2)) + assert.Equal(t, "QUFB", a.ValueStr(0)) + assert.Equal(t, "(null)", a.ValueStr(1)) a.Release() b.Release() @@ -92,6 +96,8 @@ func TestLargeBinary(t *testing.T) { assert.Equal(t, []byte("AAA"), a.Value(0)) assert.Equal(t, []byte{}, a.Value(1)) assert.Equal(t, []byte("BBBB"), a.Value(2)) + assert.Equal(t, "QUFB", a.ValueStr(0)) + assert.Equal(t, "(null)", a.ValueStr(1)) a.Release() // Test builder reset and NewArray API. @@ -102,6 +108,8 @@ func TestLargeBinary(t *testing.T) { assert.Equal(t, []byte("AAA"), a.Value(0)) assert.Equal(t, []byte{}, a.Value(1)) assert.Equal(t, []byte("BBBB"), a.Value(2)) + assert.Equal(t, "QUFB", a.ValueStr(0)) + assert.Equal(t, "(null)", a.ValueStr(1)) a.Release() b.Release() diff --git a/go/arrow/array/binarybuilder.go b/go/arrow/array/binarybuilder.go index dbba35f79a..401587e0e7 100644 --- a/go/arrow/array/binarybuilder.go +++ b/go/arrow/array/binarybuilder.go @@ -289,6 +289,26 @@ func (b *BinaryBuilder) appendNextOffset() { b.appendOffsetVal(numBytes) } +func (b *BinaryBuilder) AppendValueFromString(s string) error { + if s == NullValueStr { + b.AppendNull() + return nil + } + switch b.dtype.ID() { + case arrow.BINARY, arrow.LARGE_BINARY: + decodedVal, err := base64.StdEncoding.DecodeString(s) + if err != nil { + return fmt.Errorf("could not decode base64 string: %w", err) + } + b.Append(decodedVal) + case arrow.STRING, arrow.LARGE_STRING: + b.Append([]byte(s)) + default: + return fmt.Errorf("cannot append string to type %s", b.dtype) + } + return nil +} + func (b *BinaryBuilder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { diff --git a/go/arrow/array/binarybuilder_test.go b/go/arrow/array/binarybuilder_test.go index 4ebd8b7d71..566bb2f67e 100644 --- a/go/arrow/array/binarybuilder_test.go +++ b/go/arrow/array/binarybuilder_test.go @@ -38,6 +38,7 @@ func TestBinaryBuilder(t *testing.T) { ab.AppendNull() } else { ab.Append(v) + } } @@ -50,8 +51,12 @@ func TestBinaryBuilder(t *testing.T) { } assert.Equal(t, v, ab.Value(i), "unexpected BinaryArrayBuilder.Value(%d)", i) } + // Zm9v is foo in base64 + assert.NoError(t, ab.AppendValueFromString("Zm9v")) ar := ab.NewBinaryArray() + assert.Equal(t, "Zm9v", ar.ValueStr(5)) + ab.Release() ar.Release() diff --git a/go/arrow/array/boolean.go b/go/arrow/array/boolean.go index 6ab64eabc6..0f386bea00 100644 --- a/go/arrow/array/boolean.go +++ b/go/arrow/array/boolean.go @@ -18,6 +18,7 @@ package array import ( "fmt" + "strconv" "strings" "github.com/apache/arrow/go/v12/arrow" @@ -55,6 +56,14 @@ func (a *Boolean) Value(i int) bool { return bitutil.BitIsSet(a.values, a.array.data.offset+i) } +func (a *Boolean) ValueStr(i int) string { + if a.IsNull(i) { + return NullValueStr + } else { + return strconv.FormatBool(a.Value(i)) + } +} + func (a *Boolean) String() string { o := new(strings.Builder) o.WriteString("[") diff --git a/go/arrow/array/boolean_test.go b/go/arrow/array/boolean_test.go index 0ad4cad10f..80d6ea7d96 100644 --- a/go/arrow/array/boolean_test.go +++ b/go/arrow/array/boolean_test.go @@ -24,6 +24,7 @@ import ( "github.com/apache/arrow/go/v12/arrow/array" "github.com/apache/arrow/go/v12/arrow/memory" + "github.com/stretchr/testify/assert" ) func TestBooleanSliceData(t *testing.T) { @@ -285,4 +286,7 @@ func TestBooleanStringer(t *testing.T) { if got := out.String(); got != want { t.Fatalf("invalid stringer:\ngot= %q\nwant=%q", got, want) } + assert.Equal(t, "true", arr.ValueStr(0)) + assert.Equal(t, "false", arr.ValueStr(1)) + assert.Equal(t, "(null)", arr.ValueStr(2)) } diff --git a/go/arrow/array/booleanbuilder.go b/go/arrow/array/booleanbuilder.go index b2d25b4db8..75c53a4fb7 100644 --- a/go/arrow/array/booleanbuilder.go +++ b/go/arrow/array/booleanbuilder.go @@ -82,6 +82,19 @@ func (b *BooleanBuilder) AppendEmptyValue() { b.UnsafeAppend(false) } +func (b *BooleanBuilder) AppendValueFromString(s string) error { + if s == NullValueStr { + b.AppendNull() + return nil + } + val, err := strconv.ParseBool(s) + if err != nil { + return err + } + b.Append(val) + return nil +} + func (b *BooleanBuilder) UnsafeAppend(v bool) { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) if v { diff --git a/go/arrow/array/booleanbuilder_test.go b/go/arrow/array/booleanbuilder_test.go index d343294cae..2b89fbee8a 100644 --- a/go/arrow/array/booleanbuilder_test.go +++ b/go/arrow/array/booleanbuilder_test.go @@ -31,16 +31,20 @@ func TestBooleanBuilder_AppendValues(t *testing.T) { b := array.NewBooleanBuilder(mem) - exp := tools.Bools(1, 1, 0, 1, 1, 0, 1, 0) - got := make([]bool, len(exp)) + exp := tools.Bools(1, 1, 0, 1, 1, 0) + got := make([]bool, len(exp) + 2) b.AppendValues(exp, nil) + assert.NoError(t, b.AppendValueFromString("true")) + assert.NoError(t, b.AppendValueFromString("false")) + exp = tools.Bools(1, 1, 0, 1, 1, 0, 1, 0) a := b.NewBooleanArray() b.Release() for i := 0; i < a.Len(); i++ { got[i] = a.Value(i) } assert.Equal(t, exp, got) + a.Release() } diff --git a/go/arrow/array/builder.go b/go/arrow/array/builder.go index 26cc76bb1f..1805d97357 100644 --- a/go/arrow/array/builder.go +++ b/go/arrow/array/builder.go @@ -61,6 +61,9 @@ type Builder interface { // AppendEmptyValue adds a new zero value of the appropriate type AppendEmptyValue() + // AppendValueFromString adds a new value from a string. Inverse of array.ValueStr(i int) string + AppendValueFromString(string) error + // Reserve ensures there is enough space for appending n elements // by checking the capacity and calling Resize if necessary. Reserve(n int) diff --git a/go/arrow/array/decimal128.go b/go/arrow/array/decimal128.go index fb6423e201..bc2db26fca 100644 --- a/go/arrow/array/decimal128.go +++ b/go/arrow/array/decimal128.go @@ -48,6 +48,13 @@ func NewDecimal128Data(data arrow.ArrayData) *Decimal128 { } func (a *Decimal128) Value(i int) decimal128.Num { return a.values[i] } +func (a *Decimal128) ValueStr(i int) string { + if a.IsNull(i) { + return NullValueStr + } else { + return a.GetOneForMarshal(i).(string) + } +} func (a *Decimal128) Values() []decimal128.Num { return a.values } @@ -260,6 +267,20 @@ func (b *Decimal128Builder) newData() (data *Data) { return } +func (b *Decimal128Builder) AppendValueFromString(s string) error { + if s == NullValueStr { + b.AppendNull() + return nil + } + val, err := decimal128.FromString(s, b.dtype.Precision, b.dtype.Scale) + if err != nil { + b.AppendNull() + return err + } + b.Append(val) + return nil +} + func (b *Decimal128Builder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { diff --git a/go/arrow/array/decimal128_test.go b/go/arrow/array/decimal128_test.go index 59064b2ac6..305bed6d49 100644 --- a/go/arrow/array/decimal128_test.go +++ b/go/arrow/array/decimal128_test.go @@ -171,6 +171,8 @@ func TestDecimal128Slice(t *testing.T) { if got, want := v.String(), `[(null) {4 -4}]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } + assert.Equal(t, "(null)", v.ValueStr(0)) + assert.Equal(t, "-7.378697629e+18", v.ValueStr(1)) if got, want := v.NullN(), 1; got != want { t.Fatalf("got=%q, want=%q", got, want) diff --git a/go/arrow/array/decimal256.go b/go/arrow/array/decimal256.go index 0f007c7c6f..0b9cfed192 100644 --- a/go/arrow/array/decimal256.go +++ b/go/arrow/array/decimal256.go @@ -48,6 +48,13 @@ func NewDecimal256Data(data arrow.ArrayData) *Decimal256 { } func (a *Decimal256) Value(i int) decimal256.Num { return a.values[i] } +func (a *Decimal256) ValueStr(i int) string { + if a.IsNull(i) { + return NullValueStr + } else { + return a.GetOneForMarshal(i).(string) + } +} func (a *Decimal256) Values() []decimal256.Num { return a.values } @@ -259,6 +266,20 @@ func (b *Decimal256Builder) newData() (data *Data) { return } +func (b *Decimal256Builder) AppendValueFromString(s string) error { + if s == NullValueStr { + b.AppendNull() + return nil + } + val, err := decimal256.FromString(s, b.dtype.Precision, b.dtype.Scale) + if err != nil { + b.AppendNull() + return err + } + b.Append(val) + return nil +} + func (b *Decimal256Builder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { diff --git a/go/arrow/array/decimal256_test.go b/go/arrow/array/decimal256_test.go index de2641b9bf..1be9fce806 100644 --- a/go/arrow/array/decimal256_test.go +++ b/go/arrow/array/decimal256_test.go @@ -172,6 +172,8 @@ func TestDecimal256Slice(t *testing.T) { if got, want := v.String(), `[(null) {[4 4 4 4]}]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } + assert.Equal(t, "(null)", v.ValueStr(0)) + assert.Equal(t, "2.510840694e+57", v.ValueStr(1)) if got, want := v.NullN(), 1; got != want { t.Fatalf("got=%q, want=%q", got, want) diff --git a/go/arrow/array/dictionary.go b/go/arrow/array/dictionary.go index 70f77e242b..6ecda0b966 100644 --- a/go/arrow/array/dictionary.go +++ b/go/arrow/array/dictionary.go @@ -253,6 +253,10 @@ func (d *Dictionary) CanCompareIndices(other *Dictionary) bool { return ArraySliceEqual(d.Dictionary(), 0, minlen, other.Dictionary(), 0, minlen) } +func (d *Dictionary) ValueStr(i int) string { + return d.Dictionary().ValueStr(d.GetValueIndex(i)) +} + func (d *Dictionary) String() string { return fmt.Sprintf("{ dictionary: %v\n indices: %v }", d.Dictionary(), d.Indices()) } @@ -737,6 +741,10 @@ func (b *dictionaryBuilder) Unmarshal(dec *json.Decoder) error { return b.AppendArray(arr) } +func (b *dictionaryBuilder) AppendValueFromString(s string) error { + return fmt.Errorf("%w: AppendValueFromString to dictionary not yet implemented", arrow.ErrNotImplemented) +} + func (b *dictionaryBuilder) UnmarshalOne(dec *json.Decoder) error { return errors.New("unmarshal json to dictionary not yet implemented") } diff --git a/go/arrow/array/dictionary_test.go b/go/arrow/array/dictionary_test.go index 667bf5b24a..6f7794b27b 100644 --- a/go/arrow/array/dictionary_test.go +++ b/go/arrow/array/dictionary_test.go @@ -1173,7 +1173,7 @@ func TestDictionaryGetValueIndex(t *testing.T) { const offset = 1 slicedDictArr := array.NewSlice(dictArr, offset, int64(dictArr.Len())) defer slicedDictArr.Release() - + assert.EqualValues(t, "10", slicedDictArr.(*array.Dictionary).ValueStr(0)) for i := 0; i < indices.Len(); i++ { assert.EqualValues(t, i64Index.Value(i), dictArr.GetValueIndex(i)) if i < slicedDictArr.Len() { diff --git a/go/arrow/array/encoded.go b/go/arrow/array/encoded.go index 2668c908b1..3571a971cd 100644 --- a/go/arrow/array/encoded.go +++ b/go/arrow/array/encoded.go @@ -21,6 +21,7 @@ import ( "fmt" "math" "reflect" + "strings" "sync/atomic" "github.com/apache/arrow/go/v12/arrow" @@ -192,6 +193,15 @@ func (r *RunEndEncoded) GetPhysicalLength() int { return encoded.GetPhysicalLength(r.data) } +func (r *RunEndEncoded) ValueStr(i int) string { + value := r.values.GetOneForMarshal(i) + if byts, ok := value.(json.RawMessage); ok { + value = string(byts) + } + return fmt.Sprintf("{%d -> %v}", + r.ends.GetOneForMarshal(i), + value) +} func (r *RunEndEncoded) String() string { var buf bytes.Buffer buf.WriteByte('[') @@ -397,6 +407,11 @@ func (b *RunEndEncodedBuilder) newData() (data *Data) { return } +func (b *RunEndEncodedBuilder) AppendValueFromString(s string) error { + dec := json.NewDecoder(strings.NewReader(s)) + return b.UnmarshalOne(dec) +} + func (b *RunEndEncodedBuilder) UnmarshalOne(dec *json.Decoder) error { var value interface{} if err := dec.Decode(&value); err != nil { diff --git a/go/arrow/array/encoded_test.go b/go/arrow/array/encoded_test.go index 27bbff1884..573a194e7f 100644 --- a/go/arrow/array/encoded_test.go +++ b/go/arrow/array/encoded_test.go @@ -250,6 +250,7 @@ func TestRunEndEncodedBuilder(t *testing.T) { assert.Equal(t, "of", strValues.Value(3)) assert.Equal(t, "RLE", strValues.Value(4)) assert.True(t, strValues.IsNull(5)) + assert.Equal(t, "Hello", strValues.ValueStr(0)) } func TestREEBuilderOverflow(t *testing.T) { diff --git a/go/arrow/array/extension.go b/go/arrow/array/extension.go index 3ad4ec57c6..38f2630f1f 100644 --- a/go/arrow/array/extension.go +++ b/go/arrow/array/extension.go @@ -38,8 +38,6 @@ type ExtensionArray interface { ExtensionType() arrow.ExtensionType // Storage returns the underlying storage array for this array. Storage() arrow.Array - // ValueString returns a string represenation of the value at the given index for the extension array. - ValueString(i int) string // by having a non-exported function in the interface, it means that // consumers must embed ExtensionArrayBase in their structs in order // to fulfill this interface. @@ -185,10 +183,10 @@ func (e *ExtensionArrayBase) setData(data *Data) { e.storage = MakeFromData(storageData) } -// ValueString returns the value at index i as a string. +// ValueStr returns the value at index i as a string. // This needs to be implemented by the extension array type. -func (e *ExtensionArrayBase) ValueString(i int) string { - panic("arrow/array: ValueString wasn't implemented by this extension array type") +func (e *ExtensionArrayBase) ValueStr(i int) string { + panic("arrow/array: ValueStr wasn't implemented by this extension array type") } // no-op function that exists simply to force embedding this in any extension array types. diff --git a/go/arrow/array/fixed_size_list.go b/go/arrow/array/fixed_size_list.go index c79573da9e..c0c8676cd9 100644 --- a/go/arrow/array/fixed_size_list.go +++ b/go/arrow/array/fixed_size_list.go @@ -46,6 +46,12 @@ func NewFixedSizeListData(data arrow.ArrayData) *FixedSizeList { func (a *FixedSizeList) ListValues() arrow.Array { return a.values } +func (a *FixedSizeList) ValueStr(i int) string { + if !a.IsValid(i) { + return NullValueStr + } + return string(a.GetOneForMarshal(i).(json.RawMessage)) +} func (a *FixedSizeList) String() string { o := new(strings.Builder) o.WriteString("[") @@ -278,6 +284,12 @@ func (b *FixedSizeListBuilder) newData() (data *Data) { return } + +func (b *FixedSizeListBuilder) AppendValueFromString(s string) error { + dec := json.NewDecoder(strings.NewReader(s)) + return b.UnmarshalOne(dec) +} + func (b *FixedSizeListBuilder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { diff --git a/go/arrow/array/fixed_size_list_test.go b/go/arrow/array/fixed_size_list_test.go index 75e6078bdc..8372a02c54 100644 --- a/go/arrow/array/fixed_size_list_test.go +++ b/go/arrow/array/fixed_size_list_test.go @@ -23,6 +23,7 @@ import ( "github.com/apache/arrow/go/v12/arrow" "github.com/apache/arrow/go/v12/arrow/array" "github.com/apache/arrow/go/v12/arrow/memory" + "github.com/stretchr/testify/assert" ) func TestFixedSizeListArray(t *testing.T) { @@ -171,6 +172,8 @@ func TestFixedSizeListArrayStringer(t *testing.T) { if got, want := arr.String(), want; got != want { t.Fatalf("got=%q, want=%q", got, want) } + assert.Equal(t, "[0,1,2]", arr.ValueStr(0)) + assert.Equal(t, "(null)", arr.ValueStr(1)) } func TestFixedSizeListArraySlice(t *testing.T) { diff --git a/go/arrow/array/fixedsize_binary.go b/go/arrow/array/fixedsize_binary.go index 3c014c976d..51e0b97488 100644 --- a/go/arrow/array/fixedsize_binary.go +++ b/go/arrow/array/fixedsize_binary.go @@ -18,6 +18,7 @@ package array import ( "bytes" + "encoding/base64" "fmt" "strings" @@ -51,6 +52,12 @@ func (a *FixedSizeBinary) Value(i int) []byte { ) return a.valueBytes[beg:end] } +func (a *FixedSizeBinary) ValueStr(i int) string { + if a.IsNull(i) { + return NullValueStr + } + return base64.StdEncoding.EncodeToString(a.Value(i)) +} func (a *FixedSizeBinary) String() string { o := new(strings.Builder) diff --git a/go/arrow/array/fixedsize_binary_test.go b/go/arrow/array/fixedsize_binary_test.go index 52d4223346..7e1eb373bf 100644 --- a/go/arrow/array/fixedsize_binary_test.go +++ b/go/arrow/array/fixedsize_binary_test.go @@ -42,14 +42,17 @@ func TestFixedSizeBinary(t *testing.T) { } valid := []bool{true, false, true} b.AppendValues(values, valid) + // encoded abcdefg base64 + assert.NoError(t, b.AppendValueFromString("YWJjZGVmZw==")) b.Retain() b.Release() a := b.NewFixedSizeBinaryArray() - assert.Equal(t, 3, a.Len()) + assert.Equal(t, 4, a.Len()) assert.Equal(t, 1, a.NullN()) assert.Equal(t, []byte("7654321"), a.Value(0)) + assert.Equal(t, "YWJjZGVmZw==", a.ValueStr(3)) assert.Equal(t, zero, a.Value(1)) assert.Equal(t, true, a.IsNull(1)) assert.Equal(t, false, a.IsValid(1)) diff --git a/go/arrow/array/fixedsize_binarybuilder.go b/go/arrow/array/fixedsize_binarybuilder.go index 260d8b686d..02f355eec8 100644 --- a/go/arrow/array/fixedsize_binarybuilder.go +++ b/go/arrow/array/fixedsize_binarybuilder.go @@ -166,6 +166,21 @@ func (b *FixedSizeBinaryBuilder) newData() (data *Data) { return } +func (b *FixedSizeBinaryBuilder) AppendValueFromString(s string) error { + if s == NullValueStr { + b.AppendNull() + return nil + } + + data, err := base64.StdEncoding.DecodeString(s) + if err != nil { + b.AppendNull() + return err + } + b.Append(data) + return nil +} + func (b *FixedSizeBinaryBuilder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { diff --git a/go/arrow/array/float16.go b/go/arrow/array/float16.go index b4ed3150fa..252a81fce3 100644 --- a/go/arrow/array/float16.go +++ b/go/arrow/array/float16.go @@ -39,6 +39,7 @@ func NewFloat16Data(data arrow.ArrayData) *Float16 { } func (a *Float16) Value(i int) float16.Num { return a.values[i] } +func (a *Float16) ValueStr(i int) string { return a.Value(i).String()} func (a *Float16) Values() []float16.Num { return a.values } diff --git a/go/arrow/array/float16_builder.go b/go/arrow/array/float16_builder.go index 06ae2c4dfb..77efb4d479 100644 --- a/go/arrow/array/float16_builder.go +++ b/go/arrow/array/float16_builder.go @@ -176,6 +176,20 @@ func (b *Float16Builder) newData() (data *Data) { return } +func (b *Float16Builder) AppendValueFromString(s string) error { + if s == NullValueStr { + b.AppendNull() + return nil + } + v, err := strconv.ParseFloat(s, 32) + if err != nil { + b.AppendNull() + return err + } + b.Append(float16.New(float32(v))) + return nil +} + func (b *Float16Builder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { diff --git a/go/arrow/array/float16_builder_test.go b/go/arrow/array/float16_builder_test.go index a0d1219578..eca5f55c24 100644 --- a/go/arrow/array/float16_builder_test.go +++ b/go/arrow/array/float16_builder_test.go @@ -49,13 +49,14 @@ func TestNewFloat16Builder(t *testing.T) { ab.Append(float16.New(8)) ab.Append(float16.New(9)) ab.Append(float16.New(10)) - + assert.NoError(t, ab.AppendValueFromString("11.0")) + // check state of builder before NewFloat16Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") + assert.Equal(t, 11, ab.Len(), "unexpected Len()") assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") a := ab.NewFloat16Array() - + assert.Equal(t, "1", a.ValueStr(0)) // check state of builder after NewFloat16Array assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewFloat16Array did not reset state") assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewFloat16Array did not reset state") @@ -64,9 +65,9 @@ func TestNewFloat16Builder(t *testing.T) { // check state of array assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []float32{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, float32Values(a), "unexpected Float16Values") + assert.Equal(t, []float32{1, 2, 3, 0, 5, 6, 0, 8, 9, 10, 11}, float32Values(a), "unexpected Float16Values") assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.Values(), 10, "unexpected length of Float16Values") + assert.Len(t, a.Values(), 11, "unexpected length of Float16Values") a.Release() ab.Append(float16.New(7)) diff --git a/go/arrow/array/interval.go b/go/arrow/array/interval.go index d5844ea33d..cd8eaae072 100644 --- a/go/arrow/array/interval.go +++ b/go/arrow/array/interval.go @@ -19,6 +19,7 @@ package array import ( "bytes" "fmt" + "strconv" "strings" "sync/atomic" @@ -56,6 +57,12 @@ func NewMonthIntervalData(data arrow.ArrayData) *MonthInterval { } func (a *MonthInterval) Value(i int) arrow.MonthInterval { return a.values[i] } +func (a *MonthInterval) ValueStr(i int) string { + if a.IsNull(i) { + return NullValueStr + } + return fmt.Sprintf("%v", a.Value(i)) +} func (a *MonthInterval) MonthIntervalValues() []arrow.MonthInterval { return a.values } func (a *MonthInterval) String() string { @@ -267,6 +274,20 @@ func (b *MonthIntervalBuilder) newData() (data *Data) { return } +func (b *MonthIntervalBuilder) AppendValueFromString(s string) error { + if s == NullValueStr { + b.AppendNull() + return nil + } + v, err := strconv.ParseInt(s, 10, 32) + if err != nil { + b.AppendNull() + return err + } + b.Append(arrow.MonthInterval(v)) + return nil +} + func (b *MonthIntervalBuilder) UnmarshalOne(dec *json.Decoder) error { var v *arrow.MonthInterval if err := dec.Decode(&v); err != nil { @@ -321,6 +342,12 @@ func NewDayTimeIntervalData(data arrow.ArrayData) *DayTimeInterval { } func (a *DayTimeInterval) Value(i int) arrow.DayTimeInterval { return a.values[i] } +func (a *DayTimeInterval) ValueStr(i int) string { + if a.IsNull(i) { + return NullValueStr + } + return fmt.Sprintf("%q", a.Value(i)) +} func (a *DayTimeInterval) DayTimeIntervalValues() []arrow.DayTimeInterval { return a.values } func (a *DayTimeInterval) String() string { @@ -530,6 +557,20 @@ func (b *DayTimeIntervalBuilder) newData() (data *Data) { return } +func (b *DayTimeIntervalBuilder) AppendValueFromString(s string) error { + if s == NullValueStr { + b.AppendNull() + return nil + } + var v arrow.DayTimeInterval + if err := json.Unmarshal([]byte(s), &v); err != nil { + b.AppendNull() + return err + } + b.Append(v) + return nil +} + func (b *DayTimeIntervalBuilder) UnmarshalOne(dec *json.Decoder) error { var v *arrow.DayTimeInterval if err := dec.Decode(&v); err != nil { @@ -583,6 +624,13 @@ func NewMonthDayNanoIntervalData(data arrow.ArrayData) *MonthDayNanoInterval { } func (a *MonthDayNanoInterval) Value(i int) arrow.MonthDayNanoInterval { return a.values[i] } +func (a *MonthDayNanoInterval) ValueStr(i int) string { + if a.IsNull(i) { + return NullValueStr + } + return fmt.Sprintf("%q", a.Value(i)) +} + func (a *MonthDayNanoInterval) MonthDayNanoIntervalValues() []arrow.MonthDayNanoInterval { return a.values } @@ -796,6 +844,19 @@ func (b *MonthDayNanoIntervalBuilder) newData() (data *Data) { return } +func (b *MonthDayNanoIntervalBuilder) AppendValueFromString(s string) error { + if s == NullValueStr { + b.AppendNull() + return nil + } + var v arrow.MonthDayNanoInterval + if err := json.Unmarshal([]byte(s), &v); err != nil { + return err + } + b.Append(v) + return nil +} + func (b *MonthDayNanoIntervalBuilder) UnmarshalOne(dec *json.Decoder) error { var v *arrow.MonthDayNanoInterval if err := dec.Decode(&v); err != nil { diff --git a/go/arrow/array/list.go b/go/arrow/array/list.go index 168798f064..374ed28189 100644 --- a/go/arrow/array/list.go +++ b/go/arrow/array/list.go @@ -52,6 +52,13 @@ func NewListData(data arrow.ArrayData) *List { func (a *List) ListValues() arrow.Array { return a.values } +func (a *List) ValueStr(i int) string { + if !a.IsValid(i) { + return NullValueStr + } + return string(a.GetOneForMarshal(i).(json.RawMessage)) +} + func (a *List) String() string { o := new(strings.Builder) o.WriteString("[") @@ -175,6 +182,12 @@ func NewLargeListData(data arrow.ArrayData) *LargeList { func (a *LargeList) ListValues() arrow.Array { return a.values } +func (a *LargeList) ValueStr(i int) string { + if !a.IsValid(i) { + return NullValueStr + } + return string(a.GetOneForMarshal(i).(json.RawMessage)) +} func (a *LargeList) String() string { o := new(strings.Builder) o.WriteString("[") @@ -531,6 +544,11 @@ func (b *baseListBuilder) newData() (data *Data) { return } +func (b *baseListBuilder) AppendValueFromString(s string) error { + dec := json.NewDecoder(strings.NewReader(s)) + return b.UnmarshalOne(dec) +} + func (b *baseListBuilder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { diff --git a/go/arrow/array/list_test.go b/go/arrow/array/list_test.go index 2ddb1038bc..65ea9b9111 100644 --- a/go/arrow/array/list_test.go +++ b/go/arrow/array/list_test.go @@ -23,6 +23,7 @@ import ( "github.com/apache/arrow/go/v12/arrow" "github.com/apache/arrow/go/v12/arrow/array" "github.com/apache/arrow/go/v12/arrow/memory" + "github.com/stretchr/testify/assert" ) func TestListArray(t *testing.T) { @@ -289,6 +290,7 @@ func TestListArraySlice(t *testing.T) { if got, want := arr.String(), `[[0 1 2] (null) [] [3 4 5 6]]`; got != want { t.Fatalf("got=%q, want=%q", got, want) } + assert.Equal(t, "[0,1,2]", arr.ValueStr(0)) sub := array.NewSlice(arr, 1, 4).(array.ListLike) defer sub.Release() diff --git a/go/arrow/array/map.go b/go/arrow/array/map.go index b2b7174202..c28a3f9d66 100644 --- a/go/arrow/array/map.go +++ b/go/arrow/array/map.go @@ -300,6 +300,10 @@ func (b *MapBuilder) ValueBuilder() Builder { return b.listBuilder.ValueBuilder() } +func (b *MapBuilder) AppendValueFromString(s string) error { + return arrow.ErrNotImplemented +} + func (b *MapBuilder) UnmarshalOne(dec *json.Decoder) error { return b.listBuilder.UnmarshalOne(dec) } diff --git a/go/arrow/array/null.go b/go/arrow/array/null.go index f48d8387c5..8a547a5b74 100644 --- a/go/arrow/array/null.go +++ b/go/arrow/array/null.go @@ -58,6 +58,10 @@ func NewNullData(data arrow.ArrayData) *Null { return a } +func (a *Null) ValueStr(i int) string { + return NullValueStr +} + func (a *Null) String() string { o := new(strings.Builder) o.WriteString("[") @@ -114,6 +118,13 @@ func (b *NullBuilder) AppendNull() { b.builder.nulls++ } +func (b *NullBuilder) AppendValueFromString(s string) error { + if s == NullValueStr { + b.AppendNull() + return nil + } + return fmt.Errorf("cannot convert %q to null", s) +} func (b *NullBuilder) AppendEmptyValue() { b.AppendNull() } func (*NullBuilder) Reserve(size int) {} diff --git a/go/arrow/array/numeric.gen.go b/go/arrow/array/numeric.gen.go index 08dc77501e..5e9d5c7560 100644 --- a/go/arrow/array/numeric.gen.go +++ b/go/arrow/array/numeric.gen.go @@ -20,6 +20,7 @@ package array import ( "fmt" + "strconv" "strings" "github.com/apache/arrow/go/v12/arrow" @@ -81,6 +82,13 @@ func (a *Int64) setData(data *Data) { } } +func (a *Int64) ValueStr(i int) string { + if a.IsNull(i) { + return NullValueStr + } + return strconv.FormatInt(int64(a.Value(i)), 10) +} + func (a *Int64) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil @@ -169,6 +177,13 @@ func (a *Uint64) setData(data *Data) { } } +func (a *Uint64) ValueStr(i int) string { + if a.IsNull(i) { + return NullValueStr + } + return strconv.FormatUint(uint64(a.Value(i)), 10) +} + func (a *Uint64) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil @@ -257,6 +272,13 @@ func (a *Float64) setData(data *Data) { } } +func (a *Float64) ValueStr(i int) string { + if a.IsNull(i) { + return NullValueStr + } + return strconv.FormatFloat(float64(a.Value(i)), 'g', -1, 64) +} + func (a *Float64) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil @@ -345,6 +367,13 @@ func (a *Int32) setData(data *Data) { } } +func (a *Int32) ValueStr(i int) string { + if a.IsNull(i) { + return NullValueStr + } + return strconv.FormatInt(int64(a.Value(i)), 10) +} + func (a *Int32) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil @@ -433,6 +462,13 @@ func (a *Uint32) setData(data *Data) { } } +func (a *Uint32) ValueStr(i int) string { + if a.IsNull(i) { + return NullValueStr + } + return strconv.FormatUint(uint64(a.Value(i)), 10) +} + func (a *Uint32) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil @@ -521,6 +557,13 @@ func (a *Float32) setData(data *Data) { } } +func (a *Float32) ValueStr(i int) string { + if a.IsNull(i) { + return NullValueStr + } + return strconv.FormatFloat(float64(a.Value(i)), 'g', -1, 32) +} + func (a *Float32) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil @@ -609,6 +652,13 @@ func (a *Int16) setData(data *Data) { } } +func (a *Int16) ValueStr(i int) string { + if a.IsNull(i) { + return NullValueStr + } + return strconv.FormatInt(int64(a.Value(i)), 10) +} + func (a *Int16) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil @@ -697,6 +747,13 @@ func (a *Uint16) setData(data *Data) { } } +func (a *Uint16) ValueStr(i int) string { + if a.IsNull(i) { + return NullValueStr + } + return strconv.FormatUint(uint64(a.Value(i)), 10) +} + func (a *Uint16) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil @@ -785,6 +842,13 @@ func (a *Int8) setData(data *Data) { } } +func (a *Int8) ValueStr(i int) string { + if a.IsNull(i) { + return NullValueStr + } + return strconv.FormatInt(int64(a.Value(i)), 10) +} + func (a *Int8) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil @@ -873,6 +937,13 @@ func (a *Uint8) setData(data *Data) { } } +func (a *Uint8) ValueStr(i int) string { + if a.IsNull(i) { + return NullValueStr + } + return strconv.FormatUint(uint64(a.Value(i)), 10) +} + func (a *Uint8) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil @@ -961,6 +1032,13 @@ func (a *Timestamp) setData(data *Data) { } } +func (a *Timestamp) ValueStr(i int) string { + if a.IsNull(i) { + return NullValueStr + } + return a.values[i].ToTime(a.DataType().(*arrow.TimestampType).Unit).Format("2006-01-02 15:04:05.999999999") +} + func (a *Timestamp) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil @@ -1044,6 +1122,14 @@ func (a *Time32) setData(data *Data) { } } +func (a *Time32) ValueStr(i int) string { + if a.IsNull(i) { + return NullValueStr + } + a.values[i].FormattedString(a.DataType().(*arrow.Time32Type).Unit) + return a.values[i].ToTime(a.DataType().(*arrow.Time32Type).Unit).Format("15:04:05.999999999") +} + func (a *Time32) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil @@ -1127,6 +1213,13 @@ func (a *Time64) setData(data *Data) { } } +func (a *Time64) ValueStr(i int) string { + if a.IsNull(i) { + return NullValueStr + } + return a.values[i].ToTime(a.DataType().(*arrow.Time64Type).Unit).Format("15:04:05.999999999") +} + func (a *Time64) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil @@ -1210,6 +1303,13 @@ func (a *Date32) setData(data *Data) { } } +func (a *Date32) ValueStr(i int) string { + if a.IsNull(i) { + return NullValueStr + } + return a.values[i].ToTime().Format("2006-01-02") +} + func (a *Date32) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil @@ -1293,6 +1393,13 @@ func (a *Date64) setData(data *Data) { } } +func (a *Date64) ValueStr(i int) string { + if a.IsNull(i) { + return NullValueStr + } + return a.values[i].ToTime().Format("2006-01-02") +} + func (a *Date64) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil @@ -1376,6 +1483,14 @@ func (a *Duration) setData(data *Data) { } } +func (a *Duration) ValueStr(i int) string { + if a.IsNull(i) { + return NullValueStr + } + // return value and suffix as a string such as "12345ms" + return fmt.Sprintf("%d%s", a.values[i], a.DataType().(*arrow.DurationType).Unit.String()) +} + func (a *Duration) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil diff --git a/go/arrow/array/numeric.gen.go.tmpl b/go/arrow/array/numeric.gen.go.tmpl index 3fd7ae6e98..01418520fb 100644 --- a/go/arrow/array/numeric.gen.go.tmpl +++ b/go/arrow/array/numeric.gen.go.tmpl @@ -82,6 +82,32 @@ func (a *{{.Name}}) setData(data *Data) { } } +func (a *{{.Name}}) ValueStr(i int) string { + if a.IsNull(i) { + return NullValueStr + } +{{if or (eq .Name "Date32") (eq .Name "Date64") -}} + return a.values[i].FormattedString() +{{else if or (eq .Name "Time32") (eq .Name "Time64") -}} + return a.values[i].FormattedString(a.DataType().(*{{.QualifiedType}}Type).Unit) +{{else if or (eq .Name "Timestamp") -}} + return a.values[i].ToTime(a.DataType().(*{{.QualifiedType}}Type).Unit).Format("2006-01-02 15:04:05.999999999") +{{else if (eq .Name "Duration") -}} + // return value and suffix as a string such as "12345ms" + return fmt.Sprintf("%d%s", a.values[i], a.DataType().(*{{.QualifiedType}}Type).Unit) +{{else if or (eq .Name "Int8") (eq .Name "Int16") (eq .Name "Int32") (eq .Name "Int64") -}} + return strconv.FormatInt(int64(a.Value(i)), 10) +{{else if or (eq .Name "Uint8") (eq .Name "Uint16") (eq .Name "Uint32") (eq .Name "Uint64") -}} + return strconv.FormatUint(uint64(a.Value(i)), 10) +{{else if or (eq .Name "Float32") -}} + return strconv.FormatFloat(float64(a.Value(i)), 'g', -1, 32) +{{else if or (eq .Name "Float64") -}} + return strconv.FormatFloat(float64(a.Value(i)), 'g', -1, 64) +{{else}} + return fmt.Sprintf("%v", a.values[i]) +{{end -}} +} + func (a *{{.Name}}) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil @@ -106,7 +132,7 @@ func (a *{{.Name}}) MarshalJSON() ([]byte, error) { {{if .QualifiedType -}} vals := make([]interface{}, a.Len()) for i := range a.values { - vals[i] = a.getOneForMarshal(i) + vals[i] = a.GetOneForMarshal(i) } {{else -}} vals := make([]interface{}, a.Len()) diff --git a/go/arrow/array/numericbuilder.gen.go b/go/arrow/array/numericbuilder.gen.go index 81cf262fd1..0cff3a581f 100644 --- a/go/arrow/array/numericbuilder.gen.go +++ b/go/arrow/array/numericbuilder.gen.go @@ -176,6 +176,20 @@ func (b *Int64Builder) newData() (data *Data) { return } +func (b *Int64Builder) AppendValueFromString(s string) error { + if s == NullValueStr { + b.AppendNull() + return nil + } + v, err := strconv.ParseInt(s, 10, 64) + if err != nil { + b.AppendNull() + return err + } + b.Append(int64(v)) + return nil +} + func (b *Int64Builder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { @@ -385,6 +399,20 @@ func (b *Uint64Builder) newData() (data *Data) { return } +func (b *Uint64Builder) AppendValueFromString(s string) error { + if s == NullValueStr { + b.AppendNull() + return nil + } + v, err := strconv.ParseUint(s, 10, 64) + if err != nil { + b.AppendNull() + return err + } + b.Append(uint64(v)) + return nil +} + func (b *Uint64Builder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { @@ -594,6 +622,20 @@ func (b *Float64Builder) newData() (data *Data) { return } +func (b *Float64Builder) AppendValueFromString(s string) error { + if s == NullValueStr { + b.AppendNull() + return nil + } + v, err := strconv.ParseFloat(s, 64) + if err != nil { + b.AppendNull() + return err + } + b.Append(float64(v)) + return nil +} + func (b *Float64Builder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { @@ -803,6 +845,20 @@ func (b *Int32Builder) newData() (data *Data) { return } +func (b *Int32Builder) AppendValueFromString(s string) error { + if s == NullValueStr { + b.AppendNull() + return nil + } + v, err := strconv.ParseInt(s, 10, 32) + if err != nil { + b.AppendNull() + return err + } + b.Append(int32(v)) + return nil +} + func (b *Int32Builder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { @@ -1012,6 +1068,20 @@ func (b *Uint32Builder) newData() (data *Data) { return } +func (b *Uint32Builder) AppendValueFromString(s string) error { + if s == NullValueStr { + b.AppendNull() + return nil + } + v, err := strconv.ParseUint(s, 10, 32) + if err != nil { + b.AppendNull() + return err + } + b.Append(uint32(v)) + return nil +} + func (b *Uint32Builder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { @@ -1221,6 +1291,20 @@ func (b *Float32Builder) newData() (data *Data) { return } +func (b *Float32Builder) AppendValueFromString(s string) error { + if s == NullValueStr { + b.AppendNull() + return nil + } + v, err := strconv.ParseFloat(s, 32) + if err != nil { + b.AppendNull() + return err + } + b.Append(float32(v)) + return nil +} + func (b *Float32Builder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { @@ -1430,6 +1514,20 @@ func (b *Int16Builder) newData() (data *Data) { return } +func (b *Int16Builder) AppendValueFromString(s string) error { + if s == NullValueStr { + b.AppendNull() + return nil + } + v, err := strconv.ParseInt(s, 10, 16) + if err != nil { + b.AppendNull() + return err + } + b.Append(int16(v)) + return nil +} + func (b *Int16Builder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { @@ -1639,6 +1737,20 @@ func (b *Uint16Builder) newData() (data *Data) { return } +func (b *Uint16Builder) AppendValueFromString(s string) error { + if s == NullValueStr { + b.AppendNull() + return nil + } + v, err := strconv.ParseUint(s, 10, 16) + if err != nil { + b.AppendNull() + return err + } + b.Append(uint16(v)) + return nil +} + func (b *Uint16Builder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { @@ -1848,6 +1960,20 @@ func (b *Int8Builder) newData() (data *Data) { return } +func (b *Int8Builder) AppendValueFromString(s string) error { + if s == NullValueStr { + b.AppendNull() + return nil + } + v, err := strconv.ParseInt(s, 10, 8) + if err != nil { + b.AppendNull() + return err + } + b.Append(int8(v)) + return nil +} + func (b *Int8Builder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { @@ -2057,6 +2183,20 @@ func (b *Uint8Builder) newData() (data *Data) { return } +func (b *Uint8Builder) AppendValueFromString(s string) error { + if s == NullValueStr { + b.AppendNull() + return nil + } + v, err := strconv.ParseUint(s, 10, 8) + if err != nil { + b.AppendNull() + return err + } + b.Append(uint8(v)) + return nil +} + func (b *Uint8Builder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { @@ -2267,6 +2407,20 @@ func (b *TimestampBuilder) newData() (data *Data) { return } +func (b *TimestampBuilder) AppendValueFromString(s string) error { + if s == NullValueStr { + b.AppendNull() + return nil + } + v, err := arrow.TimestampFromString(s, b.dtype.Unit) + if err != nil { + b.AppendNull() + return err + } + b.Append(v) + return nil +} + func (b *TimestampBuilder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { @@ -2479,6 +2633,20 @@ func (b *Time32Builder) newData() (data *Data) { return } +func (b *Time32Builder) AppendValueFromString(s string) error { + if s == NullValueStr { + b.AppendNull() + return nil + } + val, err := arrow.Time32FromString(s, b.dtype.Unit) + if err != nil { + b.AppendNull() + return err + } + b.Append(val) + return nil +} + func (b *Time32Builder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { @@ -2690,6 +2858,20 @@ func (b *Time64Builder) newData() (data *Data) { return } +func (b *Time64Builder) AppendValueFromString(s string) error { + if s == NullValueStr { + b.AppendNull() + return nil + } + val, err := arrow.Time64FromString(s, b.dtype.Unit) + if err != nil { + b.AppendNull() + return err + } + b.Append(val) + return nil +} + func (b *Time64Builder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { @@ -2900,6 +3082,20 @@ func (b *Date32Builder) newData() (data *Data) { return } +func (b *Date32Builder) AppendValueFromString(s string) error { + if s == NullValueStr { + b.AppendNull() + return nil + } + tm, err := time.Parse("2006-01-02", s) + if err != nil { + b.AppendNull() + return err + } + b.Append(arrow.Date32FromTime(tm)) + return nil +} + func (b *Date32Builder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { @@ -3109,6 +3305,20 @@ func (b *Date64Builder) newData() (data *Data) { return } +func (b *Date64Builder) AppendValueFromString(s string) error { + if s == NullValueStr { + b.AppendNull() + return nil + } + tm, err := time.Parse("2006-01-02", s) + if err != nil { + b.AppendNull() + return err + } + b.Append(arrow.Date64FromTime(tm)) + return nil +} + func (b *Date64Builder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { @@ -3319,6 +3529,14 @@ func (b *DurationBuilder) newData() (data *Data) { return } +func (b *DurationBuilder) AppendValueFromString(s string) error { + if s == NullValueStr { + b.AppendNull() + return nil + } + return fmt.Errorf("%w: AppendValueFromString not implemented for Duration", arrow.ErrNotImplemented) +} + func (b *DurationBuilder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { diff --git a/go/arrow/array/numericbuilder.gen.go.tmpl b/go/arrow/array/numericbuilder.gen.go.tmpl index c842845224..d414945ac0 100644 --- a/go/arrow/array/numericbuilder.gen.go.tmpl +++ b/go/arrow/array/numericbuilder.gen.go.tmpl @@ -184,6 +184,75 @@ func (b *{{.Name}}Builder) newData() (data *Data) { return } +func (b *{{.Name}}Builder) AppendValueFromString(s string) error { + if s == NullValueStr { + b.AppendNull() + return nil + } + {{if or (eq .Name "Date32") -}} + tm, err := time.Parse("2006-01-02", s) + if err != nil { + b.AppendNull() + return err + } + b.Append(arrow.Date32FromTime(tm)) + {{else if or (eq .Name "Date64") -}} + tm, err := time.Parse("2006-01-02", s) + if err != nil { + b.AppendNull() + return err + } + b.Append(arrow.Date64FromTime(tm)) + {{else if or (eq .Name "Time32") -}} + val, err := arrow.Time32FromString(s, b.dtype.Unit) + if err != nil { + b.AppendNull() + return err + } + b.Append(val) + {{else if or (eq .Name "Time64") -}} + val, err := arrow.Time64FromString(s, b.dtype.Unit) + if err != nil { + b.AppendNull() + return err + } + b.Append(val) + {{else if or (eq .Name "Timestamp") -}} + v, err := arrow.TimestampFromString(s, b.dtype.Unit) + if err != nil { + b.AppendNull() + return err + } + b.Append(v) + {{else if (eq .Name "Duration") -}} + return fmt.Errorf("%w: AppendValueFromString not implemented for Duration", ErrNotImplemented) + {{else if or (eq .Name "Int8") (eq .Name "Int16") (eq .Name "Int32") (eq .Name "Int64") -}} + v, err := strconv.ParseInt(s, 10, {{.Size}}) + if err != nil { + b.AppendNull() + return err + } + b.Append({{.name}}(v)) + {{else if or (eq .Name "Uint8") (eq .Name "Uint16") (eq .Name "Uint32") (eq .Name "Uint64") -}} + v, err := strconv.ParseUint(s, 10, {{.Size}}) + if err != nil { + b.AppendNull() + return err + } + b.Append({{.name}}(v)) + {{else if or (eq .Name "Float32") (eq .Name "Float64") -}} + v, err := strconv.ParseFloat(s, {{.Size}}) + if err != nil { + b.AppendNull() + return err + } + b.Append({{.name}}(v)) + {{else}} + return fmt.Errorf("%w: AppendValueFromString not implemented for {{.Name}}", ErrNotImplemented) + {{end -}} + return nil +} + func (b *{{.Name}}Builder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { @@ -342,7 +411,7 @@ func (b *{{.Name}}Builder) UnmarshalOne(dec *json.Decoder) error { func (b *{{.Name}}Builder) Unmarshal(dec *json.Decoder) error { for dec.More() { - if err := b.unmarshalOne(dec); err != nil { + if err := b.UnmarshalOne(dec); err != nil { return err } } @@ -360,7 +429,7 @@ func (b *{{.Name}}Builder) UnmarshalJSON(data []byte) error { return fmt.Errorf("binary builder must unpack from json array, found %s", delim) } - return b.unmarshal(dec) + return b.Unmarshal(dec) } {{end}} diff --git a/go/arrow/array/numericbuilder.gen_test.go b/go/arrow/array/numericbuilder.gen_test.go index a95c669a00..f48bce48f4 100644 --- a/go/arrow/array/numericbuilder.gen_test.go +++ b/go/arrow/array/numericbuilder.gen_test.go @@ -47,9 +47,10 @@ func TestNewInt64Builder(t *testing.T) { ab.Append(8) ab.Append(9) ab.Append(10) + assert.NoError(t, ab.AppendValueFromString("11")) // check state of builder before NewInt64Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") + assert.Equal(t, 11, ab.Len(), "unexpected Len()") assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") a := ab.NewInt64Array() @@ -61,9 +62,9 @@ func TestNewInt64Builder(t *testing.T) { // check state of array assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []int64{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Int64Values(), "unexpected Int64Values") + assert.Equal(t, []int64{1, 2, 3, 0, 5, 6, 0, 8, 9, 10, 11}, a.Int64Values(), "unexpected Int64Values") assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.Int64Values(), 10, "unexpected length of Int64Values") + assert.Len(t, a.Int64Values(), 11, "unexpected length of Int64Values") a.Release() diff --git a/go/arrow/array/numericbuilder.gen_test.go.tmpl b/go/arrow/array/numericbuilder.gen_test.go.tmpl index 47bd429446..996a8f5e3c 100644 --- a/go/arrow/array/numericbuilder.gen_test.go.tmpl +++ b/go/arrow/array/numericbuilder.gen_test.go.tmpl @@ -51,9 +51,10 @@ func TestNew{{.Name}}Builder(t *testing.T) { ab.Append(8) ab.Append(9) ab.Append(10) + ab.AppendValueFromString(11) // check state of builder before New{{.Name}}Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") + assert.Equal(t, 11, ab.Len(), "unexpected Len()") assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") a := ab.New{{.Name}}Array() @@ -65,7 +66,7 @@ func TestNew{{.Name}}Builder(t *testing.T) { // check state of array assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []{{or .QualifiedType .Type}}{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.{{.Name}}Values(), "unexpected {{.Name}}Values") + assert.Equal(t, []{{or .QualifiedType .Type}}{1, 2, 3, 0, 5, 6, 0, 8, 9, 10, 11}, a.{{.Name}}Values(), "unexpected {{.Name}}Values") assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity assert.Len(t, a.{{.Name}}Values(), 10, "unexpected length of {{.Name}}Values") diff --git a/go/arrow/array/string.go b/go/arrow/array/string.go index dd215b40e8..20a67f2540 100644 --- a/go/arrow/array/string.go +++ b/go/arrow/array/string.go @@ -53,6 +53,13 @@ func (a *String) Value(i int) string { i = i + a.array.data.offset return a.values[a.offsets[i]:a.offsets[i+1]] } +func (a *String) ValueStr(i int) string { + if a.IsNull(i) { + return "(null)" + } else { + return a.Value(i) + } +} // ValueOffset returns the offset of the value at index i. func (a *String) ValueOffset(i int) int { @@ -188,6 +195,7 @@ func (a *LargeString) Value(i int) string { i = i + a.array.data.offset return a.values[a.offsets[i]:a.offsets[i+1]] } +func (a *LargeString) ValueStr(i int) string { return a.Value(i) } // ValueOffset returns the offset of the value at index i. func (a *LargeString) ValueOffset(i int) int64 { diff --git a/go/arrow/array/string_test.go b/go/arrow/array/string_test.go index 9aafd0cda0..9575820e59 100644 --- a/go/arrow/array/string_test.go +++ b/go/arrow/array/string_test.go @@ -44,7 +44,8 @@ func TestStringArray(t *testing.T) { sb.Retain() sb.Release() - sb.AppendValues(want[:2], nil) + assert.NoError(t, sb.AppendValueFromString(want[0])) + sb.AppendValues(want[1:2], nil) sb.AppendNull() sb.Append(want[3]) @@ -63,6 +64,8 @@ func TestStringArray(t *testing.T) { arr.Retain() arr.Release() + assert.Equal(t, "hello", arr.ValueStr(0)) + if got, want := arr.Len(), len(want); got != want { t.Fatalf("invalid len: got=%d, want=%d", got, want) } diff --git a/go/arrow/array/struct.go b/go/arrow/array/struct.go index b67722ee17..77fb368529 100644 --- a/go/arrow/array/struct.go +++ b/go/arrow/array/struct.go @@ -81,6 +81,16 @@ func NewStructData(data arrow.ArrayData) *Struct { func (a *Struct) NumField() int { return len(a.fields) } func (a *Struct) Field(i int) arrow.Array { return a.fields[i] } +// ValueStr returns the string representation (as json) of the value at index i. +func (a *Struct) ValueStr(i int) string { + var buf bytes.Buffer + enc := json.NewEncoder(&buf) + if err := enc.Encode(a.GetOneForMarshal(i)); err != nil { + panic(err) + } + return buf.String() +} + func (a *Struct) String() string { o := new(strings.Builder) o.WriteString("{") @@ -351,6 +361,14 @@ func (b *StructBuilder) newData() (data *Data) { return } +func (b *StructBuilder) AppendValueFromString(s string) error { + if !strings.HasPrefix(s, "{") && !strings.HasSuffix(s, "}") { + return fmt.Errorf("%w: invalid string for struct should be be of form: {*}", arrow.ErrInvalid,) + } + dec := json.NewDecoder(strings.NewReader(s)) + return b.UnmarshalOne(dec) +} + func (b *StructBuilder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { diff --git a/go/arrow/array/struct_test.go b/go/arrow/array/struct_test.go index 4acf157621..9316ab75c6 100644 --- a/go/arrow/array/struct_test.go +++ b/go/arrow/array/struct_test.go @@ -23,6 +23,7 @@ import ( "github.com/apache/arrow/go/v12/arrow" "github.com/apache/arrow/go/v12/arrow/array" "github.com/apache/arrow/go/v12/arrow/memory" + "github.com/stretchr/testify/assert" ) func TestStructArray(t *testing.T) { @@ -290,11 +291,12 @@ func TestStructArrayStringer(t *testing.T) { f2b.Append(f2s[i]) } } - + assert.NoError(t, sb.AppendValueFromString(`{"f1": 1.1, "f2": 1}`)) arr := sb.NewArray().(*array.Struct) defer arr.Release() - want := "{[1.1 (null) 1.3 1.4] [1 2 (null) 4]}" + assert.Equal(t, "{\"f1\":1.1,\"f2\":1}\n", arr.ValueStr(4)) + want := "{[1.1 (null) 1.3 1.4 1.1] [1 2 (null) 4 1]}" got := arr.String() if got != want { t.Fatalf("invalid string representation:\ngot = %q\nwant= %q", got, want) diff --git a/go/arrow/array/union.go b/go/arrow/array/union.go index 190881189b..263525174d 100644 --- a/go/arrow/array/union.go +++ b/go/arrow/array/union.go @@ -343,6 +343,14 @@ func (a *SparseUnion) MarshalJSON() ([]byte, error) { return buf.Bytes(), nil } +func (a *SparseUnion) ValueStr(i int) string { + var buf bytes.Buffer + enc := json.NewEncoder(&buf) + if err := enc.Encode(a.GetOneForMarshal(i)); err != nil { + panic(err) + } + return buf.String() +} func (a *SparseUnion) String() string { var b strings.Builder b.WriteByte('[') @@ -601,6 +609,15 @@ func (a *DenseUnion) MarshalJSON() ([]byte, error) { return buf.Bytes(), nil } +func (a *DenseUnion) ValueStr(i int) string { + var buf bytes.Buffer + enc := json.NewEncoder(&buf) + if err := enc.Encode(a.GetOneForMarshal(i)); err != nil { + panic(err) + } + return buf.String() +} + func (a *DenseUnion) String() string { var b strings.Builder b.WriteByte('[') @@ -987,6 +1004,11 @@ func (b *SparseUnionBuilder) Unmarshal(dec *json.Decoder) error { return nil } +func (b *SparseUnionBuilder) AppendValueFromString(s string) error { + dec := json.NewDecoder(strings.NewReader(s)) + return b.UnmarshalOne(dec) +} + func (b *SparseUnionBuilder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { @@ -1232,6 +1254,11 @@ func (b *DenseUnionBuilder) Unmarshal(dec *json.Decoder) error { return nil } +func (d *DenseUnionBuilder) AppendValueFromString(s string) error { + dec := json.NewDecoder(strings.NewReader(s)) + return d.UnmarshalOne(dec) +} + func (b *DenseUnionBuilder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { diff --git a/go/arrow/csv/transformer.go b/go/arrow/csv/transformer.go index 46b0c4fdee..9b25992c3d 100644 --- a/go/arrow/csv/transformer.go +++ b/go/arrow/csv/transformer.go @@ -231,7 +231,7 @@ func (w *Writer) transformColToStringArr(typ arrow.DataType, col arrow.Array) [] if arr.IsNull(i) { res[i] = w.nullValue } else { - res[i] = arr.ValueString(i) + res[i] = arr.ValueStr(i) } } default: diff --git a/go/internal/types/extension_types.go b/go/internal/types/extension_types.go index bb087a7d34..47de771d67 100644 --- a/go/internal/types/extension_types.go +++ b/go/internal/types/extension_types.go @@ -122,7 +122,7 @@ type UUIDArray struct { array.ExtensionArrayBase } -func (a UUIDArray) ValueString(i int) string { +func (a UUIDArray) ValueStr(i int) string { if a.IsNull(i) { return "(null)" } @@ -235,7 +235,7 @@ type Parametric1Array struct { array.ExtensionArrayBase } -func (a Parametric1Array) ValueString(i int) string { +func (a Parametric1Array) ValueStr(i int) string { arr := a.Storage().(*array.Int32) if a.IsNull(i) { return "(null)" @@ -250,7 +250,7 @@ type Parametric2Array struct { array.ExtensionArrayBase } -func (a Parametric2Array) ValueString(i int) string { +func (a Parametric2Array) ValueStr(i int) string { arr := a.Storage().(*array.Int32) if a.IsNull(i) { return "(null)" @@ -368,7 +368,7 @@ type ExtStructArray struct { array.ExtensionArrayBase } -func (a ExtStructArray) ValueString(i int) string { +func (a ExtStructArray) ValueStr(i int) string { arr := a.Storage().(*array.Struct) if a.IsNull(i) { return "(null)" @@ -425,7 +425,7 @@ type DictExtensionArray struct { array.ExtensionArrayBase } -func (a DictExtensionArray) ValueString(i int) string { +func (a DictExtensionArray) ValueStr(i int) string { arr := a.Storage().(*array.Dictionary) if a.IsNull(i) { return "(null)" @@ -476,7 +476,7 @@ type SmallintArray struct { array.ExtensionArrayBase } -func (a SmallintArray) ValueString(i int) string { +func (a SmallintArray) ValueStr(i int) string { if a.IsNull(i) { return "(null)" }
