This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 762bd2ab2e GH-35711: [Go] Add `Value` and `GetValueIndex` methods to
some builders (#35744)
762bd2ab2e is described below
commit 762bd2ab2ebd99b64022b4d42f4a8b035cbf2dd9
Author: Matthias Loibl <[email protected]>
AuthorDate: Tue May 30 17:40:54 2023 +0100
GH-35711: [Go] Add `Value` and `GetValueIndex` methods to some builders
(#35744)
### Rationale for this change
See #35711
### What changes are included in this PR?
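Adds `(*BinaryDictionaryBuilder).GetValueIndex(i int) int`, which returns the
dictionary index stored at position `i` of the builder. Also adds two methods
that return the dictionary value at a given index, as a `[]byte` or as a
`string`: `Value(i int) []byte` and `ValueStr(i int) string`. The generated
numeric builders gain a matching `Value(i int)` accessor.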
### Are these changes tested?
So far I have added very little testing: mostly an integration-style test
that reads values back from the `BinaryDictionaryBuilder`. We should definitely
add a test for `(*BinaryMemoTable).Value(i int) []byte`.
### Are there any user-facing changes?
Just additions, no breaking API changes.
cc @zeroshade
* Closes: #35711
Authored-by: Matthias Loibl <[email protected]>
Signed-off-by: Matt Topol <[email protected]>
---
go/arrow/array/dictionary.go | 35 ++++++++++++++++++
go/arrow/array/dictionary_test.go | 4 +++
go/arrow/array/numericbuilder.gen.go | 60 +++++++++++++++++++++++++++++++
go/arrow/array/numericbuilder.gen.go.tmpl | 4 +++
go/internal/hashing/xxh3_memo_table.go | 4 +++
5 files changed, 107 insertions(+)
diff --git a/go/arrow/array/dictionary.go b/go/arrow/array/dictionary.go
index 1b0de34b8e..1cbaac5a2c 100644
--- a/go/arrow/array/dictionary.go
+++ b/go/arrow/array/dictionary.go
@@ -1293,6 +1293,41 @@ func (b *BinaryDictionaryBuilder) InsertStringDictValues(arr *String) (err error
return
}
+func (b *BinaryDictionaryBuilder) GetValueIndex(i int) int {
+ switch b := b.idxBuilder.Builder.(type) {
+ case *Uint8Builder:
+ return int(b.Value(i))
+ case *Int8Builder:
+ return int(b.Value(i))
+ case *Uint16Builder:
+ return int(b.Value(i))
+ case *Int16Builder:
+ return int(b.Value(i))
+ case *Uint32Builder:
+ return int(b.Value(i))
+ case *Int32Builder:
+ return int(b.Value(i))
+ case *Uint64Builder:
+ return int(b.Value(i))
+ case *Int64Builder:
+ return int(b.Value(i))
+ default:
+ return -1
+ }
+}
+
+func (b *BinaryDictionaryBuilder) Value(i int) []byte {
+ switch mt := b.memoTable.(type) {
+ case *hashing.BinaryMemoTable:
+ return mt.Value(i)
+ }
+ return nil
+}
+
+func (b *BinaryDictionaryBuilder) ValueStr(i int) string {
+ return string(b.Value(i))
+}
+
type FixedSizeBinaryDictionaryBuilder struct {
dictionaryBuilder
byteWidth int
diff --git a/go/arrow/array/dictionary_test.go b/go/arrow/array/dictionary_test.go
index 7758c82c45..23b21e5aba 100644
--- a/go/arrow/array/dictionary_test.go
+++ b/go/arrow/array/dictionary_test.go
@@ -431,6 +431,10 @@ func TestBasicStringDictionaryBuilder(t *testing.T) {
assert.NoError(t, builder.AppendString("test2"))
assert.NoError(t, builder.AppendString("test"))
+ assert.Equal(t, "test", builder.ValueStr(builder.GetValueIndex(0)))
+ assert.Equal(t, "test2", builder.ValueStr(builder.GetValueIndex(1)))
+ assert.Equal(t, "test", builder.ValueStr(builder.GetValueIndex(2)))
+
result := bldr.NewDictionaryArray()
defer result.Release()
diff --git a/go/arrow/array/numericbuilder.gen.go b/go/arrow/array/numericbuilder.gen.go
index b76a7292d7..4f9e68e3d3 100644
--- a/go/arrow/array/numericbuilder.gen.go
+++ b/go/arrow/array/numericbuilder.gen.go
@@ -143,6 +143,10 @@ func (b *Int64Builder) Resize(n int) {
}
}
+func (b *Int64Builder) Value(i int) int64 {
+ return b.rawData[i]
+}
+
// NewArray creates a Int64 array from the memory buffers used by the builder
and resets the Int64Builder
// so it can be used to build a new array.
func (b *Int64Builder) NewArray() arrow.Array {
@@ -366,6 +370,10 @@ func (b *Uint64Builder) Resize(n int) {
}
}
+func (b *Uint64Builder) Value(i int) uint64 {
+ return b.rawData[i]
+}
+
// NewArray creates a Uint64 array from the memory buffers used by the builder
and resets the Uint64Builder
// so it can be used to build a new array.
func (b *Uint64Builder) NewArray() arrow.Array {
@@ -589,6 +597,10 @@ func (b *Float64Builder) Resize(n int) {
}
}
+func (b *Float64Builder) Value(i int) float64 {
+ return b.rawData[i]
+}
+
// NewArray creates a Float64 array from the memory buffers used by the
builder and resets the Float64Builder
// so it can be used to build a new array.
func (b *Float64Builder) NewArray() arrow.Array {
@@ -812,6 +824,10 @@ func (b *Int32Builder) Resize(n int) {
}
}
+func (b *Int32Builder) Value(i int) int32 {
+ return b.rawData[i]
+}
+
// NewArray creates a Int32 array from the memory buffers used by the builder
and resets the Int32Builder
// so it can be used to build a new array.
func (b *Int32Builder) NewArray() arrow.Array {
@@ -1035,6 +1051,10 @@ func (b *Uint32Builder) Resize(n int) {
}
}
+func (b *Uint32Builder) Value(i int) uint32 {
+ return b.rawData[i]
+}
+
// NewArray creates a Uint32 array from the memory buffers used by the builder
and resets the Uint32Builder
// so it can be used to build a new array.
func (b *Uint32Builder) NewArray() arrow.Array {
@@ -1258,6 +1278,10 @@ func (b *Float32Builder) Resize(n int) {
}
}
+func (b *Float32Builder) Value(i int) float32 {
+ return b.rawData[i]
+}
+
// NewArray creates a Float32 array from the memory buffers used by the
builder and resets the Float32Builder
// so it can be used to build a new array.
func (b *Float32Builder) NewArray() arrow.Array {
@@ -1481,6 +1505,10 @@ func (b *Int16Builder) Resize(n int) {
}
}
+func (b *Int16Builder) Value(i int) int16 {
+ return b.rawData[i]
+}
+
// NewArray creates a Int16 array from the memory buffers used by the builder
and resets the Int16Builder
// so it can be used to build a new array.
func (b *Int16Builder) NewArray() arrow.Array {
@@ -1704,6 +1732,10 @@ func (b *Uint16Builder) Resize(n int) {
}
}
+func (b *Uint16Builder) Value(i int) uint16 {
+ return b.rawData[i]
+}
+
// NewArray creates a Uint16 array from the memory buffers used by the builder
and resets the Uint16Builder
// so it can be used to build a new array.
func (b *Uint16Builder) NewArray() arrow.Array {
@@ -1927,6 +1959,10 @@ func (b *Int8Builder) Resize(n int) {
}
}
+func (b *Int8Builder) Value(i int) int8 {
+ return b.rawData[i]
+}
+
// NewArray creates a Int8 array from the memory buffers used by the builder
and resets the Int8Builder
// so it can be used to build a new array.
func (b *Int8Builder) NewArray() arrow.Array {
@@ -2150,6 +2186,10 @@ func (b *Uint8Builder) Resize(n int) {
}
}
+func (b *Uint8Builder) Value(i int) uint8 {
+ return b.rawData[i]
+}
+
// NewArray creates a Uint8 array from the memory buffers used by the builder
and resets the Uint8Builder
// so it can be used to build a new array.
func (b *Uint8Builder) NewArray() arrow.Array {
@@ -2374,6 +2414,10 @@ func (b *Time32Builder) Resize(n int) {
}
}
+func (b *Time32Builder) Value(i int) arrow.Time32 {
+ return b.rawData[i]
+}
+
// NewArray creates a Time32 array from the memory buffers used by the builder
and resets the Time32Builder
// so it can be used to build a new array.
func (b *Time32Builder) NewArray() arrow.Array {
@@ -2598,6 +2642,10 @@ func (b *Time64Builder) Resize(n int) {
}
}
+func (b *Time64Builder) Value(i int) arrow.Time64 {
+ return b.rawData[i]
+}
+
// NewArray creates a Time64 array from the memory buffers used by the builder
and resets the Time64Builder
// so it can be used to build a new array.
func (b *Time64Builder) NewArray() arrow.Array {
@@ -2821,6 +2869,10 @@ func (b *Date32Builder) Resize(n int) {
}
}
+func (b *Date32Builder) Value(i int) arrow.Date32 {
+ return b.rawData[i]
+}
+
// NewArray creates a Date32 array from the memory buffers used by the builder
and resets the Date32Builder
// so it can be used to build a new array.
func (b *Date32Builder) NewArray() arrow.Array {
@@ -3044,6 +3096,10 @@ func (b *Date64Builder) Resize(n int) {
}
}
+func (b *Date64Builder) Value(i int) arrow.Date64 {
+ return b.rawData[i]
+}
+
// NewArray creates a Date64 array from the memory buffers used by the builder
and resets the Date64Builder
// so it can be used to build a new array.
func (b *Date64Builder) NewArray() arrow.Array {
@@ -3268,6 +3324,10 @@ func (b *DurationBuilder) Resize(n int) {
}
}
+func (b *DurationBuilder) Value(i int) arrow.Duration {
+ return b.rawData[i]
+}
+
// NewArray creates a Duration array from the memory buffers used by the
builder and resets the DurationBuilder
// so it can be used to build a new array.
func (b *DurationBuilder) NewArray() arrow.Array {
diff --git a/go/arrow/array/numericbuilder.gen.go.tmpl b/go/arrow/array/numericbuilder.gen.go.tmpl
index 6eb12b5aeb..c4c12e9b3d 100644
--- a/go/arrow/array/numericbuilder.gen.go.tmpl
+++ b/go/arrow/array/numericbuilder.gen.go.tmpl
@@ -147,6 +147,10 @@ func (b *{{.Name}}Builder) Resize(n int) {
}
}
+func (b *{{.Name}}Builder) Value(i int) {{or .QualifiedType .Type}} {
+ return b.rawData[i]
+}
+
// NewArray creates a {{.Name}} array from the memory buffers used by the
builder and resets the {{.Name}}Builder
// so it can be used to build a new array.
func (b *{{.Name}}Builder) NewArray() arrow.Array {
diff --git a/go/internal/hashing/xxh3_memo_table.go b/go/internal/hashing/xxh3_memo_table.go
index f06b369a4a..9e0cc96a04 100644
--- a/go/internal/hashing/xxh3_memo_table.go
+++ b/go/internal/hashing/xxh3_memo_table.go
@@ -264,6 +264,10 @@ func (b *BinaryMemoTable) GetOrInsertNull() (idx int, found bool) {
return
}
+func (b *BinaryMemoTable) Value(i int) []byte {
+ return b.builder.Value(i)
+}
+
// helper function to get the offset into the builder data for a given
// index value.
func (b *BinaryMemoTable) findOffset(idx int) uintptr {