This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git
The following commit(s) were added to refs/heads/main by this push:
new a661aa47 refactor(arrow): third increment of the Record -> RecordBatch
migration (#478)
a661aa47 is described below
commit a661aa4711c27a065907512c69bf2e9d3454b936
Author: Mandukhai Alimaa <[email protected]>
AuthorDate: Wed Aug 20 03:38:05 2025 +0800
refactor(arrow): third increment of the Record -> RecordBatch migration
(#478)
### Rationale for this change
Rename the Record interface to RecordBatch for clarity, since Record
commonly means a single row but this type represents a batch of rows.
### What changes are included in this PR?
The following packages now use RecordBatch instead of Record:
arrow/compute
arrow/util
arrow/array
arrow/internal/arrdata
arrow/internal/arrjson
arrow/internal/dictutils
### Are these changes tested?
All tests pass for arrow/compute, arrow/util, arrow/array,
arrow/internal/arrjson, and arrow/internal/dictutils.
arrow/internal/arrdata has no test files.
### Migration Strategy
This is the third increment of the Record → RecordBatch migration.
---------
Co-authored-by: MANDY Alimaa <[email protected]>
---
arrow/array/compare.go | 4 +--
arrow/array/record.go | 2 +-
arrow/array/table.go | 14 ++++----
arrow/array/util.go | 8 ++---
arrow/compute/datum.go | 8 ++---
arrow/compute/fieldref.go | 8 ++---
arrow/compute/selection.go | 2 +-
arrow/internal/arrdata/arrdata.go | 74 +++++++++++++++++++--------------------
arrow/internal/arrdata/ioutil.go | 14 ++++----
arrow/internal/arrjson/arrjson.go | 8 ++---
arrow/internal/arrjson/reader.go | 6 ++--
arrow/internal/arrjson/writer.go | 2 +-
arrow/internal/dictutils/dict.go | 4 +--
arrow/util/byte_size.go | 6 ++--
arrow/util/protobuf_reflect.go | 4 +--
15 files changed, 82 insertions(+), 82 deletions(-)
diff --git a/arrow/array/compare.go b/arrow/array/compare.go
index 0278b4ca..3f1dad17 100644
--- a/arrow/array/compare.go
+++ b/arrow/array/compare.go
@@ -26,7 +26,7 @@ import (
)
// RecordEqual reports whether the two provided records are equal.
-func RecordEqual(left, right arrow.Record) bool {
+func RecordEqual(left, right arrow.RecordBatch) bool {
switch {
case left.NumCols() != right.NumCols():
return false
@@ -46,7 +46,7 @@ func RecordEqual(left, right arrow.Record) bool {
// RecordApproxEqual reports whether the two provided records are
approximately equal.
// For non-floating point columns, it is equivalent to RecordEqual.
-func RecordApproxEqual(left, right arrow.Record, opts ...EqualOption) bool {
+func RecordApproxEqual(left, right arrow.RecordBatch, opts ...EqualOption)
bool {
switch {
case left.NumCols() != right.NumCols():
return false
diff --git a/arrow/array/record.go b/arrow/array/record.go
index 18a50ed0..69f4cbaf 100644
--- a/arrow/array/record.go
+++ b/arrow/array/record.go
@@ -471,7 +471,7 @@ func ReaderFromIter(schema *arrow.Schema, itr
iter.Seq2[arrow.Record, error]) Re
// then Retain must be called on it.
func IterFromReader(rdr RecordReader) iter.Seq2[arrow.Record, error] {
rdr.Retain()
- return func(yield func(arrow.Record, error) bool) {
+ return func(yield func(arrow.RecordBatch, error) bool) {
defer rdr.Release()
for rdr.Next() {
if !yield(rdr.Record(), nil) {
diff --git a/arrow/array/table.go b/arrow/array/table.go
index 367b1b10..6c2f3365 100644
--- a/arrow/array/table.go
+++ b/arrow/array/table.go
@@ -175,7 +175,7 @@ func NewTableFromSlice(schema *arrow.Schema, data
[][]arrow.Array) arrow.Table {
// NewTableFromRecords returns a new basic, non-lazy in-memory table.
//
// NewTableFromRecords panics if the records and schema are inconsistent.
-func NewTableFromRecords(schema *arrow.Schema, recs []arrow.Record)
arrow.Table {
+func NewTableFromRecords(schema *arrow.Schema, recs []arrow.RecordBatch)
arrow.Table {
arrs := make([]arrow.Array, len(recs))
cols := make([]arrow.Column, schema.NumFields())
@@ -282,10 +282,10 @@ type TableReader struct {
refCount atomic.Int64
tbl arrow.Table
- cur int64 // current row
- max int64 // total number of rows
- rec arrow.Record // current Record
- chksz int64 // chunk size
+ cur int64 // current row
+ max int64 // total number of rows
+ rec arrow.RecordBatch // current RecordBatch
+ chksz int64 // chunk size
chunks []*arrow.Chunked
slots []int // chunk indices
@@ -320,8 +320,8 @@ func NewTableReader(tbl arrow.Table, chunkSize int64)
*TableReader {
return tr
}
-func (tr *TableReader) Schema() *arrow.Schema { return tr.tbl.Schema() }
-func (tr *TableReader) Record() arrow.Record { return tr.rec }
+func (tr *TableReader) Schema() *arrow.Schema { return tr.tbl.Schema() }
+func (tr *TableReader) Record() arrow.RecordBatch { return tr.rec }
func (tr *TableReader) Next() bool {
if tr.cur >= tr.max {
diff --git a/arrow/array/util.go b/arrow/array/util.go
index 1d27b2f8..9305e4c2 100644
--- a/arrow/array/util.go
+++ b/arrow/array/util.go
@@ -197,7 +197,7 @@ func RecordToStructArray(rec arrow.Record) *Struct {
// of the struct will be used to define the record batch. Otherwise the passed
in
// schema will be used to create the record batch. If passed in, the schema
must match
// the fields of the struct column.
-func RecordFromStructArray(in *Struct, schema *arrow.Schema) arrow.Record {
+func RecordFromStructArray(in *Struct, schema *arrow.Schema) arrow.RecordBatch
{
if schema == nil {
schema =
arrow.NewSchema(in.DataType().(*arrow.StructType).Fields(), nil)
}
@@ -213,7 +213,7 @@ func RecordFromStructArray(in *Struct, schema
*arrow.Schema) arrow.Record {
//
// See https://github.com/apache/arrow-go/issues/448 for more details on
// why this isn't a simple wrapper around FromJSON.
-func RecordFromJSON(mem memory.Allocator, schema *arrow.Schema, r io.Reader,
opts ...FromJSONOption) (arrow.Record, int64, error) {
+func RecordFromJSON(mem memory.Allocator, schema *arrow.Schema, r io.Reader,
opts ...FromJSONOption) (arrow.RecordBatch, int64, error) {
var cfg fromJSONCfg
for _, o := range opts {
o(&cfg)
@@ -279,7 +279,7 @@ func RecordFromJSON(mem memory.Allocator, schema
*arrow.Schema, r io.Reader, opt
// RecordToJSON writes out the given record following the format of each row
is a single object
// on a single line of the output.
-func RecordToJSON(rec arrow.Record, w io.Writer) error {
+func RecordToJSON(rec arrow.RecordBatch, w io.Writer) error {
enc := json.NewEncoder(w)
fields := rec.Schema().Fields()
@@ -297,7 +297,7 @@ func RecordToJSON(rec arrow.Record, w io.Writer) error {
}
func TableFromJSON(mem memory.Allocator, sc *arrow.Schema, recJSON []string,
opt ...FromJSONOption) (arrow.Table, error) {
- batches := make([]arrow.Record, len(recJSON))
+ batches := make([]arrow.RecordBatch, len(recJSON))
for i, batchJSON := range recJSON {
batch, _, err := RecordFromJSON(mem, sc,
strings.NewReader(batchJSON), opt...)
if err != nil {
diff --git a/arrow/compute/datum.go b/arrow/compute/datum.go
index 438f10a3..3287d58b 100644
--- a/arrow/compute/datum.go
+++ b/arrow/compute/datum.go
@@ -198,7 +198,7 @@ func (d *ChunkedDatum) Equals(other Datum) bool {
// RecordDatum contains an array.Record for passing a full record to an
expression
// or to compute.
type RecordDatum struct {
- Value arrow.Record
+ Value arrow.RecordBatch
}
func (RecordDatum) Kind() DatumKind { return KindRecord }
@@ -247,8 +247,8 @@ func (d *TableDatum) Equals(other Datum) bool {
// An arrow.Array gets an ArrayDatum
// An array.Chunked gets a ChunkedDatum
// An array.Record gets a RecordDatum
-// an array.Table gets a TableDatum
-// a scalar.Scalar gets a ScalarDatum
+// An array.Table gets a TableDatum
+// A scalar.Scalar gets a ScalarDatum
//
// Anything else is passed to scalar.MakeScalar and receives a scalar
// datum of that appropriate type.
@@ -285,7 +285,7 @@ func NewDatumWithoutOwning(value interface{}) Datum {
return &ArrayDatum{v}
case *arrow.Chunked:
return &ChunkedDatum{v}
- case arrow.Record:
+ case arrow.RecordBatch:
return &RecordDatum{v}
case arrow.Table:
return &TableDatum{v}
diff --git a/arrow/compute/fieldref.go b/arrow/compute/fieldref.go
index 55ec3372..92786bf8 100644
--- a/arrow/compute/fieldref.go
+++ b/arrow/compute/fieldref.go
@@ -162,7 +162,7 @@ func (f FieldPath) GetField(field arrow.Field)
(*arrow.Field, error) {
// GetColumn will return the correct child array by traversing the fieldpath
// going to the nested arrays of the columns in the record batch.
-func (f FieldPath) GetColumn(batch arrow.Record) (arrow.Array, error) {
+func (f FieldPath) GetColumn(batch arrow.RecordBatch) (arrow.Array, error) {
return f.getArray(batch.Columns())
}
@@ -514,7 +514,7 @@ func (f FieldRef) FindOneOrNone(schema *arrow.Schema)
(FieldPath, error) {
// FindOneOrNoneRecord is like FindOneOrNone but for the schema of a record,
// returning an error only if there are multiple matches.
-func (f FieldRef) FindOneOrNoneRecord(root arrow.Record) (FieldPath, error) {
+func (f FieldRef) FindOneOrNoneRecord(root arrow.RecordBatch) (FieldPath,
error) {
return f.FindOneOrNone(root.Schema())
}
@@ -533,7 +533,7 @@ func (f FieldRef) FindOne(schema *arrow.Schema) (FieldPath,
error) {
// GetAllColumns gets all the matching column arrays from the given record that
// this FieldRef references.
-func (f FieldRef) GetAllColumns(root arrow.Record) ([]arrow.Array, error) {
+func (f FieldRef) GetAllColumns(root arrow.RecordBatch) ([]arrow.Array, error)
{
out := make([]arrow.Array, 0)
for _, m := range f.FindAll(root.Schema().Fields()) {
n, err := m.GetColumn(root)
@@ -571,7 +571,7 @@ func (f FieldRef) GetOneOrNone(schema *arrow.Schema)
(*arrow.Field, error) {
// GetOneColumnOrNone returns either a nil or the referenced array if it can be
// found, erroring only if there is an ambiguous multiple matches.
-func (f FieldRef) GetOneColumnOrNone(root arrow.Record) (arrow.Array, error) {
+func (f FieldRef) GetOneColumnOrNone(root arrow.RecordBatch) (arrow.Array,
error) {
match, err := f.FindOneOrNoneRecord(root)
if err != nil {
return nil, err
diff --git a/arrow/compute/selection.go b/arrow/compute/selection.go
index feac9d88..77ab8a6d 100644
--- a/arrow/compute/selection.go
+++ b/arrow/compute/selection.go
@@ -576,7 +576,7 @@ func FilterArray(ctx context.Context, values, filter
arrow.Array, options Filter
return outDatum.(*ArrayDatum).MakeArray(), nil
}
-func FilterRecordBatch(ctx context.Context, batch arrow.Record, filter
arrow.Array, opts *FilterOptions) (arrow.Record, error) {
+func FilterRecordBatch(ctx context.Context, batch arrow.RecordBatch, filter
arrow.Array, opts *FilterOptions) (arrow.RecordBatch, error) {
if batch.NumRows() != int64(filter.Len()) {
return nil, fmt.Errorf("%w: filter inputs must all be the same
length", arrow.ErrInvalid)
}
diff --git a/arrow/internal/arrdata/arrdata.go
b/arrow/internal/arrdata/arrdata.go
index 3630aa8b..ea4221e5 100644
--- a/arrow/internal/arrdata/arrdata.go
+++ b/arrow/internal/arrdata/arrdata.go
@@ -32,7 +32,7 @@ import (
)
var (
- Records = make(map[string][]arrow.Record)
+ Records = make(map[string][]arrow.RecordBatch)
RecordNames []string
)
@@ -62,7 +62,7 @@ func init() {
sort.Strings(RecordNames)
}
-func makeNullRecords() []arrow.Record {
+func makeNullRecords() []arrow.RecordBatch {
mem := memory.NewGoAllocator()
meta := arrow.NewMetadata(
@@ -97,7 +97,7 @@ func makeNullRecords() []arrow.Record {
}
}()
- recs := make([]arrow.Record, len(chunks))
+ recs := make([]arrow.RecordBatch, len(chunks))
for i, chunk := range chunks {
recs[i] = array.NewRecord(schema, chunk, -1)
}
@@ -105,7 +105,7 @@ func makeNullRecords() []arrow.Record {
return recs
}
-func makePrimitiveRecords() []arrow.Record {
+func makePrimitiveRecords() []arrow.RecordBatch {
mem := memory.NewGoAllocator()
meta := arrow.NewMetadata(
@@ -180,7 +180,7 @@ func makePrimitiveRecords() []arrow.Record {
}
}()
- recs := make([]arrow.Record, len(chunks))
+ recs := make([]arrow.RecordBatch, len(chunks))
for i, chunk := range chunks {
recs[i] = array.NewRecord(schema, chunk, -1)
}
@@ -188,7 +188,7 @@ func makePrimitiveRecords() []arrow.Record {
return recs
}
-func makeStructsRecords() []arrow.Record {
+func makeStructsRecords() []arrow.RecordBatch {
mem := memory.NewGoAllocator()
fields := []arrow.Field{
@@ -258,7 +258,7 @@ func makeStructsRecords() []arrow.Record {
}
}()
- recs := make([]arrow.Record, len(chunks))
+ recs := make([]arrow.RecordBatch, len(chunks))
for i, chunk := range chunks {
recs[i] = array.NewRecord(schema, chunk, -1)
}
@@ -266,7 +266,7 @@ func makeStructsRecords() []arrow.Record {
return recs
}
-func makeListsRecords() []arrow.Record {
+func makeListsRecords() []arrow.RecordBatch {
mem := memory.NewGoAllocator()
dtype := arrow.ListOf(arrow.PrimitiveTypes.Int32)
schema := arrow.NewSchema([]arrow.Field{
@@ -315,7 +315,7 @@ func makeListsRecords() []arrow.Record {
}
}()
- recs := make([]arrow.Record, len(chunks))
+ recs := make([]arrow.RecordBatch, len(chunks))
for i, chunk := range chunks {
recs[i] = array.NewRecord(schema, chunk, -1)
}
@@ -323,7 +323,7 @@ func makeListsRecords() []arrow.Record {
return recs
}
-func makeListViewsRecords() []arrow.Record {
+func makeListViewsRecords() []arrow.RecordBatch {
mem := memory.NewGoAllocator()
dtype := arrow.ListViewOf(arrow.PrimitiveTypes.Int32)
schema := arrow.NewSchema([]arrow.Field{
@@ -372,7 +372,7 @@ func makeListViewsRecords() []arrow.Record {
}
}()
- recs := make([]arrow.Record, len(chunks))
+ recs := make([]arrow.RecordBatch, len(chunks))
for i, chunk := range chunks {
recs[i] = array.NewRecord(schema, chunk, -1)
}
@@ -380,7 +380,7 @@ func makeListViewsRecords() []arrow.Record {
return recs
}
-func makeFixedSizeListsRecords() []arrow.Record {
+func makeFixedSizeListsRecords() []arrow.RecordBatch {
mem := memory.NewGoAllocator()
const N = 3
dtype := arrow.FixedSizeListOf(N, arrow.PrimitiveTypes.Int32)
@@ -422,7 +422,7 @@ func makeFixedSizeListsRecords() []arrow.Record {
}
}()
- recs := make([]arrow.Record, len(chunks))
+ recs := make([]arrow.RecordBatch, len(chunks))
for i, chunk := range chunks {
recs[i] = array.NewRecord(schema, chunk, -1)
}
@@ -430,7 +430,7 @@ func makeFixedSizeListsRecords() []arrow.Record {
return recs
}
-func makeStringsRecords() []arrow.Record {
+func makeStringsRecords() []arrow.RecordBatch {
mem := memory.NewGoAllocator()
schema := arrow.NewSchema([]arrow.Field{
{Name: "strings", Type: arrow.BinaryTypes.String},
@@ -461,7 +461,7 @@ func makeStringsRecords() []arrow.Record {
}
}()
- recs := make([]arrow.Record, len(chunks))
+ recs := make([]arrow.RecordBatch, len(chunks))
for i, chunk := range chunks {
recs[i] = array.NewRecord(schema, chunk, -1)
}
@@ -485,7 +485,7 @@ var (
null nullT
)
-func makeFixedWidthTypesRecords() []arrow.Record {
+func makeFixedWidthTypesRecords() []arrow.RecordBatch {
mem := memory.NewGoAllocator()
schema := arrow.NewSchema(
[]arrow.Field{
@@ -562,7 +562,7 @@ func makeFixedWidthTypesRecords() []arrow.Record {
}
}()
- recs := make([]arrow.Record, len(chunks))
+ recs := make([]arrow.RecordBatch, len(chunks))
for i, chunk := range chunks {
recs[i] = array.NewRecord(schema, chunk, -1)
}
@@ -572,7 +572,7 @@ func makeFixedWidthTypesRecords() []arrow.Record {
type fsb3 string
-func makeFixedSizeBinariesRecords() []arrow.Record {
+func makeFixedSizeBinariesRecords() []arrow.RecordBatch {
mem := memory.NewGoAllocator()
schema := arrow.NewSchema(
[]arrow.Field{
@@ -601,7 +601,7 @@ func makeFixedSizeBinariesRecords() []arrow.Record {
}
}()
- recs := make([]arrow.Record, len(chunks))
+ recs := make([]arrow.RecordBatch, len(chunks))
for i, chunk := range chunks {
recs[i] = array.NewRecord(schema, chunk, -1)
}
@@ -609,7 +609,7 @@ func makeFixedSizeBinariesRecords() []arrow.Record {
return recs
}
-func makeIntervalsRecords() []arrow.Record {
+func makeIntervalsRecords() []arrow.RecordBatch {
mem := memory.NewGoAllocator()
schema := arrow.NewSchema(
@@ -682,7 +682,7 @@ func makeIntervalsRecords() []arrow.Record {
}
}()
- recs := make([]arrow.Record, len(chunks))
+ recs := make([]arrow.RecordBatch, len(chunks))
for i, chunk := range chunks {
recs[i] = array.NewRecord(schema, chunk, -1)
}
@@ -697,7 +697,7 @@ type (
duration_ns arrow.Duration
)
-func makeDurationsRecords() []arrow.Record {
+func makeDurationsRecords() []arrow.RecordBatch {
mem := memory.NewGoAllocator()
schema := arrow.NewSchema(
@@ -739,7 +739,7 @@ func makeDurationsRecords() []arrow.Record {
}
}()
- recs := make([]arrow.Record, len(chunks))
+ recs := make([]arrow.RecordBatch, len(chunks))
for i, chunk := range chunks {
recs[i] = array.NewRecord(schema, chunk, -1)
}
@@ -752,7 +752,7 @@ var (
decimal256Type = &arrow.Decimal256Type{Precision: 72, Scale: 2}
)
-func makeDecimal128sRecords() []arrow.Record {
+func makeDecimal128sRecords() []arrow.RecordBatch {
mem := memory.NewGoAllocator()
schema := arrow.NewSchema(
[]arrow.Field{
@@ -789,7 +789,7 @@ func makeDecimal128sRecords() []arrow.Record {
}
}()
- recs := make([]arrow.Record, len(chunks))
+ recs := make([]arrow.RecordBatch, len(chunks))
for i, chunk := range chunks {
recs[i] = array.NewRecord(schema, chunk, -1)
}
@@ -797,7 +797,7 @@ func makeDecimal128sRecords() []arrow.Record {
return recs
}
-func makeDecimal256sRecords() []arrow.Record {
+func makeDecimal256sRecords() []arrow.RecordBatch {
mem := memory.NewGoAllocator()
schema := arrow.NewSchema(
[]arrow.Field{
@@ -834,7 +834,7 @@ func makeDecimal256sRecords() []arrow.Record {
}
}()
- recs := make([]arrow.Record, len(chunks))
+ recs := make([]arrow.RecordBatch, len(chunks))
for i, chunk := range chunks {
recs[i] = array.NewRecord(schema, chunk, -1)
}
@@ -842,7 +842,7 @@ func makeDecimal256sRecords() []arrow.Record {
return recs
}
-func makeMapsRecords() []arrow.Record {
+func makeMapsRecords() []arrow.RecordBatch {
mem := memory.NewGoAllocator()
dtype := arrow.MapOf(arrow.PrimitiveTypes.Int32,
arrow.BinaryTypes.String)
dtype.KeysSorted = true
@@ -956,7 +956,7 @@ func makeMapsRecords() []arrow.Record {
}
}()
- recs := make([]arrow.Record, len(chunks))
+ recs := make([]arrow.RecordBatch, len(chunks))
for i, chunk := range chunks {
recs[i] = array.NewRecord(schema, chunk, -1)
}
@@ -964,7 +964,7 @@ func makeMapsRecords() []arrow.Record {
return recs
}
-func makeExtensionRecords() []arrow.Record {
+func makeExtensionRecords() []arrow.RecordBatch {
mem := memory.NewGoAllocator()
p1Type := types.NewParametric1Type(6)
@@ -1035,7 +1035,7 @@ func makeExtensionRecords() []arrow.Record {
}
}()
- recs := make([]arrow.Record, len(chunks))
+ recs := make([]arrow.RecordBatch, len(chunks))
for i, chunk := range chunks {
recs[i] = array.NewRecord(schema, chunk, -1)
}
@@ -1043,7 +1043,7 @@ func makeExtensionRecords() []arrow.Record {
return recs
}
-func makeUnionRecords() []arrow.Record {
+func makeUnionRecords() []arrow.RecordBatch {
mem := memory.NewGoAllocator()
unionFields := []arrow.Field{
@@ -1100,12 +1100,12 @@ func makeUnionRecords() []arrow.Record {
defer sparse2.Release()
defer dense2.Release()
- return []arrow.Record{
+ return []arrow.RecordBatch{
array.NewRecord(schema, []arrow.Array{sparse1, dense1}, -1),
array.NewRecord(schema, []arrow.Array{sparse2, dense2}, -1)}
}
-func makeRunEndEncodedRecords() []arrow.Record {
+func makeRunEndEncodedRecords() []arrow.RecordBatch {
mem := memory.NewGoAllocator()
schema := arrow.NewSchema([]arrow.Field{
{Name: "ree16", Type:
arrow.RunEndEncodedOf(arrow.PrimitiveTypes.Int16, arrow.BinaryTypes.String)},
@@ -1148,7 +1148,7 @@ func makeRunEndEncodedRecords() []arrow.Record {
}
}()
- recs := make([]arrow.Record, len(chunks))
+ recs := make([]arrow.RecordBatch, len(chunks))
for i, chunk := range chunks {
recs[i] = array.NewRecord(schema, chunk, -1)
}
@@ -1156,7 +1156,7 @@ func makeRunEndEncodedRecords() []arrow.Record {
return recs
}
-func makeStringViewRecords() []arrow.Record {
+func makeStringViewRecords() []arrow.RecordBatch {
mem := memory.NewGoAllocator()
schema := arrow.NewSchema([]arrow.Field{
{Name: "binary_view", Type: arrow.BinaryTypes.BinaryView,
Nullable: true},
@@ -1187,7 +1187,7 @@ func makeStringViewRecords() []arrow.Record {
}
}()
- recs := make([]arrow.Record, len(chunks))
+ recs := make([]arrow.RecordBatch, len(chunks))
for i, chunk := range chunks {
recs[i] = array.NewRecord(schema, chunk, -1)
}
diff --git a/arrow/internal/arrdata/ioutil.go b/arrow/internal/arrdata/ioutil.go
index d51210d1..2ef70436 100644
--- a/arrow/internal/arrdata/ioutil.go
+++ b/arrow/internal/arrdata/ioutil.go
@@ -31,7 +31,7 @@ import (
)
// CheckArrowFile checks whether a given ARROW file contains the expected list
of records.
-func CheckArrowFile(t *testing.T, f *os.File, mem memory.Allocator, schema
*arrow.Schema, recs []arrow.Record) {
+func CheckArrowFile(t *testing.T, f *os.File, mem memory.Allocator, schema
*arrow.Schema, recs []arrow.RecordBatch) {
t.Helper()
_, err := f.Seek(0, io.SeekStart)
@@ -62,7 +62,7 @@ func CheckArrowFile(t *testing.T, f *os.File, mem
memory.Allocator, schema *arro
}
-func CheckArrowConcurrentFile(t *testing.T, f *os.File, mem memory.Allocator,
schema *arrow.Schema, recs []arrow.Record) {
+func CheckArrowConcurrentFile(t *testing.T, f *os.File, mem memory.Allocator,
schema *arrow.Schema, recs []arrow.RecordBatch) {
t.Helper()
_, err := f.Seek(0, io.SeekStart)
@@ -112,7 +112,7 @@ func CheckArrowConcurrentFile(t *testing.T, f *os.File, mem
memory.Allocator, sc
}
// CheckArrowStream checks whether a given ARROW stream contains the expected
list of records.
-func CheckArrowStream(t *testing.T, f *os.File, mem memory.Allocator, schema
*arrow.Schema, recs []arrow.Record) {
+func CheckArrowStream(t *testing.T, f *os.File, mem memory.Allocator, schema
*arrow.Schema, recs []arrow.RecordBatch) {
t.Helper()
_, err := f.Seek(0, io.SeekStart)
@@ -142,7 +142,7 @@ func CheckArrowStream(t *testing.T, f *os.File, mem
memory.Allocator, schema *ar
}
// WriteFile writes a list of records to the given file descriptor, as an
ARROW file.
-func WriteFile(t *testing.T, f *os.File, mem memory.Allocator, schema
*arrow.Schema, recs []arrow.Record) {
+func WriteFile(t *testing.T, f *os.File, mem memory.Allocator, schema
*arrow.Schema, recs []arrow.RecordBatch) {
t.Helper()
w, err := ipc.NewFileWriter(f, ipc.WithSchema(schema),
ipc.WithAllocator(mem))
@@ -178,7 +178,7 @@ func WriteFile(t *testing.T, f *os.File, mem
memory.Allocator, schema *arrow.Sch
}
// WriteFile writes a list of records to the given file descriptor, as an
ARROW file.
-func WriteFileCompressed(t *testing.T, f *os.File, mem memory.Allocator,
schema *arrow.Schema, recs []arrow.Record, codec flatbuf.CompressionType,
concurrency int) {
+func WriteFileCompressed(t *testing.T, f *os.File, mem memory.Allocator,
schema *arrow.Schema, recs []arrow.RecordBatch, codec flatbuf.CompressionType,
concurrency int) {
t.Helper()
opts := []ipc.Option{ipc.WithSchema(schema), ipc.WithAllocator(mem),
ipc.WithCompressConcurrency(concurrency)}
@@ -224,7 +224,7 @@ func WriteFileCompressed(t *testing.T, f *os.File, mem
memory.Allocator, schema
}
// WriteStream writes a list of records to the given file descriptor, as an
ARROW stream.
-func WriteStream(t *testing.T, f *os.File, mem memory.Allocator, schema
*arrow.Schema, recs []arrow.Record) {
+func WriteStream(t *testing.T, f *os.File, mem memory.Allocator, schema
*arrow.Schema, recs []arrow.RecordBatch) {
t.Helper()
w := ipc.NewWriter(f, ipc.WithSchema(schema), ipc.WithAllocator(mem))
@@ -245,7 +245,7 @@ func WriteStream(t *testing.T, f *os.File, mem
memory.Allocator, schema *arrow.S
// WriteStreamCompressed writes a list of records to the given file descriptor
as an ARROW stream
// using the provided compression type.
-func WriteStreamCompressed(t *testing.T, f *os.File, mem memory.Allocator,
schema *arrow.Schema, recs []arrow.Record, codec flatbuf.CompressionType, np
int) {
+func WriteStreamCompressed(t *testing.T, f *os.File, mem memory.Allocator,
schema *arrow.Schema, recs []arrow.RecordBatch, codec flatbuf.CompressionType,
np int) {
t.Helper()
opts := []ipc.Option{ipc.WithSchema(schema), ipc.WithAllocator(mem),
ipc.WithCompressConcurrency(np)}
diff --git a/arrow/internal/arrjson/arrjson.go
b/arrow/internal/arrjson/arrjson.go
index a2c86ff0..4a1da61c 100644
--- a/arrow/internal/arrjson/arrjson.go
+++ b/arrow/internal/arrjson/arrjson.go
@@ -796,15 +796,15 @@ type Record struct {
Columns []Array `json:"columns"`
}
-func recordsFromJSON(mem memory.Allocator, schema *arrow.Schema, recs
[]Record, memo *dictutils.Memo) []arrow.Record {
- vs := make([]arrow.Record, len(recs))
+func recordsFromJSON(mem memory.Allocator, schema *arrow.Schema, recs
[]Record, memo *dictutils.Memo) []arrow.RecordBatch {
+ vs := make([]arrow.RecordBatch, len(recs))
for i, rec := range recs {
vs[i] = recordFromJSON(mem, schema, rec, memo)
}
return vs
}
-func recordFromJSON(mem memory.Allocator, schema *arrow.Schema, rec Record,
memo *dictutils.Memo) arrow.Record {
+func recordFromJSON(mem memory.Allocator, schema *arrow.Schema, rec Record,
memo *dictutils.Memo) arrow.RecordBatch {
arrs := arraysFromJSON(mem, schema, rec.Columns)
if err := dictutils.ResolveDictionaries(memo, arrs,
dictutils.NewFieldPos(), mem); err != nil {
panic(err)
@@ -819,7 +819,7 @@ func recordFromJSON(mem memory.Allocator, schema
*arrow.Schema, rec Record, memo
return array.NewRecord(schema, cols, int64(rec.Count))
}
-func recordToJSON(rec arrow.Record) Record {
+func recordToJSON(rec arrow.RecordBatch) Record {
return Record{
Count: rec.NumRows(),
Columns: arraysToJSON(rec.Schema(), rec.Columns()),
diff --git a/arrow/internal/arrjson/reader.go b/arrow/internal/arrjson/reader.go
index ec021fc1..4fb175cb 100644
--- a/arrow/internal/arrjson/reader.go
+++ b/arrow/internal/arrjson/reader.go
@@ -31,7 +31,7 @@ type Reader struct {
refs atomic.Int64
schema *arrow.Schema
- recs []arrow.Record
+ recs []arrow.RecordBatch
memo *dictutils.Memo
irec int // current record index. used for the arrio.Reader interface.
@@ -89,7 +89,7 @@ func (r *Reader) Release() {
func (r *Reader) Schema() *arrow.Schema { return r.schema }
func (r *Reader) NumRecords() int { return len(r.recs) }
-func (r *Reader) Read() (arrow.Record, error) {
+func (r *Reader) Read() (arrow.RecordBatch, error) {
if r.irec == r.NumRecords() {
return nil, io.EOF
}
@@ -98,7 +98,7 @@ func (r *Reader) Read() (arrow.Record, error) {
return rec, nil
}
-func (r *Reader) ReadAt(index int) (arrow.Record, error) {
+func (r *Reader) ReadAt(index int) (arrow.RecordBatch, error) {
if index >= r.NumRecords() {
return nil, io.EOF
}
diff --git a/arrow/internal/arrjson/writer.go b/arrow/internal/arrjson/writer.go
index 5cbed209..fa0477aa 100644
--- a/arrow/internal/arrjson/writer.go
+++ b/arrow/internal/arrjson/writer.go
@@ -57,7 +57,7 @@ func NewWriter(w io.Writer, schema *arrow.Schema) (*Writer,
error) {
return ww, nil
}
-func (w *Writer) Write(rec arrow.Record) error {
+func (w *Writer) Write(rec arrow.RecordBatch) error {
if w.nrecs == 0 {
pairs, err := dictutils.CollectDictionaries(rec, &w.mapper)
if err != nil {
diff --git a/arrow/internal/dictutils/dict.go b/arrow/internal/dictutils/dict.go
index 184e29c0..fa075ceb 100644
--- a/arrow/internal/dictutils/dict.go
+++ b/arrow/internal/dictutils/dict.go
@@ -206,7 +206,7 @@ func (d *dictCollector) visit(pos FieldPos, arr
arrow.Array) error {
return d.visitChildren(pos, dt, arr)
}
-func (d *dictCollector) collect(batch arrow.Record) error {
+func (d *dictCollector) collect(batch arrow.RecordBatch) error {
var (
pos = NewFieldPos()
schema = batch.Schema()
@@ -369,7 +369,7 @@ func (memo *Memo) AddOrReplace(id int64, v arrow.ArrayData)
bool {
return !ok
}
-func CollectDictionaries(batch arrow.Record, mapper *Mapper) (out []dictpair,
err error) {
+func CollectDictionaries(batch arrow.RecordBatch, mapper *Mapper) (out
[]dictpair, err error) {
collector := dictCollector{mapper: mapper}
err = collector.collect(batch)
out = collector.dictionaries
diff --git a/arrow/util/byte_size.go b/arrow/util/byte_size.go
index b9296f37..8344c2f5 100644
--- a/arrow/util/byte_size.go
+++ b/arrow/util/byte_size.go
@@ -58,7 +58,7 @@ func totalArraySize(arr arrow.Array, seenBuffers
map[*memory.Buffer]struct{}) in
return totalArrayDataSize(arr.Data(), seenBuffers)
}
-func totalRecordSize(record arrow.Record, seenBuffers
map[*memory.Buffer]struct{}) int64 {
+func totalRecordSize(record arrow.RecordBatch, seenBuffers
map[*memory.Buffer]struct{}) int64 {
var sum int64
for _, c := range record.Columns() {
sum += totalArraySize(c, seenBuffers)
@@ -72,8 +72,8 @@ func TotalArraySize(arr arrow.Array) int64 {
return totalArraySize(arr, seenBuffer)
}
-// TotalRecordSize return the sum of bytes in each buffer referenced by the
Record.
-func TotalRecordSize(record arrow.Record) int64 {
+// TotalRecordSize return the sum of bytes in each buffer referenced by the
RecordBatch.
+func TotalRecordSize(record arrow.RecordBatch) int64 {
seenBuffer := make(map[*memory.Buffer]struct{})
return totalRecordSize(record, seenBuffer)
}
diff --git a/arrow/util/protobuf_reflect.go b/arrow/util/protobuf_reflect.go
index 69507456..e8b80c68 100644
--- a/arrow/util/protobuf_reflect.go
+++ b/arrow/util/protobuf_reflect.go
@@ -630,8 +630,8 @@ func (msg ProtobufMessageReflection) Schema() *arrow.Schema
{
return arrow.NewSchema(fields, nil)
}
-// Record returns an arrow.Record for a protobuf message
-func (msg ProtobufMessageReflection) Record(mem memory.Allocator) arrow.Record
{
+// Record returns an arrow.RecordBatch for a protobuf message
+func (msg ProtobufMessageReflection) Record(mem memory.Allocator)
arrow.RecordBatch {
if mem == nil {
mem = memory.NewGoAllocator()
}