zeroshade commented on code in PR #37468:
URL: https://github.com/apache/arrow/pull/37468#discussion_r1310788089
##########
go/arrow/array/list.go:
##########
@@ -618,19 +631,661 @@ func (b *baseListBuilder) UnmarshalJSON(data []byte)
error {
return b.Unmarshal(dec)
}
+// ListView represents an immutable sequence of array values defined by an
+// offset into a child array and a length.
+type ListView struct {
+ array
+ values arrow.Array
+ offsets []int32
+ sizes []int32
+}
+
+var _ VarLenListLike = (*ListView)(nil)
+
+func NewListViewData(data arrow.ArrayData) *ListView {
+ a := &ListView{}
+ a.refCount = 1
+ a.setData(data.(*Data))
+ return a
+}
+
+func (a *ListView) ListValues() arrow.Array { return a.values }
+
+func (a *ListView) ValueStr(i int) string {
+ if !a.IsValid(i) {
+ return NullValueStr
+ }
+ return string(a.GetOneForMarshal(i).(json.RawMessage))
+}
+
+func (a *ListView) String() string {
+ o := new(strings.Builder)
+ o.WriteString("[")
+ for i := 0; i < a.Len(); i++ {
+ if i > 0 {
+ o.WriteString(" ")
+ }
+ if !a.IsValid(i) {
+ o.WriteString(NullValueStr)
+ continue
+ }
+ sub := a.newListValue(i)
+ fmt.Fprintf(o, "%v", sub)
+ sub.Release()
+ }
+ o.WriteString("]")
+ return o.String()
+}
+
+func (a *ListView) newListValue(i int) arrow.Array {
+ beg, end := a.ValueOffsets(i)
+ return NewSlice(a.values, beg, end)
+}
+
+func (a *ListView) setData(data *Data) {
+ a.array.setData(data)
+ offsets := data.buffers[1]
+ if offsets != nil {
+ a.offsets = arrow.Int32Traits.CastFromBytes(offsets.Bytes())
+ }
+ sizes := data.buffers[2]
+ if sizes != nil {
+ a.sizes = arrow.Int32Traits.CastFromBytes(sizes.Bytes())
+ }
+ a.values = MakeFromData(data.childData[0])
+}
+
+func (a *ListView) GetOneForMarshal(i int) interface{} {
+ if a.IsNull(i) {
+ return nil
+ }
+
+ slice := a.newListValue(i)
+ defer slice.Release()
+ v, err := json.Marshal(slice)
+ if err != nil {
+ panic(err)
+ }
+ return json.RawMessage(v)
+}
+
+func (a *ListView) MarshalJSON() ([]byte, error) {
+ var buf bytes.Buffer
+ enc := json.NewEncoder(&buf)
+
+ buf.WriteByte('[')
+ for i := 0; i < a.Len(); i++ {
+ if i != 0 {
+ buf.WriteByte(',')
+ }
+ if err := enc.Encode(a.GetOneForMarshal(i)); err != nil {
+ return nil, err
+ }
+ }
+ buf.WriteByte(']')
+ return buf.Bytes(), nil
+}
+
+func arrayEqualListView(left, right *ListView) bool {
+ for i := 0; i < left.Len(); i++ {
+ if left.IsNull(i) {
+ continue
+ }
+ o := func() bool {
+ l := left.newListValue(i)
+ defer l.Release()
+ r := right.newListValue(i)
+ defer r.Release()
+ return Equal(l, r)
+ }()
+ if !o {
+ return false
+ }
+ }
+ return true
+}
+
+// Len returns the number of elements in the array.
+func (a *ListView) Len() int { return a.array.Len() }
+
+func (a *ListView) Offsets() []int32 { return a.offsets }
+
+func (a *ListView) Sizes() []int32 { return a.sizes }
+
+func (a *ListView) Retain() {
+ a.array.Retain()
+ a.values.Retain()
+}
+
+func (a *ListView) Release() {
+ a.array.Release()
+ a.values.Release()
+}
+
+func (a *ListView) ValueOffsets(i int) (start, end int64) {
+ debug.Assert(i >= 0 && i < a.array.data.length, "index out of range")
+ j := i + a.array.data.offset
+ start = int64(a.offsets[j])
+ end = start + int64(a.sizes[j])
+ return
+}
+
+// LargeListView represents an immutable sequence of array values defined by an
+// offset into a child array and a length.
+type LargeListView struct {
+ array
+ values arrow.Array
+ offsets []int64
+ sizes []int64
+}
+
+var _ VarLenListLike = (*LargeListView)(nil)
+
+// NewLargeListViewData returns a new LargeListView array value, from data.
+func NewLargeListViewData(data arrow.ArrayData) *LargeListView {
+ a := new(LargeListView)
+ a.refCount = 1
+ a.setData(data.(*Data))
+ return a
+}
+
+func (a *LargeListView) ListValues() arrow.Array { return a.values }
+
+func (a *LargeListView) ValueStr(i int) string {
+ if !a.IsValid(i) {
+ return NullValueStr
+ }
+ return string(a.GetOneForMarshal(i).(json.RawMessage))
+}
+
+func (a *LargeListView) String() string {
+ o := new(strings.Builder)
+ o.WriteString("[")
+ for i := 0; i < a.Len(); i++ {
+ if i > 0 {
+ o.WriteString(" ")
+ }
+ if !a.IsValid(i) {
+ o.WriteString(NullValueStr)
+ continue
+ }
+ sub := a.newListValue(i)
+ fmt.Fprintf(o, "%v", sub)
+ sub.Release()
+ }
+ o.WriteString("]")
+ return o.String()
+}
+
+func (a *LargeListView) newListValue(i int) arrow.Array {
+ beg, end := a.ValueOffsets(i)
+ return NewSlice(a.values, beg, end)
+}
+
+func (a *LargeListView) setData(data *Data) {
+ a.array.setData(data)
+ offsets := data.buffers[1]
+ if offsets != nil {
+ a.offsets = arrow.Int64Traits.CastFromBytes(offsets.Bytes())
+ }
+ sizes := data.buffers[2]
+ if sizes != nil {
+ a.sizes = arrow.Int64Traits.CastFromBytes(sizes.Bytes())
+ }
+ a.values = MakeFromData(data.childData[0])
+}
+
+func (a *LargeListView) GetOneForMarshal(i int) interface{} {
+ if a.IsNull(i) {
+ return nil
+ }
+
+ slice := a.newListValue(i)
+ defer slice.Release()
+ v, err := json.Marshal(slice)
+ if err != nil {
+ panic(err)
+ }
+ return json.RawMessage(v)
+}
+
+func (a *LargeListView) MarshalJSON() ([]byte, error) {
+ var buf bytes.Buffer
+ enc := json.NewEncoder(&buf)
+
+ buf.WriteByte('[')
+ for i := 0; i < a.Len(); i++ {
+ if i != 0 {
+ buf.WriteByte(',')
+ }
+ if err := enc.Encode(a.GetOneForMarshal(i)); err != nil {
+ return nil, err
+ }
+ }
+ buf.WriteByte(']')
+ return buf.Bytes(), nil
+}
+
+func arrayEqualLargeListView(left, right *LargeListView) bool {
+ for i := 0; i < left.Len(); i++ {
+ if left.IsNull(i) {
+ continue
+ }
+ o := func() bool {
+ l := left.newListValue(i)
+ defer l.Release()
+ r := right.newListValue(i)
+ defer r.Release()
+ return Equal(l, r)
+ }()
+ if !o {
+ return false
+ }
+ }
+ return true
+}
+
+// Len returns the number of elements in the array.
+func (a *LargeListView) Len() int { return a.array.Len() }
+
+func (a *LargeListView) Offsets() []int64 { return a.offsets }
+
+func (a *LargeListView) Sizes() []int64 { return a.sizes }
+
+func (a *LargeListView) ValueOffsets(i int) (start, end int64) {
+ debug.Assert(i >= 0 && i < a.array.data.length, "index out of range")
+ j := i + a.array.data.offset
+ start = int64(a.offsets[j])
+ end = start + int64(a.sizes[j])
+ return
+}
+
+func (a *LargeListView) Retain() {
+ a.array.Retain()
+ a.values.Retain()
+}
+
+func (a *LargeListView) Release() {
+ a.array.Release()
+ a.values.Release()
+}
+
+type baseListViewBuilder struct {
+ builder
+
+ values Builder // value builder for the list-view's elements.
+ offsets Builder
+ sizes Builder
+
+ // actual list-view type
+ dt arrow.DataType
+ appendOffsetVal func(int)
+ appendSizeVal func(int)
+}
+
+type ListViewBuilder struct {
+ baseListViewBuilder
+}
+
+type LargeListViewBuilder struct {
+ baseListViewBuilder
+}
+
+// NewListViewBuilder returns a builder, using the provided memory allocator.
+// The created list-view builder will create a list whose elements will be
+// of type etype.
+func NewListViewBuilder(mem memory.Allocator, etype arrow.DataType)
*ListViewBuilder {
+ offsetBldr := NewInt32Builder(mem)
+ sizeBldr := NewInt32Builder(mem)
+ return &ListViewBuilder{
+ baseListViewBuilder{
+ builder: builder{refCount: 1, mem: mem},
+ values: NewBuilder(mem, etype),
+ offsets: offsetBldr,
+ sizes: sizeBldr,
+ dt: arrow.ListViewOf(etype),
+ appendOffsetVal: func(o int) {
offsetBldr.Append(int32(o)) },
+ appendSizeVal: func(s int) {
sizeBldr.Append(int32(s)) },
+ },
+ }
+}
+
+// NewListViewBuilderWithField takes a field to use for the child rather than
just
+// a datatype to allow for more customization.
+func NewListViewBuilderWithField(mem memory.Allocator, field arrow.Field)
*ListViewBuilder {
+ offsetBldr := NewInt32Builder(mem)
+ sizeBldr := NewInt32Builder(mem)
+ return &ListViewBuilder{
+ baseListViewBuilder{
+ builder: builder{refCount: 1, mem: mem},
+ values: NewBuilder(mem, field.Type),
+ offsets: offsetBldr,
+ sizes: sizeBldr,
+ dt: arrow.ListViewOfField(field),
+ appendOffsetVal: func(o int) {
offsetBldr.Append(int32(o)) },
+ appendSizeVal: func(s int) {
sizeBldr.Append(int32(s)) },
+ },
+ }
+}
+
+func (b *baseListViewBuilder) Type() arrow.DataType {
+ switch dt := b.dt.(type) {
+ case *arrow.ListViewType:
+ f := dt.ElemField()
+ f.Type = b.values.Type()
+ return arrow.ListViewOfField(f)
+ case *arrow.LargeListViewType:
+ f := dt.ElemField()
+ f.Type = b.values.Type()
+ return arrow.LargeListViewOfField(f)
+ }
+ return nil
+}
+
+// NewLargeListViewBuilder returns a builder, using the provided memory
allocator.
+// The created list-view builder will create a list whose elements will be of
type etype.
+func NewLargeListViewBuilder(mem memory.Allocator, etype arrow.DataType)
*LargeListViewBuilder {
+ offsetBldr := NewInt64Builder(mem)
+ sizeBldr := NewInt64Builder(mem)
+ return &LargeListViewBuilder{
+ baseListViewBuilder{
+ builder: builder{refCount: 1, mem: mem},
+ values: NewBuilder(mem, etype),
+ offsets: offsetBldr,
+ sizes: sizeBldr,
+ dt: arrow.LargeListViewOf(etype),
+ appendOffsetVal: func(o int) {
offsetBldr.Append(int64(o)) },
+ appendSizeVal: func(s int) {
sizeBldr.Append(int64(s)) },
+ },
+ }
+}
+
+// NewLargeListViewBuilderWithField takes a field rather than just an element
type
+// to allow for more customization of the final type of the LargeListView Array
+func NewLargeListViewBuilderWithField(mem memory.Allocator, field arrow.Field)
*LargeListViewBuilder {
+ offsetBldr := NewInt64Builder(mem)
+ sizeBldr := NewInt64Builder(mem)
+ return &LargeListViewBuilder{
+ baseListViewBuilder{
+ builder: builder{refCount: 1, mem: mem},
+ values: NewBuilder(mem, field.Type),
+ offsets: offsetBldr,
+ sizes: sizeBldr,
+ dt: arrow.LargeListViewOfField(field),
+ appendOffsetVal: func(o int) {
offsetBldr.Append(int64(o)) },
+ appendSizeVal: func(o int) {
sizeBldr.Append(int64(o)) },
+ },
+ }
+}
+
+// Release decreases the reference count by 1.
+// When the reference count goes to zero, the memory is freed.
+func (b *baseListViewBuilder) Release() {
+ debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases")
+
+ if atomic.AddInt64(&b.refCount, -1) == 0 {
+ if b.nullBitmap != nil {
+ b.nullBitmap.Release()
+ b.nullBitmap = nil
+ }
+ b.values.Release()
+ b.offsets.Release()
+ b.sizes.Release()
+ }
+}
+
+func (b *baseListViewBuilder) appendDimensions(offset int, list_size int) {
+ b.appendOffsetVal(offset)
+ b.appendSizeVal(list_size)
+}
+
+func (b *baseListViewBuilder) Append(v bool) {
+ debug.Assert(false, "baseListViewBuilder.Append should never be called
-- use AppendWithSize instead")
+}
+
+func (b *baseListViewBuilder) AppendWithSize(v bool, list_size int) {
+ debug.Assert(v || list_size == 0, "invalid list-view should have size
0")
+ b.Reserve(1)
+ b.unsafeAppendBoolToBitmap(v)
+ b.appendDimensions(b.values.Len(), list_size)
+}
+
+func (b *baseListViewBuilder) AppendNull() {
+ b.AppendWithSize(false, 0)
+}
+
+func (b *baseListViewBuilder) AppendNulls(n int) {
+ for i := 0; i < n; i++ {
+ b.AppendNull()
+ }
+}
+
+func (b *baseListViewBuilder) AppendEmptyValue() {
+ b.AppendWithSize(true, 0)
+}
+
+func (b *baseListViewBuilder) AppendEmptyValues(n int) {
+ for i := 0; i < n; i++ {
+ b.AppendEmptyValue()
+ }
+}
+
+func (b *ListViewBuilder) AppendValuesWithSizes(offsets []int32, sizes
[]int32, valid []bool) {
+ b.Reserve(len(valid))
+ b.offsets.(*Int32Builder).AppendValues(offsets, nil)
+ b.sizes.(*Int32Builder).AppendValues(sizes, nil)
+ b.builder.unsafeAppendBoolsToBitmap(valid, len(valid))
+}
+
+func (b *LargeListViewBuilder) AppendValuesWithSizes(offsets []int64, sizes
[]int64, valid []bool) {
+ b.Reserve(len(valid))
+ b.offsets.(*Int64Builder).AppendValues(offsets, nil)
+ b.sizes.(*Int64Builder).AppendValues(sizes, nil)
+ b.builder.unsafeAppendBoolsToBitmap(valid, len(valid))
+}
+
+func (b *baseListViewBuilder) unsafeAppendBoolToBitmap(isValid bool) {
+ if isValid {
+ bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
+ } else {
+ b.nulls++
+ }
+ b.length++
+}
+
+func (b *baseListViewBuilder) init(capacity int) {
+ b.builder.init(capacity)
+ b.offsets.init(capacity + 1)
+ b.sizes.init(capacity + 1)
Review Comment:
`ListView` doesn't need the `+1` here, right? Unlike `List` and `LargeList`,
a `ListView` should have exactly as many offsets and sizes as it has elements.
`List` needs `length+1` offsets because the extra, final offset marks where the
last element ends; `ListView` stores each element's size explicitly, so there is
no need for that extra end offset, right?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]