zeroshade commented on code in PR #37468:
URL: https://github.com/apache/arrow/pull/37468#discussion_r1353282562
##########
go/arrow/array/list.go:
##########
@@ -618,19 +634,1061 @@ func (b *baseListBuilder) UnmarshalJSON(data []byte)
error {
return b.Unmarshal(dec)
}
+// ListView represents an immutable sequence of array values defined by an
+// offset into a child array and a length.
+type ListView struct {
+ array
+ values arrow.Array
+ offsets []int32
+ sizes []int32
+}
+
+var _ VarLenListLike = (*ListView)(nil)
+
+func NewListViewData(data arrow.ArrayData) *ListView {
+ a := &ListView{}
+ a.refCount = 1
+ a.setData(data.(*Data))
+ return a
+}
+
+func (a *ListView) ListValues() arrow.Array { return a.values }
+
+func (a *ListView) ValueStr(i int) string {
+ if !a.IsValid(i) {
+ return NullValueStr
+ }
+ return string(a.GetOneForMarshal(i).(json.RawMessage))
+}
+
+func (a *ListView) String() string {
+ o := new(strings.Builder)
+ o.WriteString("[")
+ for i := 0; i < a.Len(); i++ {
+ if i > 0 {
+ o.WriteString(" ")
+ }
+ if a.IsNull(i) {
+ o.WriteString(NullValueStr)
+ continue
+ }
+ sub := a.newListValue(i)
+ fmt.Fprintf(o, "%v", sub)
+ sub.Release()
+ }
+ o.WriteString("]")
+ return o.String()
+}
+
+func (a *ListView) newListValue(i int) arrow.Array {
+ beg, end := a.ValueOffsets(i)
+ return NewSlice(a.values, beg, end)
+}
+
+func (a *ListView) setData(data *Data) {
+ debug.Assert(len(data.buffers) >= 3, "list-view data should have 3
buffers")
+ a.array.setData(data)
+ offsets := data.buffers[1]
+ if offsets != nil {
+ a.offsets = arrow.Int32Traits.CastFromBytes(offsets.Bytes())
+ }
+ sizes := data.buffers[2]
+ if sizes != nil {
+ a.sizes = arrow.Int32Traits.CastFromBytes(sizes.Bytes())
+ }
+ a.values = MakeFromData(data.childData[0])
+}
+
+func (a *ListView) GetOneForMarshal(i int) interface{} {
+ if a.IsNull(i) {
+ return nil
+ }
+
+ slice := a.newListValue(i)
+ defer slice.Release()
+ v, err := json.Marshal(slice)
+ if err != nil {
+ panic(err)
+ }
+ return json.RawMessage(v)
+}
+
+func (a *ListView) MarshalJSON() ([]byte, error) {
+ var buf bytes.Buffer
+ enc := json.NewEncoder(&buf)
+
+ buf.WriteByte('[')
+ for i := 0; i < a.Len(); i++ {
+ if i != 0 {
+ buf.WriteByte(',')
+ }
+ if err := enc.Encode(a.GetOneForMarshal(i)); err != nil {
+ return nil, err
+ }
+ }
+ buf.WriteByte(']')
+ return buf.Bytes(), nil
+}
+
+func arrayEqualListView(left, right *ListView) bool {
+ for i := 0; i < left.Len(); i++ {
+ if left.IsNull(i) {
+ continue
+ }
+ o := func() bool {
+ l := left.newListValue(i)
+ defer l.Release()
+ r := right.newListValue(i)
+ defer r.Release()
+ return Equal(l, r)
+ }()
+ if !o {
+ return false
+ }
+ }
+ return true
+}
+
+// Len returns the number of elements in the array.
+func (a *ListView) Len() int { return a.array.Len() }
+
+func (a *ListView) Offsets() []int32 { return a.offsets }
+
+func (a *ListView) Sizes() []int32 { return a.sizes }
+
+func (a *ListView) Retain() {
+ a.array.Retain()
+ a.values.Retain()
+}
+
+func (a *ListView) Release() {
+ a.array.Release()
+ a.values.Release()
+}
+
+func (a *ListView) ValueOffsets(i int) (start, end int64) {
+ debug.Assert(i >= 0 && i < a.array.data.length, "index out of range")
+ j := i + a.array.data.offset
+ size := int64(a.sizes[j])
+ // If size is 0, skip accessing offsets.
+ if size == 0 {
+ start, end = 0, 0
+ return
+ }
+ start = int64(a.offsets[j])
+ end = start + size
+ return
+}
+
+// LargeListView represents an immutable sequence of array values defined by an
+// offset into a child array and a length.
+type LargeListView struct {
+ array
+ values arrow.Array
+ offsets []int64
+ sizes []int64
+}
+
+var _ VarLenListLike = (*LargeListView)(nil)
+
+// NewLargeListViewData returns a new LargeListView array value, from data.
+func NewLargeListViewData(data arrow.ArrayData) *LargeListView {
+ a := new(LargeListView)
+ a.refCount = 1
+ a.setData(data.(*Data))
+ return a
+}
+
+func (a *LargeListView) ListValues() arrow.Array { return a.values }
+
+func (a *LargeListView) ValueStr(i int) string {
+ if !a.IsValid(i) {
+ return NullValueStr
+ }
+ return string(a.GetOneForMarshal(i).(json.RawMessage))
+}
+
+func (a *LargeListView) String() string {
+ o := new(strings.Builder)
+ o.WriteString("[")
+ for i := 0; i < a.Len(); i++ {
+ if i > 0 {
+ o.WriteString(" ")
+ }
+ if a.IsNull(i) {
+ o.WriteString(NullValueStr)
+ continue
+ }
+ sub := a.newListValue(i)
+ fmt.Fprintf(o, "%v", sub)
+ sub.Release()
+ }
+ o.WriteString("]")
+ return o.String()
+}
+
+func (a *LargeListView) newListValue(i int) arrow.Array {
+ beg, end := a.ValueOffsets(i)
+ return NewSlice(a.values, beg, end)
+}
+
+func (a *LargeListView) setData(data *Data) {
+ debug.Assert(len(data.buffers) >= 3, "list-view data should have 3
buffers")
+ a.array.setData(data)
+ offsets := data.buffers[1]
+ if offsets != nil {
+ a.offsets = arrow.Int64Traits.CastFromBytes(offsets.Bytes())
+ }
+ sizes := data.buffers[2]
+ if sizes != nil {
+ a.sizes = arrow.Int64Traits.CastFromBytes(sizes.Bytes())
+ }
+ a.values = MakeFromData(data.childData[0])
+}
+
+func (a *LargeListView) GetOneForMarshal(i int) interface{} {
+ if a.IsNull(i) {
+ return nil
+ }
+
+ slice := a.newListValue(i)
+ defer slice.Release()
+ v, err := json.Marshal(slice)
+ if err != nil {
+ panic(err)
+ }
+ return json.RawMessage(v)
+}
+
+func (a *LargeListView) MarshalJSON() ([]byte, error) {
+ var buf bytes.Buffer
+ enc := json.NewEncoder(&buf)
+
+ buf.WriteByte('[')
+ for i := 0; i < a.Len(); i++ {
+ if i != 0 {
+ buf.WriteByte(',')
+ }
+ if err := enc.Encode(a.GetOneForMarshal(i)); err != nil {
+ return nil, err
+ }
+ }
+ buf.WriteByte(']')
+ return buf.Bytes(), nil
+}
+
+func arrayEqualLargeListView(left, right *LargeListView) bool {
+ for i := 0; i < left.Len(); i++ {
+ if left.IsNull(i) {
+ continue
+ }
+ o := func() bool {
+ l := left.newListValue(i)
+ defer l.Release()
+ r := right.newListValue(i)
+ defer r.Release()
+ return Equal(l, r)
+ }()
+ if !o {
+ return false
+ }
+ }
+ return true
+}
+
+// Len returns the number of elements in the array.
+func (a *LargeListView) Len() int { return a.array.Len() }
+
+func (a *LargeListView) Offsets() []int64 { return a.offsets }
+
+func (a *LargeListView) Sizes() []int64 { return a.sizes }
+
+func (a *LargeListView) ValueOffsets(i int) (start, end int64) {
+ debug.Assert(i >= 0 && i < a.array.data.length, "index out of range")
+ j := i + a.array.data.offset
+ size := a.sizes[j]
+ // If size is 0, skip accessing offsets.
+ if size == 0 {
+ return 0, 0
+ }
+ start = a.offsets[j]
+ end = start + size
+ return
+}
+
+func (a *LargeListView) Retain() {
+ a.array.Retain()
+ a.values.Retain()
+}
+
+func (a *LargeListView) Release() {
+ a.array.Release()
+ a.values.Release()
+}
+
+// Accessors for offsets and sizes to make ListView and LargeListView
validation generic.
+type offsetsAndSizes interface {
+ offsetAt(slot int64) int64
+ sizeAt(slot int64) int64
+}
+
+var _ offsetsAndSizes = (*ListView)(nil)
+var _ offsetsAndSizes = (*LargeListView)(nil)
+
+func (a *ListView) offsetAt(slot int64) int64 { return
int64(a.offsets[int64(a.data.offset)+slot]) }
+
+func (a *ListView) sizeAt(slot int64) int64 { return
int64(a.sizes[int64(a.data.offset)+slot]) }
+
+func (a *LargeListView) offsetAt(slot int64) int64 { return
a.offsets[int64(a.data.offset)+slot] }
+
+func (a *LargeListView) sizeAt(slot int64) int64 { return
a.sizes[int64(a.data.offset)+slot] }
+
+func outOfBoundsListViewOffset(l offsetsAndSizes, slot int64, offsetLimit
int64) error {
+ offset := l.offsetAt(slot)
+ return fmt.Errorf("%w: Offset invariant failure: offset for slot %d out
of bounds. Expected %d to be at least 0 and less than %d", arrow.ErrInvalid,
slot, offset, offsetLimit)
+}
+
+func outOfBoundsListViewSize(l offsetsAndSizes, slot int64, offsetLimit int64)
error {
+ size := l.sizeAt(slot)
+ if size < 0 {
+ return fmt.Errorf("%w: Offset invariant failure: size for slot
%d out of bounds: %d < 0", arrow.ErrInvalid, slot, size)
+ } else {
+ offset := l.offsetAt(slot)
+ return fmt.Errorf("%w: Offset invariant failure: size for slot
%d out of bounds: %d + %d > %d", arrow.ErrInvalid, slot, offset, size,
offsetLimit)
+ }
+}
+
+// Pre-condition: Basic validation has already been performed
+func (a *array) fullyValidateOffsetsAndSizes(l offsetsAndSizes, offsetLimit
int64) error {
+ for slot := int64(0); slot < int64(a.Len()); slot += 1 {
+ size := l.sizeAt(slot)
+ if size > 0 {
+ offset := l.offsetAt(slot)
+ if offset < 0 || offset > offsetLimit {
+ return outOfBoundsListViewOffset(l, slot,
offsetLimit)
+ }
+ if size > offsetLimit-int64(offset) {
+ return outOfBoundsListViewSize(l, slot,
offsetLimit)
+ }
+ } else if size < 0 {
+ return outOfBoundsListViewSize(l, slot, offsetLimit)
+ }
+ }
+
+ return nil
+}
+
+func (a *array) validateOffsetsAndMaybeSizes(l offsetsAndSizes,
offsetByteWidth int, isListView bool, offsetLimit int64, fullValidation bool)
error {
+ nonEmpty := a.Len() > 0
+ if a.data.buffers[1] == nil {
+ // For length 0, an empty offsets buffer is accepted
(ARROW-544).
+ if nonEmpty {
+ return fmt.Errorf("non-empty array but offsets are
null")
+ } else {
+ return nil
+ }
+ }
+ if isListView {
+ if a.data.buffers[2] == nil {
Review Comment:
Can we just combine these two nested conditionals (`isListView` and `a.data.buffers[2] == nil`) into a single conditional?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]