bkietz commented on code in PR #39019:
URL: https://github.com/apache/arrow/pull/39019#discussion_r1427319054


##########
go/arrow/array/list.go:
##########
@@ -1411,217 +1410,56 @@ func (b *baseListViewBuilder) UnmarshalJSON(data 
[]byte) error {
        return b.Unmarshal(dec)
 }
 
-// Pre-conditions:
-//
-//     input.DataType() is ListViewType
-//     input.Len() > 0 && input.NullN() != input.Len()
-func minListViewOffset32(input arrow.ArrayData) int32 {
-       var bitmap []byte
-       if input.Buffers()[0] != nil {
-               bitmap = input.Buffers()[0].Bytes()
-       }
-       offsets := 
arrow.Int32Traits.CastFromBytes(input.Buffers()[1].Bytes())[input.Offset():]
-       sizes := 
arrow.Int32Traits.CastFromBytes(input.Buffers()[2].Bytes())[input.Offset():]
-
-       isNull := func(i int) bool {
-               return bitmap != nil && bitutil.BitIsNotSet(bitmap, 
input.Offset()+i)
-       }
-
-       // It's very likely that the first non-null non-empty list-view starts 
at
-       // offset 0 of the child array.
-       i := 0
-       for i < input.Len() && (isNull(i) || sizes[i] == 0) {
-               i += 1
-       }
-       if i >= input.Len() {
-               return 0
-       }
-       minOffset := offsets[i]
-       if minOffset == 0 {
-               // early exit: offset 0 found already
-               return 0
-       }
-
-       // Slow path: scan the buffers entirely.
-       i += 1
-       for ; i < input.Len(); i += 1 {
-               if isNull(i) {
-                       continue
-               }
-               offset := offsets[i]
-               if offset < minOffset && sizes[i] > 0 {
-                       minOffset = offset
-               }
-       }
-       return minOffset
-}
-
-// Find the maximum offset+size in a LIST_VIEW array.
+// Find the minimum offset+size in a LIST_VIEW/LARGE_LIST_VIEW array.
 //
 // Pre-conditions:
 //
-//     input.DataType() is ListViewType
-//     input.Len() > 0 && input.NullN() != input.Len()
-func maxListViewOffset32(input arrow.ArrayData) int {
+//     input.DataType() is ListViewType if Offset=int32 or LargeListViewType 
if Offset=int64
+//     input.Len() > 0

Review Comment:
   RangeOfUsedValues was causing an error in IPC writing where it was being 
used to determine how much of the values array to write. This could drop values 
referenced by views under null bits, which later produced negative offsets. I 
was trying to rewrite this function to also consider views under null bits 
(less efficient for concat but still usable for IPC writing). I have reverted 
the change to RangeOfUsedValues and IPC writing no longer tries to slice the 
values array. This is less optimal for IPC writing, but we can add optimizations 
back later; for now I'd like to get the integration test passing.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to