[GitHub] [arrow] BryanCutler commented on a change in pull request #7275: ARROW-6110: [Java][Integration] Support LargeList Type and add integration test with C++

2020-07-02 Thread GitBox


BryanCutler commented on a change in pull request #7275:
URL: https://github.com/apache/arrow/pull/7275#discussion_r449226788



##
File path: java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java
##
@@ -0,0 +1,1004 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+import static java.util.Collections.singletonList;
+import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+import static org.apache.arrow.util.Preconditions.checkNotNull;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.OutOfMemoryException;
+import org.apache.arrow.memory.util.ArrowBufPointer;
+import org.apache.arrow.memory.util.ByteFunctionHelpers;
+import org.apache.arrow.memory.util.CommonUtil;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.AddOrGetResult;
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.BaseValueVector;
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.BitVectorHelper;
+import org.apache.arrow.vector.BufferBacked;
+import org.apache.arrow.vector.DensityAwareVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.UInt4Vector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.ZeroVector;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.complex.impl.ComplexCopier;
+import org.apache.arrow.vector.complex.impl.UnionLargeListReader;
+import org.apache.arrow.vector.complex.impl.UnionLargeListWriter;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.JsonStringArrayList;
+import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.apache.arrow.vector.util.SchemaChangeRuntimeException;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * A list vector contains lists of a specific type of elements.  Its structure contains 3 elements.
+ * <ol>
+ * <li> A validity buffer. </li>
+ * <li> An offset buffer, that denotes lists boundaries. </li>
+ * <li> A child data vector that contains the elements of lists. </li>
+ * </ol>
+ *
+ * This is the LargeList variant of list, it has a 64-bit wide offset
+ *
+ * <p>
+ *   WARNING: Currently Arrow in Java doesn't support 64-bit vectors. This class
+ *   follows the expected behaviour of a LargeList but doesn't actually support allocating
+ *   a 64-bit vector. It has little use until 64-bit vectors are supported and should be used
+ *   with caution.
+ *   todo review checkedCastToInt usage in this class.
+ *   Once int64 indexed vectors are supported these checks aren't needed.
+ * </p>
+ */
+public class LargeListVector extends BaseValueVector implements 
RepeatedValueVector, FieldVector, PromotableVector {
+
+  public static LargeListVector empty(String name, BufferAllocator allocator) {
+return new LargeListVector(name, allocator, 
FieldType.nullable(ArrowType.LargeList.INSTANCE), null);
+  }
+
+  public static final FieldVector DEFAULT_DATA_VECTOR = ZeroVector.INSTANCE;
+  public static final String DATA_VECTOR_NAME = "$data$";
+
+  public static final byte OFFSET_WIDTH = 8;
+  protected ArrowBuf offsetBuffer;
+  protected FieldVector vector;
+  protected final CallBack callBack;
+  protected int valueCount;
+  protected long offsetAllocationSizeInBytes = INITIAL_VALUE_ALLOCATION * 
OFFSET_WIDTH;
+  private final String name;
+
+  protected String defaultDataVectorName = DATA_VECTOR_NAME;
+  protected ArrowBuf validityBuff

[GitHub] [arrow] BryanCutler commented on a change in pull request #7275: ARROW-6110: [Java][Integration] Support LargeList Type and add integration test with C++

2020-06-30 Thread GitBox


BryanCutler commented on a change in pull request #7275:
URL: https://github.com/apache/arrow/pull/7275#discussion_r448032771



##
File path: java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java
##
@@ -0,0 +1,991 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+import static java.util.Collections.singletonList;
+import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+import static org.apache.arrow.util.Preconditions.checkNotNull;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.OutOfMemoryException;
+import org.apache.arrow.memory.util.ArrowBufPointer;
+import org.apache.arrow.memory.util.ByteFunctionHelpers;
+import org.apache.arrow.memory.util.CommonUtil;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.AddOrGetResult;
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.BaseValueVector;
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.BitVectorHelper;
+import org.apache.arrow.vector.BufferBacked;
+import org.apache.arrow.vector.DensityAwareVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.UInt4Vector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.ZeroVector;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.complex.impl.ComplexCopier;
+import org.apache.arrow.vector.complex.impl.UnionLargeListReader;
+import org.apache.arrow.vector.complex.impl.UnionLargeListWriter;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.JsonStringArrayList;
+import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.apache.arrow.vector.util.SchemaChangeRuntimeException;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * A list vector contains lists of a specific type of elements.  Its structure contains 3 elements.
+ * <ol>
+ * <li> A validity buffer. </li>
+ * <li> An offset buffer, that denotes lists boundaries. </li>
+ * <li> A child data vector that contains the elements of lists. </li>
+ * </ol>
+ *
+ * This is the LargeList variant of list, it has a 64-bit wide offset
+ *
+ * <p>
+ *   todo review checkedCastToInt usage in this class.
+ *   Once int64 indexed vectors are supported these checks aren't needed.
+ * </p>
+ */
+public class LargeListVector extends BaseValueVector implements 
RepeatedValueVector, BaseListVector, PromotableVector {
+
+  public static LargeListVector empty(String name, BufferAllocator allocator) {
+return new LargeListVector(name, allocator, 
FieldType.nullable(ArrowType.LargeList.INSTANCE), null);
+  }
+
+  public static final FieldVector DEFAULT_DATA_VECTOR = ZeroVector.INSTANCE;
+  public static final String DATA_VECTOR_NAME = "$data$";
+
+  public static final byte OFFSET_WIDTH = 8;
+  protected ArrowBuf offsetBuffer;
+  protected FieldVector vector;
+  protected final CallBack callBack;
+  protected int valueCount;
+  protected long offsetAllocationSizeInBytes = INITIAL_VALUE_ALLOCATION * 
OFFSET_WIDTH;
+  private final String name;
+
+  protected String defaultDataVectorName = DATA_VECTOR_NAME;
+  protected ArrowBuf validityBuffer;
+  protected UnionLargeListReader reader;
+  private final FieldType fieldType;
+  private int validityAllocationSizeInBytes;
+
+  /**
+   * The maximum index that is actually set.
+   */
+  private long lastSet;
+
+  /**
+   * Constructs a new instance.
+   *
+   * @param name The nam