lidavidm commented on code in PR #41967:
URL: https://github.com/apache/arrow/pull/41967#discussion_r1630595193


##########
java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java:
##########
@@ -117,10 +121,19 @@ private ArrowBuf importOffsets(ArrowType type, long 
bytesPerSlot) {
     return importBuffer(type, 1, capacity);
   }
 
+  private ArrowBuf importView(ArrowType type) {
+    final long capacity = (long) fieldNode.getLength() * 
BaseVariableWidthViewVector.ELEMENT_SIZE;
+    return importBuffer(type, 1, capacity);
+  }

Review Comment:
   Why do we need this vs just using importFixedBytes?



##########
java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java:
##########
@@ -117,10 +121,19 @@ private ArrowBuf importOffsets(ArrowType type, long 
bytesPerSlot) {
     return importBuffer(type, 1, capacity);
   }
 
+  private ArrowBuf importView(ArrowType type) {
+    final long capacity = (long) fieldNode.getLength() * 
BaseVariableWidthViewVector.ELEMENT_SIZE;
+    return importBuffer(type, 1, capacity);
+  }
+
   private ArrowBuf importData(ArrowType type, long capacity) {
     return importBuffer(type, 2, capacity);
   }
 
+  private ArrowBuf importViewData(ArrowType type, int index, long capacity) {
+    return importBuffer(type, index, capacity);
+  }

Review Comment:
   I don't think a pure alias helps readability here



##########
java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java:
##########
@@ -1667,4 +1667,23 @@ protected void getData(int index, ReusableBuffer<?> 
buffer) {
   public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) {
     return visitor.visit(this, value);
   }
+
+  @Override
+  public void exportCDataBuffers(List<ArrowBuf> buffers, ArrowBuf buffersPtr, 
long nullValue) {
+    exportBuffer(validityBuffer, buffers, buffersPtr, nullValue, true);
+
+    if (viewBuffer.capacity() == 0) {
+      // Empty view buffer is allowed here.
+      // We set `retain = false` to explicitly not increase the ref count for 
the exported buffer.
+      // The ref count of the newly created buffer (i.e., 1) already 
represents the usage
+      // of the imported side.
+      exportBuffer(allocator.buffer(INITIAL_BYTE_COUNT), buffers, buffersPtr, 
nullValue, false);

Review Comment:
   Why can't we export an empty buffer? That is allowed by the spec



##########
java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java:
##########
@@ -210,9 +223,48 @@ public List<ArrowBuf> visit(ArrowType.Utf8 type) {
     }
   }
 
+  private List<ArrowBuf> visitVariableWidthView(ArrowType type) {
+    try (ArrowBuf view = importView(type)) {
+      List<ArrowBuf> buffers = new ArrayList<>();
+      view.getReferenceManager().retain();
+      ArrowBuf maybeValidityBuffer = maybeImportBitmap(type);
+      buffers.add(maybeValidityBuffer);
+      buffers.add(view);
+      final int elementSize = BaseVariableWidthViewVector.ELEMENT_SIZE;
+      final int lengthWidth = BaseVariableWidthViewVector.LENGTH_WIDTH;
+      final int prefixWidth = BaseVariableWidthViewVector.PREFIX_WIDTH;
+      final int lengthPrefixWidth = lengthWidth + prefixWidth;
+      // Map to store the data buffer index and the total length of data in 
that buffer
+      Map<Integer, Long> dataBufferInfo = new HashMap<>();
+      for (int i = 0; i < fieldNode.getLength(); i++) {
+        final int length = view.getInt((long) i * elementSize);
+        if (length > BaseVariableWidthViewVector.INLINE_SIZE) {
+          checkState(maybeValidityBuffer != null,
+              "Validity buffer is required for data of type " + type);
+          if (BitVectorHelper.get(maybeValidityBuffer, i) == 1) {
+            final int bufferIndex =
+                view.getInt(((long) i * elementSize) + lengthPrefixWidth);
+            if (dataBufferInfo.containsKey(bufferIndex)) {
+              dataBufferInfo.compute(bufferIndex, (key, value) -> value != 
null ? value + (long) length : 0);
+            } else {
+              dataBufferInfo.put(bufferIndex, (long) length);
+            }
+          }
+        }
+      }

Review Comment:
   I'm not sure you could do much better? Other than just allocating a static 
long[] array of lengths since you should know the number of buffers already



##########
java/c/src/test/java/org/apache/arrow/c/StreamTest.java:
##########
@@ -132,6 +134,80 @@ public void roundtripStrings() throws Exception {
     }
   }
 
+  @Test
+  public void roundtripStringViews() throws Exception {
+    final Schema schema = new Schema(Arrays.asList(Field.nullable("ints", new 
ArrowType.Int(32, true)),
+        Field.nullable("string_views", new ArrowType.Utf8View())));
+    final List<ArrowRecordBatch> batches = new ArrayList<>();
+    try (final VectorSchemaRoot root = VectorSchemaRoot.create(schema, 
allocator)) {
+      final IntVector ints = (IntVector) root.getVector(0);
+      final ViewVarCharVector strs = (ViewVarCharVector) root.getVector(1);
+      VectorUnloader unloader = new VectorUnloader(root);
+
+      root.allocateNew();
+      ints.setSafe(0, 1);
+      ints.setSafe(1, 2);
+      ints.setSafe(2, 4);
+      ints.setSafe(3, 8);
+      strs.setSafe(0, "".getBytes(StandardCharsets.UTF_8));
+      strs.setSafe(1, "a".getBytes(StandardCharsets.UTF_8));
+      strs.setSafe(2, "bc1234567890bc".getBytes(StandardCharsets.UTF_8));
+      strs.setSafe(3, "defg1234567890defg".getBytes(StandardCharsets.UTF_8));
+      root.setRowCount(4);
+      batches.add(unloader.getRecordBatch());
+
+      root.allocateNew();
+      ints.setSafe(0, 1);
+      ints.setNull(1);
+      ints.setSafe(2, 4);
+      ints.setNull(3);
+      strs.setSafe(0, "".getBytes(StandardCharsets.UTF_8));
+      strs.setNull(1);
+      strs.setSafe(2, "bc1234567890bc".getBytes(StandardCharsets.UTF_8));
+      strs.setNull(3);
+      root.setRowCount(4);
+      batches.add(unloader.getRecordBatch());
+      roundtrip(schema, batches);
+    }
+  }
+
+  @Test
+  public void roundtripBinaryViews() throws Exception {
+    final Schema schema = new Schema(Arrays.asList(Field.nullable("ints", new 
ArrowType.Int(32, true)),
+        Field.nullable("binary_views", new ArrowType.BinaryView())));
+    final List<ArrowRecordBatch> batches = new ArrayList<>();
+    try (final VectorSchemaRoot root = VectorSchemaRoot.create(schema, 
allocator)) {
+      final IntVector ints = (IntVector) root.getVector(0);
+      final ViewVarBinaryVector strs = (ViewVarBinaryVector) root.getVector(1);
+      VectorUnloader unloader = new VectorUnloader(root);
+
+      root.allocateNew();
+      ints.setSafe(0, 1);
+      ints.setSafe(1, 2);
+      ints.setSafe(2, 4);
+      ints.setSafe(3, 8);
+      strs.setSafe(0, "".getBytes(StandardCharsets.UTF_8));

Review Comment:
   yes



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to