Re: [PR] Add simple Java HTTP server example [arrow-experiments]

via GitHub Fri, 08 Mar 2024 08:04:46 -0800


ianmcook commented on code in PR #4:
URL: https://github.com/apache/arrow-experiments/pull/4#discussion_r1517919769



##########
http/get_simple/java/server/src/main/java/com/example/ArrowHttpServer.java:
##########
@@ -0,0 +1,131 @@
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.VectorLoader;
+import org.apache.arrow.vector.VectorUnloader;
+import org.apache.arrow.vector.ipc.ArrowStreamWriter;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Random;
+import java.util.stream.Collectors;
+
+import org.eclipse.jetty.server.Server;
+import org.eclipse.jetty.server.Request;
+import org.eclipse.jetty.server.handler.AbstractHandler;
+
+import jakarta.servlet.ServletException;
+import jakarta.servlet.http.HttpServlet;
+import jakarta.servlet.http.HttpServletRequest;
+import jakarta.servlet.http.HttpServletResponse;
+
+public class ArrowHttpServer extends AbstractHandler {
+
+    static BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+
+    static Schema schema = new Schema(
+        List.of(
+            new Field("a", FieldType.nullable(new ArrowType.Int(64, true)), 
null),
+            new Field("b", FieldType.nullable(new ArrowType.Int(64, true)), 
null),
+            new Field("c", FieldType.nullable(new ArrowType.Int(64, true)), 
null),
+            new Field("d", FieldType.nullable(new ArrowType.Int(64, true)), 
null)
+        ));
+
+    static List<ArrowRecordBatch> batches;
+
+    static Random random = new Random();
+
+    public static List<ArrowRecordBatch> getPutData() {
+        int totalRecords = 100000000;
+        int length = 4096;
+
+        List<ArrowRecordBatch> batches = new ArrayList<>();
+
+        try (VectorSchemaRoot root = VectorSchemaRoot.create(schema, 
allocator)) {
+            
+            String[] names = 
schema.getFields().stream().map(Field::getName).toArray(String[]::new);
+            for (String name : names) {
+                byte[] randomBytes = new byte[length * 8];
+                random.nextBytes(randomBytes);
+
+                byte[] validityBytes = new byte[length / 8];
+                Arrays.fill(validityBytes, (byte) 0xFF);
+
+                BigIntVector vector = (BigIntVector) root.getVector(name);
+                vector.allocateNew(length);
+                vector.setValueCount(length);
+                ArrowBuf dataBuffer = vector.getDataBuffer();
+                dataBuffer.setBytes(0, randomBytes);
+
+                ArrowBuf validityBuffer = vector.getValidityBuffer();
+                validityBuffer.setBytes(0, validityBytes);
+                root.setRowCount(length);
+            }
+
+            int records = 0;
+            int lastLength;
+            while (records < totalRecords) {
+                if (records + length > totalRecords) {
+                    lastLength = totalRecords - records;
+                    try (VectorSchemaRoot slice = root.slice(0, lastLength)) {
+                        VectorUnloader unloader = new VectorUnloader(slice);
+                        ArrowRecordBatch arb = unloader.getRecordBatch();
+                        batches.add(arb);
+                    }
+                    records += lastLength;
+                } else {
+                    VectorUnloader unloader = new VectorUnloader(root);
+                    ArrowRecordBatch arb = unloader.getRecordBatch();
+                    batches.add(arb);
+                    records += length;
+                }
+            }
+        }
+
+        return batches;
+    }
+
+    public void handle(String target, Request baseRequest, HttpServletRequest 
request, HttpServletResponse response)
+            throws IOException, ServletException {
+
+        response.setContentType("application/vnd.apache.arrow.stream");
+        response.setStatus(HttpServletResponse.SC_OK);
+

Review Comment:
   @tdcmeehan In the case where the data that you're writing out is fully 
materialized before you start writing it out, we could just create the IPC 
messages and count how many bytes are in them to provide an exact 
`Content-Length` before writing them. But in the case where the data is not 
fully materialized before you start writing it out, it would be extremely 
difficult to estimate the final content length. It might be easy enough to 
estimate it with toy examples, but in real-world cases where you have 
dictionaries and other encodings, there's no simple approach to that, and 
AFAIK none of the Arrow libraries has tooling that does estimations like 
that.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] Add simple Java HTTP server example [arrow-experiments]

Reply via email to