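From a quick glance, it seems one code path uses the **stream** writer, not the 
file writer (`WriteToBuffer.execute1` writes with `ArrowStreamWriter`, while 
`ReadFromBuffer.execute1` reads with `ArrowFileReader`). See this documentation 
section and the next for the difference [1].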

[1]: https://arrow.apache.org/docs/format/Columnar.html#ipc-streaming-format

-David

On Mon, Feb 6, 2023, at 14:58, Vilayannur Sitaraman wrote:
> Hi,
>    I am trying out Arrow Flight and I have these two programs to write to and 
> read from a file.
>  
> public class WriteToBuffer {
>   public static void main(String[] args) {
>     WriteToBuffer wb = new WriteToBuffer();
>     wb.execute1();
>   }
> 
>   public void execute1(){
>     try (BufferAllocator rootAllocator = new RootAllocator()) {
>       Field name = new Field("name", FieldType.*nullable*(new 
> ArrowType.Utf8()), null);
>       Field age = new Field("age", FieldType.*nullable*(new ArrowType.Int(32, 
> true)), null);
>       Schema schemaPerson = new Schema(*asList*(name, age));
>       try(
>           VectorSchemaRoot vectorSchemaRoot = 
> VectorSchemaRoot.*create*(schemaPerson, rootAllocator)
>       ){
>         VarCharVector nameVector = (VarCharVector) 
> vectorSchemaRoot.getVector("name");
>         nameVector.allocateNew(3);
>         nameVector.set(0, "David".getBytes());
>         nameVector.set(1, "Gladis".getBytes());
>         nameVector.set(2, "Juan".getBytes());
>         IntVector ageVector = (IntVector) vectorSchemaRoot.getVector("age");
>         ageVector.allocateNew(3);
>         ageVector.set(0, 10);
>         ageVector.set(1, 20);
>         ageVector.set(2, 30);
>         vectorSchemaRoot.setRowCount(3);
>         File file = new File("streaming_to_file.arrow");
>         try (
>             FileOutputStream fileOutputStream = new FileOutputStream(file);
>             ArrowStreamWriter writer = new 
> ArrowStreamWriter(vectorSchemaRoot, null, fileOutputStream.getChannel())
>         ){
>           writer.start();
>           System.*out*.println("Writing Batch");
>           writer.writeBatch();
>           System.*out*.println("Number of rows written: " + 
> vectorSchemaRoot.getRowCount());
>           writer.end();
>         } catch (IOException e) {
>           e.printStackTrace();
>         }
>       }
>     }
>   }
>   public ArrowFileWriter execute() {
>     try (
>         BufferAllocator allocator = new RootAllocator()) {
>       Field name = new Field("name", FieldType.*nullable*(new 
> ArrowType.Utf8()), null);
>       Field age = new Field("age", FieldType.*nullable*(new ArrowType.Int(32, 
> true)), null);
>       Schema schemaPerson = new Schema(*asList*(name, age));
>       try (
>           VectorSchemaRoot vectorSchemaRoot = 
> VectorSchemaRoot.*create*(schemaPerson, allocator)
>       ) {
>         VarCharVector nameVector = (VarCharVector) 
> vectorSchemaRoot.getVector("name");
>         nameVector.allocateNew(3);
>         nameVector.set(0, "David".getBytes());
>         nameVector.set(1, "Gladis".getBytes());
>         nameVector.set(2, "Juan".getBytes());
>         IntVector ageVector = (IntVector) vectorSchemaRoot.getVector("age");
>         ageVector.allocateNew(3);
>         ageVector.set(0, 10);
>         ageVector.set(1, 20);
>         ageVector.set(2, 30);
>         vectorSchemaRoot.setRowCount(3);
>         try (
>             ByteArrayOutputStream out = new ByteArrayOutputStream();
>             ArrowFileWriter writer = new ArrowFileWriter(vectorSchemaRoot, 
> null, Channels.*newChannel*(out))
>         ) {
>           writer.start();
>           writer.writeBatch();
> 
>           System.*out*.println("Record batches written: " + 
> writer.getRecordBlocks().size() +
>               ". Number of rows written: " + vectorSchemaRoot.getRowCount());
>           return writer;
>         } catch (IOException e) {
>           e.printStackTrace();
>         }
>       }
>     }
>     return null;
>   }
> }
> public class ReadFromBuffer {
> 
>   public static void main(String[] args) {
> 
>     ReadFromBuffer rb = new ReadFromBuffer();
>     rb.execute1();
>   }
> 
>   public void execute1(){
>     File file = new File("streaming_to_file.arrow");
>     try(
>         BufferAllocator rootAllocator = new RootAllocator();
>         FileInputStream fileInputStream = new FileInputStream(file);
>         ArrowFileReader reader = new 
> ArrowFileReader(fileInputStream.getChannel(), rootAllocator)
>     ){
>       System.*out*.println("Record batches in file: " + 
> reader.getRecordBlocks().size());
>       for (ArrowBlock arrowBlock : reader.getRecordBlocks()) {
>         reader.loadRecordBatch(arrowBlock);
>         VectorSchemaRoot vectorSchemaRootRecover = 
> reader.getVectorSchemaRoot();
>         System.*out*.print(vectorSchemaRootRecover.contentToTSVString());
>       }
>     } catch (IOException e) {
>       e.printStackTrace();
>     }
>   }
> 
> 
>   public void execute() {
> 
>     //Path path = 
> Paths.get("./thirdpartydeps/arrowfiles/random_access.arrow");
>     Path path = Paths.*get*("streaming_to_file.arrow");
>     try (
>         BufferAllocator rootAllocator = new RootAllocator();
>         ArrowFileReader reader = new ArrowFileReader(new 
> SeekableReadChannel(new ByteArrayReadableSeekableByteChannel(
>             Files.*readAllBytes*(path))), rootAllocator)
>     ) {
>       //System.out.println("Record batches in file: " + 
> reader.getRecordBlocks().size());
>       while (reader.loadNextBatch()) {
>         for (ArrowBlock arrowBlock : reader.getRecordBlocks()) {
>           reader.loadRecordBatch(arrowBlock);
>           VectorSchemaRoot vectorSchemaRootRecover = 
> reader.getVectorSchemaRoot();
>           System.*out*.print(vectorSchemaRootRecover.contentToTSVString());
>         }
>       }
>     } catch (IOException e) {
>       e.printStackTrace();
>     }
> 
>   }
> }
>  
> The write program successfully writes the Arrow file, but when I execute the 
> read program I get this error…
> (PyGDev1) C02G35CWMD6R:scala vsitaraman$ java -cp 
> ./jars/chapter2-assembly-1.0.jar main.java.chapter2.ReadFromBuffer
> ERROR StatusLogger Log4j2 could not find a logging implementation. Please add 
> log4j-core to the classpath. Using SimpleLogger to log to the console...
> Exception in thread "main" 
> org.apache.arrow.vector.ipc.InvalidArrowFileException: missing Magic number 
> [0, 0, -1, -1, -1, -1, 0, 0, 0, 0]
>         at 
> org.apache.arrow.vector.ipc.ArrowFileReader.readSchema(ArrowFileReader.java:98)
>         at 
> org.apache.arrow.vector.ipc.ArrowReader.initialize(ArrowReader.java:185)
>         at 
> org.apache.arrow.vector.ipc.ArrowFileReader.initialize(ArrowFileReader.java:120)
>         at 
> org.apache.arrow.vector.ipc.ArrowReader.ensureInitialized(ArrowReader.java:176)
>         at 
> org.apache.arrow.vector.ipc.ArrowFileReader.getRecordBlocks(ArrowFileReader.java:183)
>         at main.java.chapter2.ReadFromBuffer.execute1(ReadFromBuffer.java:32)
>         at main.java.chapter2.ReadFromBuffer.main(ReadFromBuffer.java:22)
>  
>  
> The arrow file that was written is as follows:
> 00000000: ffff ffff c800 0000 1000 0000 0000 0a00  ................
> 
> 00000010: 0e00 0600 0d00 0800 0a00 0000 0000 0400  ................
> 00000020: 1000 0000 0001 0a00 0c00 0000 0800 0400  ................
> 00000030: 0a00 0000 0800 0000 0800 0000 0000 0000  ................
> 00000040: 0200 0000 5800 0000 0400 0000 c2ff ffff  ....X...........
> 00000050: 1400 0000 1400 0000 1c00 0000 0000 0201  ................
> 00000060: 2000 0000 0000 0000 0000 0000 0800 0c00   ...............
> 00000070: 0800 0700 0800 0000 0000 0001 2000 0000  ............ ...
> 00000080: 0300 0000 6167 6500 0000 1200 1800 1400  ....age.........
> 00000090: 1300 1200 0c00 0000 0800 0400 1200 0000  ................
> 000000a0: 1400 0000 1400 0000 1800 0000 0000 0501  ................
> 000000b0: 1400 0000 0000 0000 0000 0000 0400 0400  ................
> 000000c0: 0400 0000 0400 0000 6e61 6d65 0000 0000  ........name....
> 000000d0: ffff ffff c800 0000 1400 0000 0000 0000  ................
> 000000e0: 0c00 1600 0e00 1500 1000 0400 0c00 0000  ................
> 8 lines filtered
>  
> What could I be missing? Thanks
> Sitaraman

Reply via email to