matthewmturner commented on issue #208:
URL: https://github.com/apache/arrow-rs/issues/208#issuecomment-919341814


   Hi @nevi-me — I've been building up a test that I could use to compare the 
IPC size pre and post the fix, but I haven't been able to produce the expected 
results.  Code is below:
   
   ```
   pub fn compare_ipc() {
       let arr_data = vec![1, 2, 3, 4, 5];
       let val_data = vec![5, 6, 7, 8, 9];
       let id_arr = Int32Array::from(arr_data);
       let val_arr = Int32Array::from(val_data);
       let id_arr_slice = id_arr.slice(1, 3);
       let val_arr_slice = val_arr.slice(1, 3);
   
       let schema = Schema::new(vec![
           Field::new("id", DataType::Int32, false),
           Field::new("val", DataType::Int32, false),
       ]);
   
       let raw_batch = RecordBatch::try_new(
           Arc::new(schema.clone()),
           vec![Arc::new(id_arr), Arc::new(val_arr)],
       )
       .unwrap();
       println!("{:?}", raw_batch);
   
       let slice_batch =
           RecordBatch::try_new(Arc::new(schema.clone()), vec![id_arr_slice, 
val_arr_slice]).unwrap();
       println!("{:?}", slice_batch);
   
       println!("Running first test");
       raw_batch
           .columns()
           .iter()
           .zip(slice_batch.columns())
           .for_each(|(a, b)| {
               println!("{:?} : {:?}", a.data(), b.data());
               assert_eq!(a.data_type(), b.data_type());
               assert_eq!(a.data().buffers()[0], b.data().buffers()[0]);
           });
   
       let raw_path = "raw_data.arrow";
       let slice_path = "slice_data.arrow";
   
       {
           let raw_file = File::create(raw_path).unwrap();
           let mut raw_writer = FileWriter::try_new(raw_file, &schema).unwrap();
   
           raw_writer.write(&raw_batch).unwrap();
           raw_writer.finish().unwrap();
       }
       {
           let slice_file = File::create(slice_path).unwrap();
           let mut slice_writer = FileWriter::try_new(slice_file, 
&schema).unwrap();
   
           slice_writer.write(&slice_batch).unwrap();
           slice_writer.finish().unwrap();
       }
   
       let raw_file = File::open(raw_path).unwrap();
       let slice_file = File::open(slice_path).unwrap();
       let mut raw_reader = FileReader::try_new(raw_file).unwrap();
       let mut slice_reader = FileReader::try_new(slice_file).unwrap();
   
       while let Some(Ok(raw_ipc_batch)) = raw_reader.next() {
           println!("{:?}", raw_ipc_batch);
           while let Some(Ok(slice_ipc_batch)) = slice_reader.next() {
               println!("{:?}", slice_ipc_batch);
               raw_ipc_batch
                   .columns()
                   .iter()
                   .zip(slice_ipc_batch.columns())
                   .for_each(|(a, b)| {
                       println!("{:?} : {:?}", a.data(), b.data());
                       assert_eq!(a.data_type(), b.data_type());
                       assert_eq!(a.data().buffers()[0], b.data().buffers()[0]);
                   });
           }
       }
   }
   ```
   Which produces the following output:
   ```
   RecordBatch { schema: Schema { fields: [Field { name: "id", data_type: 
Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None }, 
Field { name: "val", data_type: Int32, nullable: false, dict_id: 0, 
dict_is_ordered: false, metadata: None }], metadata: {} }, columns: 
[PrimitiveArray<Int32>
   [
     1,
     2,
     3,
     4,
     5,
   ], PrimitiveArray<Int32>
   [
     5,
     6,
     7,
     8,
     9,
   ]] }
   RecordBatch { schema: Schema { fields: [Field { name: "id", data_type: 
Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None }, 
Field { name: "val", data_type: Int32, nullable: false, dict_id: 0, 
dict_is_ordered: false, metadata: None }], metadata: {} }, columns: 
[PrimitiveArray<Int32>
   [
     2,
     3,
     4,
   ], PrimitiveArray<Int32>
   [
     6,
     7,
     8,
   ]] }
   Running first test
   ArrayData { data_type: Int32, len: 5, null_count: 0, offset: 0, buffers: 
[Buffer { data: Bytes { ptr: 0x11d606c40, len: 20, data: [1, 0, 0, 0, 2, 0, 0, 
0, 3, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0] }, offset: 0 }], child_data: [], 
null_bitmap: None } : ArrayData { data_type: Int32, len: 3, null_count: 0, 
offset: 1, buffers: [Buffer { data: Bytes { ptr: 0x11d606c40, len: 20, data: 
[1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0] }, offset: 0 }], 
child_data: [], null_bitmap: None }
   ArrayData { data_type: Int32, len: 5, null_count: 0, offset: 0, buffers: 
[Buffer { data: Bytes { ptr: 0x11d606d00, len: 20, data: [5, 0, 0, 0, 6, 0, 0, 
0, 7, 0, 0, 0, 8, 0, 0, 0, 9, 0, 0, 0] }, offset: 0 }], child_data: [], 
null_bitmap: None } : ArrayData { data_type: Int32, len: 3, null_count: 0, 
offset: 1, buffers: [Buffer { data: Bytes { ptr: 0x11d606d00, len: 20, data: 
[5, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, 8, 0, 0, 0, 9, 0, 0, 0] }, offset: 0 }], 
child_data: [], null_bitmap: None }
   RecordBatch { schema: Schema { fields: [Field { name: "id", data_type: 
Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None }, 
Field { name: "val", data_type: Int32, nullable: false, dict_id: 0, 
dict_is_ordered: false, metadata: None }], metadata: {} }, columns: 
[PrimitiveArray<Int32>
   [
     1,
     2,
     3,
     4,
     5,
   ], PrimitiveArray<Int32>
   [
     5,
     6,
     7,
     8,
     9,
   ]] }
   RecordBatch { schema: Schema { fields: [Field { name: "id", data_type: 
Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None }, 
Field { name: "val", data_type: Int32, nullable: false, dict_id: 0, 
dict_is_ordered: false, metadata: None }], metadata: {} }, columns: 
[PrimitiveArray<Int32>
   [
     null,
     null,
     5,
   ], PrimitiveArray<Int32>
   [
     null,
     null,
     9,
   ]] }
   ArrayData { data_type: Int32, len: 5, null_count: 0, offset: 0, buffers: 
[Buffer { data: Bytes { ptr: 0x11d607ac0, len: 24, data: [1, 0, 0, 0, 2, 0, 0, 
0, 3, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0] }, offset: 0 }], child_data: 
[], null_bitmap: None } : ArrayData { data_type: Int32, len: 3, null_count: 2, 
offset: 0, buffers: [Buffer { data: Bytes { ptr: 0x11d704240, len: 12, data: 
[0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0] }, offset: 0 }], child_data: [], 
null_bitmap: Some(Bitmap { bits: Buffer { data: Bytes { ptr: 0x11d7041c0, len: 
1, data: [4] }, offset: 0 } }) }
   thread 'main' panicked at 'assertion failed: `(left == right)`
     left: `Buffer { data: Bytes { ptr: 0x11d607ac0, len: 24, data: [1, 0, 0, 
0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0] }, offset: 0 }`,
    right: `Buffer { data: Bytes { ptr: 0x11d704240, len: 12, data: [0, 0, 0, 
0, 0, 0, 0, 0, 5, 0, 0, 0] }, offset: 0 }`', 
src/flight_sends_too_much_data.rs:149:21
   note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
   ```
   Basically, I'm just trying to compare the buffers from two batches (one batch 
is a slice of the other) after reading their IPC files and comparing the value 
buffers.  Given what we are working on, I was expecting the data to be the same 
(I guess the assertion would still fail after reading the IPC files, as they 
would have different pointers, but I expected the value arrays to have the same 
values).  However, the value arrays were different (the full `ArrayData` values 
are above):
   
   ```
   left: `Buffer { data: Bytes { ptr: 0x11d607ac0, len: 24, data: [1, 0, 0, 0, 
2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0] }, offset: 0 }`,
   right: `Buffer { data: Bytes { ptr: 0x11d704240, len: 12, data: [0, 0, 0, 0, 
0, 0, 0, 0, 5, 0, 0, 0] }, offset: 0 }`', 
src/flight_sends_too_much_data.rs:149:21
   ```
   I'm going to keep playing around with this, but I wanted to get your thoughts 
on whether I am approaching this the right way.
   
   Thanks again for all your help - much appreciated.
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to