viirya commented on code in PR #2040:
URL: https://github.com/apache/arrow-rs/pull/2040#discussion_r919235111


##########
arrow/src/ipc/writer.rs:
##########
@@ -894,12 +1031,66 @@ fn write_array_data(
             Some(buffer) => buffer.clone(),
         };
 
-        offset = write_buffer(&null_buffer, buffers, arrow_data, offset);
+        offset = write_buffer(null_buffer.as_slice(), buffers, arrow_data, 
offset);
     }
 
-    array_data.buffers().iter().for_each(|buffer| {
-        offset = write_buffer(buffer, buffers, arrow_data, offset);
-    });
+    let data_type = array_data.data_type();
+    if matches!(
+        data_type,
+        DataType::Binary | DataType::LargeBinary | DataType::Utf8 | 
DataType::LargeUtf8
+    ) {
+        let total_bytes = get_total_bytes(array_data);
+        let value_buffer = &array_data.buffers()[1];
+        if buffer_need_truncate(
+            array_data.offset(),
+            value_buffer,
+            &BufferSpec::VariableWidth,
+            total_bytes,
+        ) {
+            // Rebase offsets and truncate values
+            let new_offsets = get_zero_based_value_offsets(array_data);
+            offset = write_buffer(new_offsets.as_slice(), buffers, arrow_data, 
offset);
+
+            let byte_offset = get_buffer_offset(array_data);
+            let buffer_length = min(total_bytes, value_buffer.len() - 
byte_offset);
+            let buffer_slice =
+                &value_buffer.as_slice()[byte_offset..(byte_offset + 
buffer_length)];
+            offset = write_buffer(buffer_slice, buffers, arrow_data, offset);
+        } else {
+            array_data.buffers().iter().for_each(|buffer| {
+                offset = write_buffer(buffer.as_slice(), buffers, arrow_data, 
offset);
+            });
+        }
+    } else if DataType::is_numeric(data_type)
+        || DataType::is_temporal(data_type)
+        || matches!(array_data.data_type(), DataType::FixedSizeBinary(_))

Review Comment:
   Hmm, yes, it seems so. I rechecked the C++ implementation, and it does 
truncate DictionaryArray keys: 
https://github.com/apache/arrow/blob/master/cpp/src/arrow/ipc/writer.cc#L524-L527



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to