tustvold commented on code in PR #5080:
URL: https://github.com/apache/arrow-rs/pull/5080#discussion_r1397484346


##########
arrow-integration-testing/src/lib.rs:
##########
@@ -44,7 +50,50 @@ pub struct ArrowFile {
     pub batches: Vec<RecordBatch>,
 }
 
-pub fn read_json_file(json_name: &str) -> Result<ArrowFile> {
+// Canonicalize the names of map fields in a schema
+pub fn canonicalize_schema(schema: &Schema) -> Schema {
+    let fields = schema
+        .fields()
+        .iter()
+        .map(|field| match field.data_type() {
+            DataType::Map(child_field, sorted) => match 
child_field.data_type() {
+                DataType::Struct(fields) if fields.len() == 2 => {
+                    let first_field = fields.get(0).unwrap();
+                    let key_field =
+                        Arc::new(Field::new("key", 
first_field.data_type().clone(), false));
+                    let second_field = fields.get(1).unwrap();
+                    let value_field = Arc::new(Field::new(
+                        "value",
+                        second_field.data_type().clone(),
+                        second_field.is_nullable(),
+                    ));
+
+                    let fields = Fields::from([key_field, value_field]);
+                    let struct_type = DataType::Struct(fields);
+                    let child_field = Field::new("entries", struct_type, 
false);
+
+                    Arc::new(Field::new(
+                        field.name().as_str(),
+                        DataType::Map(Arc::new(child_field), *sorted),
+                        field.is_nullable(),
+                    ))
+                }
+                _ => panic!("The child field of Map type should be Struct type 
with 2 fields."),
+            },
+            _ => field.clone(),
+        })
+        .collect::<Fields>();
+
+    Schema::new(fields).with_metadata(schema.metadata().clone())
+}
+
+struct PartialArrowFile {

Review Comment:
   Perhaps we could get a doc comment explaining how this differs from 
ArrowFile? Perhaps LazyArrowFile might be a better name??
   
   Alternatively I wonder if we could just make ArrowFile lazy, and have member 
functions for decoding a batch to RecordBatch by index or something



##########
arrow/src/ffi.rs:
##########
@@ -234,46 +235,59 @@ pub fn to_ffi(data: &ArrayData) -> 
Result<(FFI_ArrowArray, FFI_ArrowSchema)> {
 ///
 /// This struct assumes that the incoming data agrees with the C data 
interface.
 pub fn from_ffi(array: FFI_ArrowArray, schema: &FFI_ArrowSchema) -> 
Result<ArrayData> {
+    let dt = DataType::try_from(schema)?;
     let array = Arc::new(array);
-    let tmp = ArrowArray {
+    let tmp = ImportedArrowArray {
         array: &array,
-        schema,
+        data_type: dt,
+        owner: &array,
+    };
+    tmp.consume()
+}
+
+/// Import [ArrayData] from the C Data Interface
+///
+/// # Safety
+///
+/// This struct assumes that the incoming data agrees with the C data 
interface.
+pub fn from_ffi_and_data_type(array: FFI_ArrowArray, data_type: DataType) -> 
Result<ArrayData> {

Review Comment:
   I think this method probably should be unsafe



##########
arrow/src/ffi.rs:
##########
@@ -234,46 +235,59 @@ pub fn to_ffi(data: &ArrayData) -> 
Result<(FFI_ArrowArray, FFI_ArrowSchema)> {
 ///
 /// This struct assumes that the incoming data agrees with the C data 
interface.
 pub fn from_ffi(array: FFI_ArrowArray, schema: &FFI_ArrowSchema) -> 
Result<ArrayData> {

Review Comment:
   This method should also be unsafe, I don't know why it isn't



##########
arrow/src/ffi.rs:
##########
@@ -388,25 +437,20 @@ impl<'a> ArrowArray<'a> {
         unsafe { create_buffer(self.owner.clone(), self.array, 0, buffer_len) }
     }
 
-    fn child(&self, index: usize) -> ArrowArray {
-        ArrowArray {
-            array: self.array.child(index),
-            schema: self.schema.child(index),
-            owner: self.owner,
-        }
-    }
-
-    fn dictionary(&self) -> Option<ArrowArray> {
-        match (self.array.dictionary(), self.schema.dictionary()) {
-            (Some(array), Some(schema)) => Some(ArrowArray {
+    fn dictionary(&self) -> Result<Option<ImportedArrowArray>> {
+        match (self.array.dictionary(), &self.data_type) {
+            (Some(array), DataType::Dictionary(_, value_type)) => 
Ok(Some(ImportedArrowArray {
                 array,
-                schema,
+                data_type: *value_type.clone(),

Review Comment:
   Is the asterisk needed?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to