This is an automated email from the ASF dual-hosted git repository.

viirya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 19f0ada47 Fix generate_unions_case for Rust case (#1677)
19f0ada47 is described below

commit 19f0ada47333eb80105f4ff53aaf887b2efa8873
Author: Liang-Chi Hsieh <[email protected]>
AuthorDate: Tue May 10 16:45:24 2022 -0700

    Fix generate_unions_case for Rust case (#1677)
    
    * Fix generate_unions_case for rust case
    
    * Add test
---
 arrow/src/datatypes/datatype.rs    | 46 +++++++++++++++++++++++++++
 arrow/src/datatypes/field.rs       | 24 ++++++++++++++
 arrow/src/datatypes/mod.rs         | 64 ++++++++++++++++++++++++++++++++++++++
 arrow/src/util/integration_util.rs |  4 +++
 integration-testing/src/lib.rs     | 55 ++++++++++++++++++++++++++++++++
 5 files changed, 193 insertions(+)

diff --git a/arrow/src/datatypes/datatype.rs b/arrow/src/datatypes/datatype.rs
index e6f5461fd..c5cc8f017 100644
--- a/arrow/src/datatypes/datatype.rs
+++ b/arrow/src/datatypes/datatype.rs
@@ -499,6 +499,52 @@ impl DataType {
                         ))
                     }
                 }
+                Some(s) if s == "union" => {
+                    if let Some(Value::String(mode)) = map.get("mode") {
+                        let union_mode = if mode == "SPARSE" {
+                            UnionMode::Sparse
+                        } else if mode == "DENSE" {
+                            UnionMode::Dense
+                        } else {
+                            return Err(ArrowError::ParseError(format!(
+                                "Unknown union mode {:?} for union",
+                                mode
+                            )));
+                        };
+                        if let Some(type_ids) = map.get("typeIds") {
+                            let type_ids = type_ids
+                                .as_array()
+                                .unwrap()
+                                .iter()
+                                .map(|t| t.as_i64().unwrap())
+                                .collect::<Vec<_>>();
+
+                            let default_fields = type_ids
+                                .iter()
+                                .map(|t| {
+                                    Field::new("", DataType::Boolean, true).with_metadata(
+                                        Some(
+                                            [("type_id".to_string(), t.to_string())]
+                                                .iter()
+                                                .cloned()
+                                                .collect(),
+                                        ),
+                                    )
+                                })
+                                .collect::<Vec<_>>();
+
+                            Ok(DataType::Union(default_fields, union_mode))
+                        } else {
+                            Err(ArrowError::ParseError(
+                                "Expecting a typeIds for union ".to_string(),
+                            ))
+                        }
+                    } else {
+                        Err(ArrowError::ParseError(
+                            "Expecting a mode for union".to_string(),
+                        ))
+                    }
+                }
                 Some(other) => Err(ArrowError::ParseError(format!(
                     "invalid or unsupported type name: {} in {:?}",
                     other, json
diff --git a/arrow/src/datatypes/field.rs b/arrow/src/datatypes/field.rs
index ded7fc67b..6471f1ed7 100644
--- a/arrow/src/datatypes/field.rs
+++ b/arrow/src/datatypes/field.rs
@@ -390,6 +390,30 @@ impl Field {
                             }
                         }
                     }
+                    DataType::Union(fields, mode) => match map.get("children") {
+                        Some(Value::Array(values)) => {
+                            let mut union_fields: Vec<Field> =
+                                values.iter().map(Field::from).collect::<Result<_>>()?;
+                            fields.iter().zip(union_fields.iter_mut()).for_each(
+                                |(f, union_field)| {
+                                    union_field.set_metadata(Some(
+                                        f.metadata().unwrap().clone(),
+                                    ));
+                                },
+                            );
+                            DataType::Union(union_fields, mode)
+                        }
+                        Some(_) => {
+                            return Err(ArrowError::ParseError(
+                                "Field 'children' must be an array".to_string(),
+                            ))
+                        }
+                        None => {
+                            return Err(ArrowError::ParseError(
+                                "Field missing 'children' attribute".to_string(),
+                            ));
+                        }
+                    },
                     _ => data_type,
                 };
 
diff --git a/arrow/src/datatypes/mod.rs b/arrow/src/datatypes/mod.rs
index 2c7fc8183..c3015972a 100644
--- a/arrow/src/datatypes/mod.rs
+++ b/arrow/src/datatypes/mod.rs
@@ -392,6 +392,70 @@ mod tests {
         assert_eq!(expected, dt);
     }
 
+    #[test]
+    fn parse_union_from_json() {
+        let json = r#"
+        {
+            "name": "my_union",
+            "nullable": false,
+            "type": {
+                "name": "union",
+                "mode": "SPARSE",
+                "typeIds": [
+                    5,
+                    7
+                ]
+            },
+            "children": [
+                {
+                    "name": "f1",
+                    "type": {
+                        "name": "int",
+                        "isSigned": true,
+                        "bitWidth": 32
+                    },
+                    "nullable": true,
+                    "children": []
+                },
+                {
+                    "name": "f2",
+                    "type": {
+                        "name": "utf8"
+                    },
+                    "nullable": true,
+                    "children": []
+                }
+            ]
+        }
+        "#;
+        let value: Value = serde_json::from_str(json).unwrap();
+        let dt = Field::from(&value).unwrap();
+
+        let expected = Field::new(
+            "my_union",
+            DataType::Union(
+                vec![
+                    Field::new("f1", DataType::Int32, true).with_metadata(Some(
+                        [("type_id".to_string(), "5".to_string())]
+                            .iter()
+                            .cloned()
+                            .collect(),
+                    )),
+                    Field::new("f2", DataType::Utf8, true).with_metadata(Some(
+                        [("type_id".to_string(), "7".to_string())]
+                            .iter()
+                            .cloned()
+                            .collect(),
+                    )),
+                ],
+                UnionMode::Sparse,
+            ),
+            false,
+        );
+
+        assert_eq!(expected, dt);
+    }
+
     #[test]
     fn parse_utf8_from_json() {
         let json = "{\"name\":\"utf8\"}";
diff --git a/arrow/src/util/integration_util.rs b/arrow/src/util/integration_util.rs
index f19ceb4bb..a174da6ea 100644
--- a/arrow/src/util/integration_util.rs
+++ b/arrow/src/util/integration_util.rs
@@ -132,6 +132,8 @@ pub struct ArrowJsonColumn {
     pub data: Option<Vec<Value>>,
     #[serde(rename = "OFFSET")]
     pub offset: Option<Vec<Value>>, // leaving as Value as 64-bit offsets are strings
+    #[serde(rename = "TYPE_ID")]
+    pub type_id: Option<Vec<i8>>,
     pub children: Option<Vec<ArrowJsonColumn>>,
 }
 
@@ -472,6 +474,7 @@ impl ArrowJsonBatch {
                         validity: Some(validity),
                         data: Some(data),
                         offset: None,
+                        type_id: None,
                         children: None,
                     }
                 }
@@ -481,6 +484,7 @@ impl ArrowJsonBatch {
                     validity: None,
                     data: None,
                     offset: None,
+                    type_id: None,
                     children: None,
                 },
             };
diff --git a/integration-testing/src/lib.rs b/integration-testing/src/lib.rs
index 43b7cec01..c70459938 100644
--- a/integration-testing/src/lib.rs
+++ b/integration-testing/src/lib.rs
@@ -632,6 +632,61 @@ fn array_from_json(
             let array = MapArray::from(array_data);
             Ok(Arc::new(array))
         }
+        DataType::Union(fields, _) => {
+            let field_type_ids = fields
+                .iter()
+                .enumerate()
+                .into_iter()
+                .map(|(idx, f)| {
+                    (
+                        f.metadata()
+                            .and_then(|m| m.get("type_id"))
+                            .unwrap()
+                            .parse::<i8>()
+                            .unwrap(),
+                        idx,
+                    )
+                })
+                .collect::<HashMap<_, _>>();
+
+            let type_ids = if let Some(type_id) = json_col.type_id {
+                type_id
+                    .iter()
+                    .map(|t| {
+                        if field_type_ids.contains_key(t) {
+                            Ok(*(field_type_ids.get(t).unwrap()) as i8)
+                        } else {
+                            Err(ArrowError::JsonError(format!(
+                                "Unable to find type id {:?}",
+                                t
+                            )))
+                        }
+                    })
+                    .collect::<Result<_>>()?
+            } else {
+                vec![]
+            };
+
+            let offset: Option<Buffer> = json_col.offset.map(|offsets| {
+                let offsets: Vec<i32> =
+                    offsets.iter().map(|v| v.as_i64().unwrap() as i32).collect();
+                Buffer::from(&offsets.to_byte_slice())
+            });
+
+            let mut children: Vec<(Field, Arc<dyn Array>)> = vec![];
+            for (field, col) in fields.iter().zip(json_col.children.unwrap()) {
+                let array = array_from_json(field, col, dictionaries)?;
+                children.push((field.clone(), array));
+            }
+
+            let array = UnionArray::try_new(
+                Buffer::from(&type_ids.to_byte_slice()),
+                offset,
+                children,
+            )
+            .unwrap();
+            Ok(Arc::new(array))
+        }
         t => Err(ArrowError::JsonError(format!(
             "data type {:?} not supported",
             t

Reply via email to