This is an automated email from the ASF dual-hosted git repository.
viirya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 19f0ada47 Fix generate_unions_case for Rust case (#1677)
19f0ada47 is described below
commit 19f0ada47333eb80105f4ff53aaf887b2efa8873
Author: Liang-Chi Hsieh <[email protected]>
AuthorDate: Tue May 10 16:45:24 2022 -0700
Fix generate_unions_case for Rust case (#1677)
* Fix generate_unions_case for rust case
* Add test
---
arrow/src/datatypes/datatype.rs | 46 +++++++++++++++++++++++++++
arrow/src/datatypes/field.rs | 24 ++++++++++++++
arrow/src/datatypes/mod.rs | 64 ++++++++++++++++++++++++++++++++++++++
arrow/src/util/integration_util.rs | 4 +++
integration-testing/src/lib.rs | 55 ++++++++++++++++++++++++++++++++
5 files changed, 193 insertions(+)
diff --git a/arrow/src/datatypes/datatype.rs b/arrow/src/datatypes/datatype.rs
index e6f5461fd..c5cc8f017 100644
--- a/arrow/src/datatypes/datatype.rs
+++ b/arrow/src/datatypes/datatype.rs
@@ -499,6 +499,52 @@ impl DataType {
))
}
}
+ Some(s) if s == "union" => {
+ if let Some(Value::String(mode)) = map.get("mode") {
+ let union_mode = if mode == "SPARSE" {
+ UnionMode::Sparse
+ } else if mode == "DENSE" {
+ UnionMode::Dense
+ } else {
+ return Err(ArrowError::ParseError(format!(
+ "Unknown union mode {:?} for union",
+ mode
+ )));
+ };
+ if let Some(type_ids) = map.get("typeIds") {
+ let type_ids = type_ids
+ .as_array()
+ .unwrap()
+ .iter()
+ .map(|t| t.as_i64().unwrap())
+ .collect::<Vec<_>>();
+
+ let default_fields = type_ids
+ .iter()
+ .map(|t| {
+ Field::new("", DataType::Boolean, true).with_metadata(
+ Some(
+ [("type_id".to_string(), t.to_string())]
+ .iter()
+ .cloned()
+ .collect(),
+ ),
+ )
+ })
+ .collect::<Vec<_>>();
+
+ Ok(DataType::Union(default_fields, union_mode))
+ } else {
+ Err(ArrowError::ParseError(
+ "Expecting a typeIds for union ".to_string(),
+ ))
+ }
+ } else {
+ Err(ArrowError::ParseError(
+ "Expecting a mode for union".to_string(),
+ ))
+ }
+ }
Some(other) => Err(ArrowError::ParseError(format!(
"invalid or unsupported type name: {} in {:?}",
other, json
diff --git a/arrow/src/datatypes/field.rs b/arrow/src/datatypes/field.rs
index ded7fc67b..6471f1ed7 100644
--- a/arrow/src/datatypes/field.rs
+++ b/arrow/src/datatypes/field.rs
@@ -390,6 +390,30 @@ impl Field {
}
}
}
+ DataType::Union(fields, mode) => match map.get("children") {
+ Some(Value::Array(values)) => {
+ let mut union_fields: Vec<Field> =
+ values.iter().map(Field::from).collect::<Result<_>>()?;
+ fields.iter().zip(union_fields.iter_mut()).for_each(
+ |(f, union_field)| {
+ union_field.set_metadata(Some(
+ f.metadata().unwrap().clone(),
+ ));
+ },
+ );
+ DataType::Union(union_fields, mode)
+ }
+ Some(_) => {
+ return Err(ArrowError::ParseError(
+ "Field 'children' must be an array".to_string(),
+ ))
+ }
+ None => {
+ return Err(ArrowError::ParseError(
+ "Field missing 'children' attribute".to_string(),
+ ));
+ }
+ },
_ => data_type,
};
diff --git a/arrow/src/datatypes/mod.rs b/arrow/src/datatypes/mod.rs
index 2c7fc8183..c3015972a 100644
--- a/arrow/src/datatypes/mod.rs
+++ b/arrow/src/datatypes/mod.rs
@@ -392,6 +392,70 @@ mod tests {
assert_eq!(expected, dt);
}
+ #[test]
+ fn parse_union_from_json() {
+ let json = r#"
+ {
+ "name": "my_union",
+ "nullable": false,
+ "type": {
+ "name": "union",
+ "mode": "SPARSE",
+ "typeIds": [
+ 5,
+ 7
+ ]
+ },
+ "children": [
+ {
+ "name": "f1",
+ "type": {
+ "name": "int",
+ "isSigned": true,
+ "bitWidth": 32
+ },
+ "nullable": true,
+ "children": []
+ },
+ {
+ "name": "f2",
+ "type": {
+ "name": "utf8"
+ },
+ "nullable": true,
+ "children": []
+ }
+ ]
+ }
+ "#;
+ let value: Value = serde_json::from_str(json).unwrap();
+ let dt = Field::from(&value).unwrap();
+
+ let expected = Field::new(
+ "my_union",
+ DataType::Union(
+ vec![
+ Field::new("f1", DataType::Int32, true).with_metadata(Some(
+ [("type_id".to_string(), "5".to_string())]
+ .iter()
+ .cloned()
+ .collect(),
+ )),
+ Field::new("f2", DataType::Utf8, true).with_metadata(Some(
+ [("type_id".to_string(), "7".to_string())]
+ .iter()
+ .cloned()
+ .collect(),
+ )),
+ ],
+ UnionMode::Sparse,
+ ),
+ false,
+ );
+
+ assert_eq!(expected, dt);
+ }
+
#[test]
fn parse_utf8_from_json() {
let json = "{\"name\":\"utf8\"}";
diff --git a/arrow/src/util/integration_util.rs b/arrow/src/util/integration_util.rs
index f19ceb4bb..a174da6ea 100644
--- a/arrow/src/util/integration_util.rs
+++ b/arrow/src/util/integration_util.rs
@@ -132,6 +132,8 @@ pub struct ArrowJsonColumn {
pub data: Option<Vec<Value>>,
#[serde(rename = "OFFSET")]
pub offset: Option<Vec<Value>>, // leaving as Value as 64-bit offsets are strings
+ #[serde(rename = "TYPE_ID")]
+ pub type_id: Option<Vec<i8>>,
pub children: Option<Vec<ArrowJsonColumn>>,
}
@@ -472,6 +474,7 @@ impl ArrowJsonBatch {
validity: Some(validity),
data: Some(data),
offset: None,
+ type_id: None,
children: None,
}
}
@@ -481,6 +484,7 @@ impl ArrowJsonBatch {
validity: None,
data: None,
offset: None,
+ type_id: None,
children: None,
},
};
diff --git a/integration-testing/src/lib.rs b/integration-testing/src/lib.rs
index 43b7cec01..c70459938 100644
--- a/integration-testing/src/lib.rs
+++ b/integration-testing/src/lib.rs
@@ -632,6 +632,61 @@ fn array_from_json(
let array = MapArray::from(array_data);
Ok(Arc::new(array))
}
+ DataType::Union(fields, _) => {
+ let field_type_ids = fields
+ .iter()
+ .enumerate()
+ .into_iter()
+ .map(|(idx, f)| {
+ (
+ f.metadata()
+ .and_then(|m| m.get("type_id"))
+ .unwrap()
+ .parse::<i8>()
+ .unwrap(),
+ idx,
+ )
+ })
+ .collect::<HashMap<_, _>>();
+
+ let type_ids = if let Some(type_id) = json_col.type_id {
+ type_id
+ .iter()
+ .map(|t| {
+ if field_type_ids.contains_key(t) {
+ Ok(*(field_type_ids.get(t).unwrap()) as i8)
+ } else {
+ Err(ArrowError::JsonError(format!(
+ "Unable to find type id {:?}",
+ t
+ )))
+ }
+ })
+ .collect::<Result<_>>()?
+ } else {
+ vec![]
+ };
+
+ let offset: Option<Buffer> = json_col.offset.map(|offsets| {
+ let offsets: Vec<i32> =
+ offsets.iter().map(|v| v.as_i64().unwrap() as i32).collect();
+ Buffer::from(&offsets.to_byte_slice())
+ });
+
+ let mut children: Vec<(Field, Arc<dyn Array>)> = vec![];
+ for (field, col) in fields.iter().zip(json_col.children.unwrap()) {
+ let array = array_from_json(field, col, dictionaries)?;
+ children.push((field.clone(), array));
+ }
+
+ let array = UnionArray::try_new(
+ Buffer::from(&type_ids.to_byte_slice()),
+ offset,
+ children,
+ )
+ .unwrap();
+ Ok(Arc::new(array))
+ }
t => Err(ArrowError::JsonError(format!(
"data type {:?} not supported",
t