This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 51466634f Move JSON Test Format To integration-testing (#2724)
51466634f is described below
commit 51466634f11b7d965ca3c912835c91e0f84a6c92
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Wed Sep 14 14:42:11 2022 +0100
Move JSON Test Format To integration-testing (#2724)
* Move JSON Test Format To integration-testing
* Fix RAT
---
arrow/src/datatypes/datatype.rs | 344 ---------
arrow/src/datatypes/field.rs | 277 -------
arrow/src/datatypes/mod.rs | 938 +----------------------
arrow/src/datatypes/schema.rs | 81 --
integration-testing/src/lib.rs | 2 +-
integration-testing/src/util/datatype.rs | 383 +++++++++
integration-testing/src/util/field.rs | 586 ++++++++++++++
integration-testing/src/{util.rs => util/mod.rs} | 14 +-
integration-testing/src/util/schema.rs | 733 ++++++++++++++++++
9 files changed, 1716 insertions(+), 1642 deletions(-)
diff --git a/arrow/src/datatypes/datatype.rs b/arrow/src/datatypes/datatype.rs
index 04d139b67..2ca71ef77 100644
--- a/arrow/src/datatypes/datatype.rs
+++ b/arrow/src/datatypes/datatype.rs
@@ -1052,350 +1052,6 @@ pub(crate) fn validate_decimal256_precision_with_lt_bytes(
}
impl DataType {
- /// Parse a data type from a JSON representation.
- #[cfg(feature = "json")]
- pub(crate) fn from(json: &serde_json::Value) -> Result<DataType> {
- use serde_json::Value;
- let default_field = Field::new("", DataType::Boolean, true);
- match *json {
- Value::Object(ref map) => match map.get("name") {
- Some(s) if s == "null" => Ok(DataType::Null),
- Some(s) if s == "bool" => Ok(DataType::Boolean),
- Some(s) if s == "binary" => Ok(DataType::Binary),
- Some(s) if s == "largebinary" => Ok(DataType::LargeBinary),
- Some(s) if s == "utf8" => Ok(DataType::Utf8),
- Some(s) if s == "largeutf8" => Ok(DataType::LargeUtf8),
- Some(s) if s == "fixedsizebinary" => {
- // return a list with any type as its child isn't defined
in the map
- if let Some(Value::Number(size)) = map.get("byteWidth") {
- Ok(DataType::FixedSizeBinary(size.as_i64().unwrap() as
i32))
- } else {
- Err(ArrowError::ParseError(
- "Expecting a byteWidth for
fixedsizebinary".to_string(),
- ))
- }
- }
- Some(s) if s == "decimal" => {
- // return a list with any type as its child isn't defined
in the map
- let precision = match map.get("precision") {
- Some(p) => Ok(p.as_u64().unwrap().try_into().unwrap()),
- None => Err(ArrowError::ParseError(
- "Expecting a precision for decimal".to_string(),
- )),
- }?;
- let scale = match map.get("scale") {
- Some(s) => Ok(s.as_u64().unwrap().try_into().unwrap()),
- _ => Err(ArrowError::ParseError(
- "Expecting a scale for decimal".to_string(),
- )),
- }?;
- let bit_width: usize = match map.get("bitWidth") {
- Some(b) => b.as_u64().unwrap() as usize,
- _ => 128, // Default bit width
- };
-
- if bit_width == 128 {
- Ok(DataType::Decimal128(precision, scale))
- } else if bit_width == 256 {
- Ok(DataType::Decimal256(precision, scale))
- } else {
- Err(ArrowError::ParseError(
- "Decimal bit_width invalid".to_string(),
- ))
- }
- }
- Some(s) if s == "floatingpoint" => match map.get("precision") {
- Some(p) if p == "HALF" => Ok(DataType::Float16),
- Some(p) if p == "SINGLE" => Ok(DataType::Float32),
- Some(p) if p == "DOUBLE" => Ok(DataType::Float64),
- _ => Err(ArrowError::ParseError(
- "floatingpoint precision missing or
invalid".to_string(),
- )),
- },
- Some(s) if s == "timestamp" => {
- let unit = match map.get("unit") {
- Some(p) if p == "SECOND" => Ok(TimeUnit::Second),
- Some(p) if p == "MILLISECOND" =>
Ok(TimeUnit::Millisecond),
- Some(p) if p == "MICROSECOND" =>
Ok(TimeUnit::Microsecond),
- Some(p) if p == "NANOSECOND" =>
Ok(TimeUnit::Nanosecond),
- _ => Err(ArrowError::ParseError(
- "timestamp unit missing or invalid".to_string(),
- )),
- };
- let tz = match map.get("timezone") {
- None => Ok(None),
- Some(serde_json::Value::String(tz)) =>
Ok(Some(tz.clone())),
- _ => Err(ArrowError::ParseError(
- "timezone must be a string".to_string(),
- )),
- };
- Ok(DataType::Timestamp(unit?, tz?))
- }
- Some(s) if s == "date" => match map.get("unit") {
- Some(p) if p == "DAY" => Ok(DataType::Date32),
- Some(p) if p == "MILLISECOND" => Ok(DataType::Date64),
- _ => Err(ArrowError::ParseError(
- "date unit missing or invalid".to_string(),
- )),
- },
- Some(s) if s == "time" => {
- let unit = match map.get("unit") {
- Some(p) if p == "SECOND" => Ok(TimeUnit::Second),
- Some(p) if p == "MILLISECOND" =>
Ok(TimeUnit::Millisecond),
- Some(p) if p == "MICROSECOND" =>
Ok(TimeUnit::Microsecond),
- Some(p) if p == "NANOSECOND" =>
Ok(TimeUnit::Nanosecond),
- _ => Err(ArrowError::ParseError(
- "time unit missing or invalid".to_string(),
- )),
- };
- match map.get("bitWidth") {
- Some(p) if p == 32 => Ok(DataType::Time32(unit?)),
- Some(p) if p == 64 => Ok(DataType::Time64(unit?)),
- _ => Err(ArrowError::ParseError(
- "time bitWidth missing or invalid".to_string(),
- )),
- }
- }
- Some(s) if s == "duration" => match map.get("unit") {
- Some(p) if p == "SECOND" =>
Ok(DataType::Duration(TimeUnit::Second)),
- Some(p) if p == "MILLISECOND" => {
- Ok(DataType::Duration(TimeUnit::Millisecond))
- }
- Some(p) if p == "MICROSECOND" => {
- Ok(DataType::Duration(TimeUnit::Microsecond))
- }
- Some(p) if p == "NANOSECOND" => {
- Ok(DataType::Duration(TimeUnit::Nanosecond))
- }
- _ => Err(ArrowError::ParseError(
- "time unit missing or invalid".to_string(),
- )),
- },
- Some(s) if s == "interval" => match map.get("unit") {
- Some(p) if p == "DAY_TIME" => {
- Ok(DataType::Interval(IntervalUnit::DayTime))
- }
- Some(p) if p == "YEAR_MONTH" => {
- Ok(DataType::Interval(IntervalUnit::YearMonth))
- }
- Some(p) if p == "MONTH_DAY_NANO" => {
- Ok(DataType::Interval(IntervalUnit::MonthDayNano))
- }
- _ => Err(ArrowError::ParseError(
- "interval unit missing or invalid".to_string(),
- )),
- },
- Some(s) if s == "int" => match map.get("isSigned") {
- Some(&Value::Bool(true)) => match map.get("bitWidth") {
- Some(&Value::Number(ref n)) => match n.as_u64() {
- Some(8) => Ok(DataType::Int8),
- Some(16) => Ok(DataType::Int16),
- Some(32) => Ok(DataType::Int32),
- Some(64) => Ok(DataType::Int64),
- _ => Err(ArrowError::ParseError(
- "int bitWidth missing or invalid".to_string(),
- )),
- },
- _ => Err(ArrowError::ParseError(
- "int bitWidth missing or invalid".to_string(),
- )),
- },
- Some(&Value::Bool(false)) => match map.get("bitWidth") {
- Some(&Value::Number(ref n)) => match n.as_u64() {
- Some(8) => Ok(DataType::UInt8),
- Some(16) => Ok(DataType::UInt16),
- Some(32) => Ok(DataType::UInt32),
- Some(64) => Ok(DataType::UInt64),
- _ => Err(ArrowError::ParseError(
- "int bitWidth missing or invalid".to_string(),
- )),
- },
- _ => Err(ArrowError::ParseError(
- "int bitWidth missing or invalid".to_string(),
- )),
- },
- _ => Err(ArrowError::ParseError(
- "int signed missing or invalid".to_string(),
- )),
- },
- Some(s) if s == "list" => {
- // return a list with any type as its child isn't defined
in the map
- Ok(DataType::List(Box::new(default_field)))
- }
- Some(s) if s == "largelist" => {
- // return a largelist with any type as its child isn't
defined in the map
- Ok(DataType::LargeList(Box::new(default_field)))
- }
- Some(s) if s == "fixedsizelist" => {
- // return a list with any type as its child isn't defined
in the map
- if let Some(Value::Number(size)) = map.get("listSize") {
- Ok(DataType::FixedSizeList(
- Box::new(default_field),
- size.as_i64().unwrap() as i32,
- ))
- } else {
- Err(ArrowError::ParseError(
- "Expecting a listSize for
fixedsizelist".to_string(),
- ))
- }
- }
- Some(s) if s == "struct" => {
- // return an empty `struct` type as its children aren't
defined in the map
- Ok(DataType::Struct(vec![]))
- }
- Some(s) if s == "map" => {
- if let Some(Value::Bool(keys_sorted)) =
map.get("keysSorted") {
- // Return a map with an empty type as its children
aren't defined in the map
- Ok(DataType::Map(Box::new(default_field),
*keys_sorted))
- } else {
- Err(ArrowError::ParseError(
- "Expecting a keysSorted for map".to_string(),
- ))
- }
- }
- Some(s) if s == "union" => {
- if let Some(Value::String(mode)) = map.get("mode") {
- let union_mode = if mode == "SPARSE" {
- UnionMode::Sparse
- } else if mode == "DENSE" {
- UnionMode::Dense
- } else {
- return Err(ArrowError::ParseError(format!(
- "Unknown union mode {:?} for union",
- mode
- )));
- };
- if let Some(type_ids) = map.get("typeIds") {
- let type_ids = type_ids
- .as_array()
- .unwrap()
- .iter()
- .map(|t| t.as_i64().unwrap() as i8)
- .collect::<Vec<_>>();
-
- let default_fields = type_ids
- .iter()
- .map(|_| default_field.clone())
- .collect::<Vec<_>>();
-
- Ok(DataType::Union(default_fields, type_ids,
union_mode))
- } else {
- Err(ArrowError::ParseError(
- "Expecting a typeIds for union ".to_string(),
- ))
- }
- } else {
- Err(ArrowError::ParseError(
- "Expecting a mode for union".to_string(),
- ))
- }
- }
- Some(other) => Err(ArrowError::ParseError(format!(
- "invalid or unsupported type name: {} in {:?}",
- other, json
- ))),
- None => Err(ArrowError::ParseError("type name
missing".to_string())),
- },
- _ => Err(ArrowError::ParseError(
- "invalid json value type".to_string(),
- )),
- }
- }
-
- /// Generate a JSON representation of the data type.
- #[cfg(feature = "json")]
- pub fn to_json(&self) -> serde_json::Value {
- use serde_json::json;
- match self {
- DataType::Null => json!({"name": "null"}),
- DataType::Boolean => json!({"name": "bool"}),
- DataType::Int8 => json!({"name": "int", "bitWidth": 8, "isSigned":
true}),
- DataType::Int16 => json!({"name": "int", "bitWidth": 16,
"isSigned": true}),
- DataType::Int32 => json!({"name": "int", "bitWidth": 32,
"isSigned": true}),
- DataType::Int64 => json!({"name": "int", "bitWidth": 64,
"isSigned": true}),
- DataType::UInt8 => json!({"name": "int", "bitWidth": 8,
"isSigned": false}),
- DataType::UInt16 => json!({"name": "int", "bitWidth": 16,
"isSigned": false}),
- DataType::UInt32 => json!({"name": "int", "bitWidth": 32,
"isSigned": false}),
- DataType::UInt64 => json!({"name": "int", "bitWidth": 64,
"isSigned": false}),
- DataType::Float16 => json!({"name": "floatingpoint", "precision":
"HALF"}),
- DataType::Float32 => json!({"name": "floatingpoint", "precision":
"SINGLE"}),
- DataType::Float64 => json!({"name": "floatingpoint", "precision":
"DOUBLE"}),
- DataType::Utf8 => json!({"name": "utf8"}),
- DataType::LargeUtf8 => json!({"name": "largeutf8"}),
- DataType::Binary => json!({"name": "binary"}),
- DataType::LargeBinary => json!({"name": "largebinary"}),
- DataType::FixedSizeBinary(byte_width) => {
- json!({"name": "fixedsizebinary", "byteWidth": byte_width})
- }
- DataType::Struct(_) => json!({"name": "struct"}),
- DataType::Union(_, _, _) => json!({"name": "union"}),
- DataType::List(_) => json!({ "name": "list"}),
- DataType::LargeList(_) => json!({ "name": "largelist"}),
- DataType::FixedSizeList(_, length) => {
- json!({"name":"fixedsizelist", "listSize": length})
- }
- DataType::Time32(unit) => {
- json!({"name": "time", "bitWidth": 32, "unit": match unit {
- TimeUnit::Second => "SECOND",
- TimeUnit::Millisecond => "MILLISECOND",
- TimeUnit::Microsecond => "MICROSECOND",
- TimeUnit::Nanosecond => "NANOSECOND",
- }})
- }
- DataType::Time64(unit) => {
- json!({"name": "time", "bitWidth": 64, "unit": match unit {
- TimeUnit::Second => "SECOND",
- TimeUnit::Millisecond => "MILLISECOND",
- TimeUnit::Microsecond => "MICROSECOND",
- TimeUnit::Nanosecond => "NANOSECOND",
- }})
- }
- DataType::Date32 => {
- json!({"name": "date", "unit": "DAY"})
- }
- DataType::Date64 => {
- json!({"name": "date", "unit": "MILLISECOND"})
- }
- DataType::Timestamp(unit, None) => {
- json!({"name": "timestamp", "unit": match unit {
- TimeUnit::Second => "SECOND",
- TimeUnit::Millisecond => "MILLISECOND",
- TimeUnit::Microsecond => "MICROSECOND",
- TimeUnit::Nanosecond => "NANOSECOND",
- }})
- }
- DataType::Timestamp(unit, Some(tz)) => {
- json!({"name": "timestamp", "unit": match unit {
- TimeUnit::Second => "SECOND",
- TimeUnit::Millisecond => "MILLISECOND",
- TimeUnit::Microsecond => "MICROSECOND",
- TimeUnit::Nanosecond => "NANOSECOND",
- }, "timezone": tz})
- }
- DataType::Interval(unit) => json!({"name": "interval", "unit":
match unit {
- IntervalUnit::YearMonth => "YEAR_MONTH",
- IntervalUnit::DayTime => "DAY_TIME",
- IntervalUnit::MonthDayNano => "MONTH_DAY_NANO",
- }}),
- DataType::Duration(unit) => json!({"name": "duration", "unit":
match unit {
- TimeUnit::Second => "SECOND",
- TimeUnit::Millisecond => "MILLISECOND",
- TimeUnit::Microsecond => "MICROSECOND",
- TimeUnit::Nanosecond => "NANOSECOND",
- }}),
- DataType::Dictionary(_, _) => json!({ "name": "dictionary"}),
- DataType::Decimal128(precision, scale) => {
- json!({"name": "decimal", "precision": precision, "scale":
scale, "bitWidth": 128})
- }
- DataType::Decimal256(precision, scale) => {
- json!({"name": "decimal", "precision": precision, "scale":
scale, "bitWidth": 256})
- }
- DataType::Map(_, keys_sorted) => {
- json!({"name": "map", "keysSorted": keys_sorted})
- }
- }
- }
-
/// Returns true if this type is numeric: (UInt*, Int*, or Float*).
pub fn is_numeric(t: &DataType) -> bool {
use DataType::*;
diff --git a/arrow/src/datatypes/field.rs b/arrow/src/datatypes/field.rs
index ac966cafe..03d078077 100644
--- a/arrow/src/datatypes/field.rs
+++ b/arrow/src/datatypes/field.rs
@@ -250,283 +250,6 @@ impl Field {
}
}
- /// Parse a `Field` definition from a JSON representation.
- #[cfg(feature = "json")]
- pub fn from(json: &serde_json::Value) -> Result<Self> {
- use serde_json::Value;
- match *json {
- Value::Object(ref map) => {
- let name = match map.get("name") {
- Some(&Value::String(ref name)) => name.to_string(),
- _ => {
- return Err(ArrowError::ParseError(
- "Field missing 'name' attribute".to_string(),
- ));
- }
- };
- let nullable = match map.get("nullable") {
- Some(&Value::Bool(b)) => b,
- _ => {
- return Err(ArrowError::ParseError(
- "Field missing 'nullable' attribute".to_string(),
- ));
- }
- };
- let data_type = match map.get("type") {
- Some(t) => DataType::from(t)?,
- _ => {
- return Err(ArrowError::ParseError(
- "Field missing 'type' attribute".to_string(),
- ));
- }
- };
-
- // Referenced example file: testing/data/arrow-ipc-stream/integration/1.0.0-littleendian/generated_custom_metadata.json.gz
- let metadata = match map.get("metadata") {
- Some(&Value::Array(ref values)) => {
- let mut res: BTreeMap<String, String> =
BTreeMap::new();
- for value in values {
- match value.as_object() {
- Some(map) => {
- if map.len() != 2 {
- return Err(ArrowError::ParseError(
- "Field 'metadata' must have exact
two entries for each key-value map".to_string(),
- ));
- }
- if let (Some(k), Some(v)) =
- (map.get("key"), map.get("value"))
- {
- if let (Some(k_str), Some(v_str)) =
- (k.as_str(), v.as_str())
- {
- res.insert(
- k_str.to_string().clone(),
- v_str.to_string().clone(),
- );
- } else {
- return Err(ArrowError::ParseError("Field 'metadata' must have map value of string type".to_string()));
- }
- } else {
- return Err(ArrowError::ParseError("Field 'metadata' lacks map keys named \"key\" or \"value\"".to_string()));
- }
- }
- _ => {
- return Err(ArrowError::ParseError(
- "Field 'metadata' contains non-object
key-value pair".to_string(),
- ));
- }
- }
- }
- Some(res)
- }
- // We also support map format, because Schema's metadata
supports this.
- // See https://github.com/apache/arrow/pull/5907
- Some(&Value::Object(ref values)) => {
- let mut res: BTreeMap<String, String> =
BTreeMap::new();
- for (k, v) in values {
- if let Some(str_value) = v.as_str() {
- res.insert(k.clone(),
str_value.to_string().clone());
- } else {
- return Err(ArrowError::ParseError(
- format!("Field 'metadata' contains
non-string value for key {}", k),
- ));
- }
- }
- Some(res)
- }
- Some(_) => {
- return Err(ArrowError::ParseError(
- "Field `metadata` is not json array".to_string(),
- ));
- }
- _ => None,
- };
-
- // if data_type is a struct or list, get its children
- let data_type = match data_type {
- DataType::List(_)
- | DataType::LargeList(_)
- | DataType::FixedSizeList(_, _) => match
map.get("children") {
- Some(Value::Array(values)) => {
- if values.len() != 1 {
- return Err(ArrowError::ParseError(
- "Field 'children' must have one element
for a list data type".to_string(),
- ));
- }
- match data_type {
- DataType::List(_) => {
-
DataType::List(Box::new(Self::from(&values[0])?))
- }
- DataType::LargeList(_) => {
-
DataType::LargeList(Box::new(Self::from(&values[0])?))
- }
- DataType::FixedSizeList(_, int) =>
DataType::FixedSizeList(
- Box::new(Self::from(&values[0])?),
- int,
- ),
- _ => unreachable!(
- "Data type should be a list, largelist
or fixedsizelist"
- ),
- }
- }
- Some(_) => {
- return Err(ArrowError::ParseError(
- "Field 'children' must be an
array".to_string(),
- ))
- }
- None => {
- return Err(ArrowError::ParseError(
- "Field missing 'children'
attribute".to_string(),
- ));
- }
- },
- DataType::Struct(mut fields) => match map.get("children") {
- Some(Value::Array(values)) => {
- let struct_fields: Result<Vec<Field>> =
- values.iter().map(Field::from).collect();
- fields.append(&mut struct_fields?);
- DataType::Struct(fields)
- }
- Some(_) => {
- return Err(ArrowError::ParseError(
- "Field 'children' must be an
array".to_string(),
- ))
- }
- None => {
- return Err(ArrowError::ParseError(
- "Field missing 'children'
attribute".to_string(),
- ));
- }
- },
- DataType::Map(_, keys_sorted) => {
- match map.get("children") {
- Some(Value::Array(values)) if values.len() == 1 =>
{
- let child = Self::from(&values[0])?;
- // child must be a struct
- match child.data_type() {
- DataType::Struct(map_fields) if
map_fields.len() == 2 => {
- DataType::Map(Box::new(child),
keys_sorted)
- }
- t => {
- return Err(ArrowError::ParseError(
- format!("Map children should be a
struct with 2 fields, found {:?}", t)
- ))
- }
- }
- }
- Some(_) => {
- return Err(ArrowError::ParseError(
- "Field 'children' must be an array with 1
element"
- .to_string(),
- ))
- }
- None => {
- return Err(ArrowError::ParseError(
- "Field missing 'children'
attribute".to_string(),
- ));
- }
- }
- }
- DataType::Union(_, type_ids, mode) => match
map.get("children") {
- Some(Value::Array(values)) => {
- let union_fields: Vec<Field> =
-
values.iter().map(Field::from).collect::<Result<_>>()?;
- DataType::Union(union_fields, type_ids, mode)
- }
- Some(_) => {
- return Err(ArrowError::ParseError(
- "Field 'children' must be an
array".to_string(),
- ))
- }
- None => {
- return Err(ArrowError::ParseError(
- "Field missing 'children'
attribute".to_string(),
- ));
- }
- },
- _ => data_type,
- };
-
- let mut dict_id = 0;
- let mut dict_is_ordered = false;
-
- let data_type = match map.get("dictionary") {
- Some(dictionary) => {
- let index_type = match dictionary.get("indexType") {
- Some(t) => DataType::from(t)?,
- _ => {
- return Err(ArrowError::ParseError(
- "Field missing 'indexType'
attribute".to_string(),
- ));
- }
- };
- dict_id = match dictionary.get("id") {
- Some(Value::Number(n)) => n.as_i64().unwrap(),
- _ => {
- return Err(ArrowError::ParseError(
- "Field missing 'id' attribute".to_string(),
- ));
- }
- };
- dict_is_ordered = match dictionary.get("isOrdered") {
- Some(&Value::Bool(n)) => n,
- _ => {
- return Err(ArrowError::ParseError(
- "Field missing 'isOrdered'
attribute".to_string(),
- ));
- }
- };
- DataType::Dictionary(Box::new(index_type),
Box::new(data_type))
- }
- _ => data_type,
- };
- Ok(Field {
- name,
- data_type,
- nullable,
- dict_id,
- dict_is_ordered,
- metadata,
- })
- }
- _ => Err(ArrowError::ParseError(
- "Invalid json value type for field".to_string(),
- )),
- }
- }
-
- /// Generate a JSON representation of the `Field`.
- #[cfg(feature = "json")]
- pub fn to_json(&self) -> serde_json::Value {
- let children: Vec<serde_json::Value> = match self.data_type() {
- DataType::Struct(fields) => fields.iter().map(|f|
f.to_json()).collect(),
- DataType::List(field)
- | DataType::LargeList(field)
- | DataType::FixedSizeList(field, _)
- | DataType::Map(field, _) => vec![field.to_json()],
- _ => vec![],
- };
- match self.data_type() {
- DataType::Dictionary(ref index_type, ref value_type) =>
serde_json::json!({
- "name": self.name,
- "nullable": self.nullable,
- "type": value_type.to_json(),
- "children": children,
- "dictionary": {
- "id": self.dict_id,
- "indexType": index_type.to_json(),
- "isOrdered": self.dict_is_ordered
- }
- }),
- _ => serde_json::json!({
- "name": self.name,
- "nullable": self.nullable,
- "type": self.data_type.to_json(),
- "children": children
- }),
- }
- }
-
/// Merge this field into self if it is compatible.
///
/// Struct fields are merged recursively.
diff --git a/arrow/src/datatypes/mod.rs b/arrow/src/datatypes/mod.rs
index 38b6c7bf9..1586d563c 100644
--- a/arrow/src/datatypes/mod.rs
+++ b/arrow/src/datatypes/mod.rs
@@ -57,7 +57,7 @@ mod tests {
#[cfg(feature = "json")]
use serde_json::{
- Number, Value,
+ Number,
Value::{Bool, Number as VNumber, String as VString},
};
@@ -174,942 +174,6 @@ mod tests {
assert_eq!(person, deserialized);
}
- #[test]
- #[cfg(feature = "json")]
- fn struct_field_to_json() {
- let f = Field::new(
- "address",
- DataType::Struct(vec![
- Field::new("street", DataType::Utf8, false),
- Field::new("zip", DataType::UInt16, false),
- ]),
- false,
- );
- let value: Value = serde_json::from_str(
- r#"{
- "name": "address",
- "nullable": false,
- "type": {
- "name": "struct"
- },
- "children": [
- {
- "name": "street",
- "nullable": false,
- "type": {
- "name": "utf8"
- },
- "children": []
- },
- {
- "name": "zip",
- "nullable": false,
- "type": {
- "name": "int",
- "bitWidth": 16,
- "isSigned": false
- },
- "children": []
- }
- ]
- }"#,
- )
- .unwrap();
- assert_eq!(value, f.to_json());
- }
-
- #[test]
- #[cfg(feature = "json")]
- fn map_field_to_json() {
- let f = Field::new(
- "my_map",
- DataType::Map(
- Box::new(Field::new(
- "my_entries",
- DataType::Struct(vec![
- Field::new("my_keys", DataType::Utf8, false),
- Field::new("my_values", DataType::UInt16, true),
- ]),
- false,
- )),
- true,
- ),
- false,
- );
- let value: Value = serde_json::from_str(
- r#"{
- "name": "my_map",
- "nullable": false,
- "type": {
- "name": "map",
- "keysSorted": true
- },
- "children": [
- {
- "name": "my_entries",
- "nullable": false,
- "type": {
- "name": "struct"
- },
- "children": [
- {
- "name": "my_keys",
- "nullable": false,
- "type": {
- "name": "utf8"
- },
- "children": []
- },
- {
- "name": "my_values",
- "nullable": true,
- "type": {
- "name": "int",
- "bitWidth": 16,
- "isSigned": false
- },
- "children": []
- }
- ]
- }
- ]
- }"#,
- )
- .unwrap();
- assert_eq!(value, f.to_json());
- }
-
- #[test]
- #[cfg(feature = "json")]
- fn primitive_field_to_json() {
- let f = Field::new("first_name", DataType::Utf8, false);
- let value: Value = serde_json::from_str(
- r#"{
- "name": "first_name",
- "nullable": false,
- "type": {
- "name": "utf8"
- },
- "children": []
- }"#,
- )
- .unwrap();
- assert_eq!(value, f.to_json());
- }
- #[test]
- #[cfg(feature = "json")]
- fn parse_struct_from_json() {
- let json = r#"
- {
- "name": "address",
- "type": {
- "name": "struct"
- },
- "nullable": false,
- "children": [
- {
- "name": "street",
- "type": {
- "name": "utf8"
- },
- "nullable": false,
- "children": []
- },
- {
- "name": "zip",
- "type": {
- "name": "int",
- "isSigned": false,
- "bitWidth": 16
- },
- "nullable": false,
- "children": []
- }
- ]
- }
- "#;
- let value: Value = serde_json::from_str(json).unwrap();
- let dt = Field::from(&value).unwrap();
-
- let expected = Field::new(
- "address",
- DataType::Struct(vec![
- Field::new("street", DataType::Utf8, false),
- Field::new("zip", DataType::UInt16, false),
- ]),
- false,
- );
-
- assert_eq!(expected, dt);
- }
-
- #[test]
- #[cfg(feature = "json")]
- fn parse_map_from_json() {
- let json = r#"
- {
- "name": "my_map",
- "nullable": false,
- "type": {
- "name": "map",
- "keysSorted": true
- },
- "children": [
- {
- "name": "my_entries",
- "nullable": false,
- "type": {
- "name": "struct"
- },
- "children": [
- {
- "name": "my_keys",
- "nullable": false,
- "type": {
- "name": "utf8"
- },
- "children": []
- },
- {
- "name": "my_values",
- "nullable": true,
- "type": {
- "name": "int",
- "bitWidth": 16,
- "isSigned": false
- },
- "children": []
- }
- ]
- }
- ]
- }
- "#;
- let value: Value = serde_json::from_str(json).unwrap();
- let dt = Field::from(&value).unwrap();
-
- let expected = Field::new(
- "my_map",
- DataType::Map(
- Box::new(Field::new(
- "my_entries",
- DataType::Struct(vec![
- Field::new("my_keys", DataType::Utf8, false),
- Field::new("my_values", DataType::UInt16, true),
- ]),
- false,
- )),
- true,
- ),
- false,
- );
-
- assert_eq!(expected, dt);
- }
-
- #[test]
- #[cfg(feature = "json")]
- fn parse_union_from_json() {
- let json = r#"
- {
- "name": "my_union",
- "nullable": false,
- "type": {
- "name": "union",
- "mode": "SPARSE",
- "typeIds": [
- 5,
- 7
- ]
- },
- "children": [
- {
- "name": "f1",
- "type": {
- "name": "int",
- "isSigned": true,
- "bitWidth": 32
- },
- "nullable": true,
- "children": []
- },
- {
- "name": "f2",
- "type": {
- "name": "utf8"
- },
- "nullable": true,
- "children": []
- }
- ]
- }
- "#;
- let value: Value = serde_json::from_str(json).unwrap();
- let dt = Field::from(&value).unwrap();
-
- let expected = Field::new(
- "my_union",
- DataType::Union(
- vec![
- Field::new("f1", DataType::Int32, true),
- Field::new("f2", DataType::Utf8, true),
- ],
- vec![5, 7],
- UnionMode::Sparse,
- ),
- false,
- );
-
- assert_eq!(expected, dt);
- }
-
- #[test]
- #[cfg(feature = "json")]
- fn parse_utf8_from_json() {
- let json = "{\"name\":\"utf8\"}";
- let value: Value = serde_json::from_str(json).unwrap();
- let dt = DataType::from(&value).unwrap();
- assert_eq!(DataType::Utf8, dt);
- }
-
- #[test]
- #[cfg(feature = "json")]
- fn parse_int32_from_json() {
- let json = "{\"name\": \"int\", \"isSigned\": true, \"bitWidth\": 32}";
- let value: Value = serde_json::from_str(json).unwrap();
- let dt = DataType::from(&value).unwrap();
- assert_eq!(DataType::Int32, dt);
- }
-
- #[test]
- #[cfg(feature = "json")]
- fn schema_json() {
- // Add some custom metadata
- let metadata: HashMap<String, String> =
- [("Key".to_string(), "Value".to_string())]
- .iter()
- .cloned()
- .collect();
-
- let schema = Schema::new_with_metadata(
- vec![
- Field::new("c1", DataType::Utf8, false),
- Field::new("c2", DataType::Binary, false),
- Field::new("c3", DataType::FixedSizeBinary(3), false),
- Field::new("c4", DataType::Boolean, false),
- Field::new("c5", DataType::Date32, false),
- Field::new("c6", DataType::Date64, false),
- Field::new("c7", DataType::Time32(TimeUnit::Second), false),
- Field::new("c8", DataType::Time32(TimeUnit::Millisecond),
false),
- Field::new("c9", DataType::Time32(TimeUnit::Microsecond),
false),
- Field::new("c10", DataType::Time32(TimeUnit::Nanosecond),
false),
- Field::new("c11", DataType::Time64(TimeUnit::Second), false),
- Field::new("c12", DataType::Time64(TimeUnit::Millisecond),
false),
- Field::new("c13", DataType::Time64(TimeUnit::Microsecond),
false),
- Field::new("c14", DataType::Time64(TimeUnit::Nanosecond),
false),
- Field::new("c15", DataType::Timestamp(TimeUnit::Second, None),
false),
- Field::new(
- "c16",
- DataType::Timestamp(TimeUnit::Millisecond,
Some("UTC".to_string())),
- false,
- ),
- Field::new(
- "c17",
- DataType::Timestamp(
- TimeUnit::Microsecond,
- Some("Africa/Johannesburg".to_string()),
- ),
- false,
- ),
- Field::new(
- "c18",
- DataType::Timestamp(TimeUnit::Nanosecond, None),
- false,
- ),
- Field::new("c19", DataType::Interval(IntervalUnit::DayTime),
false),
- Field::new("c20", DataType::Interval(IntervalUnit::YearMonth),
false),
- Field::new("c21",
DataType::Interval(IntervalUnit::MonthDayNano), false),
- Field::new(
- "c22",
- DataType::List(Box::new(Field::new("item",
DataType::Boolean, true))),
- false,
- ),
- Field::new(
- "c23",
- DataType::FixedSizeList(
- Box::new(Field::new("bools", DataType::Boolean,
false)),
- 5,
- ),
- false,
- ),
- Field::new(
- "c24",
- DataType::List(Box::new(Field::new(
- "inner_list",
- DataType::List(Box::new(Field::new(
- "struct",
- DataType::Struct(vec![]),
- true,
- ))),
- false,
- ))),
- true,
- ),
- Field::new(
- "c25",
- DataType::Struct(vec![
- Field::new("a", DataType::Utf8, false),
- Field::new("b", DataType::UInt16, false),
- ]),
- false,
- ),
- Field::new("c26", DataType::Interval(IntervalUnit::YearMonth),
true),
- Field::new("c27", DataType::Interval(IntervalUnit::DayTime),
true),
- Field::new("c28",
DataType::Interval(IntervalUnit::MonthDayNano), true),
- Field::new("c29", DataType::Duration(TimeUnit::Second), false),
- Field::new("c30", DataType::Duration(TimeUnit::Millisecond),
false),
- Field::new("c31", DataType::Duration(TimeUnit::Microsecond),
false),
- Field::new("c32", DataType::Duration(TimeUnit::Nanosecond),
false),
- Field::new_dict(
- "c33",
- DataType::Dictionary(
- Box::new(DataType::Int32),
- Box::new(DataType::Utf8),
- ),
- true,
- 123,
- true,
- ),
- Field::new("c34", DataType::LargeBinary, true),
- Field::new("c35", DataType::LargeUtf8, true),
- Field::new(
- "c36",
- DataType::LargeList(Box::new(Field::new(
- "inner_large_list",
- DataType::LargeList(Box::new(Field::new(
- "struct",
- DataType::Struct(vec![]),
- false,
- ))),
- true,
- ))),
- true,
- ),
- Field::new(
- "c37",
- DataType::Map(
- Box::new(Field::new(
- "my_entries",
- DataType::Struct(vec![
- Field::new("my_keys", DataType::Utf8, false),
- Field::new("my_values", DataType::UInt16,
true),
- ]),
- false,
- )),
- true,
- ),
- false,
- ),
- ],
- metadata,
- );
-
- let expected = schema.to_json();
- let json = r#"{
- "fields": [
- {
- "name": "c1",
- "nullable": false,
- "type": {
- "name": "utf8"
- },
- "children": []
- },
- {
- "name": "c2",
- "nullable": false,
- "type": {
- "name": "binary"
- },
- "children": []
- },
- {
- "name": "c3",
- "nullable": false,
- "type": {
- "name": "fixedsizebinary",
- "byteWidth": 3
- },
- "children": []
- },
- {
- "name": "c4",
- "nullable": false,
- "type": {
- "name": "bool"
- },
- "children": []
- },
- {
- "name": "c5",
- "nullable": false,
- "type": {
- "name": "date",
- "unit": "DAY"
- },
- "children": []
- },
- {
- "name": "c6",
- "nullable": false,
- "type": {
- "name": "date",
- "unit": "MILLISECOND"
- },
- "children": []
- },
- {
- "name": "c7",
- "nullable": false,
- "type": {
- "name": "time",
- "bitWidth": 32,
- "unit": "SECOND"
- },
- "children": []
- },
- {
- "name": "c8",
- "nullable": false,
- "type": {
- "name": "time",
- "bitWidth": 32,
- "unit": "MILLISECOND"
- },
- "children": []
- },
- {
- "name": "c9",
- "nullable": false,
- "type": {
- "name": "time",
- "bitWidth": 32,
- "unit": "MICROSECOND"
- },
- "children": []
- },
- {
- "name": "c10",
- "nullable": false,
- "type": {
- "name": "time",
- "bitWidth": 32,
- "unit": "NANOSECOND"
- },
- "children": []
- },
- {
- "name": "c11",
- "nullable": false,
- "type": {
- "name": "time",
- "bitWidth": 64,
- "unit": "SECOND"
- },
- "children": []
- },
- {
- "name": "c12",
- "nullable": false,
- "type": {
- "name": "time",
- "bitWidth": 64,
- "unit": "MILLISECOND"
- },
- "children": []
- },
- {
- "name": "c13",
- "nullable": false,
- "type": {
- "name": "time",
- "bitWidth": 64,
- "unit": "MICROSECOND"
- },
- "children": []
- },
- {
- "name": "c14",
- "nullable": false,
- "type": {
- "name": "time",
- "bitWidth": 64,
- "unit": "NANOSECOND"
- },
- "children": []
- },
- {
- "name": "c15",
- "nullable": false,
- "type": {
- "name": "timestamp",
- "unit": "SECOND"
- },
- "children": []
- },
- {
- "name": "c16",
- "nullable": false,
- "type": {
- "name": "timestamp",
- "unit": "MILLISECOND",
- "timezone": "UTC"
- },
- "children": []
- },
- {
- "name": "c17",
- "nullable": false,
- "type": {
- "name": "timestamp",
- "unit": "MICROSECOND",
- "timezone": "Africa/Johannesburg"
- },
- "children": []
- },
- {
- "name": "c18",
- "nullable": false,
- "type": {
- "name": "timestamp",
- "unit": "NANOSECOND"
- },
- "children": []
- },
- {
- "name": "c19",
- "nullable": false,
- "type": {
- "name": "interval",
- "unit": "DAY_TIME"
- },
- "children": []
- },
- {
- "name": "c20",
- "nullable": false,
- "type": {
- "name": "interval",
- "unit": "YEAR_MONTH"
- },
- "children": []
- },
- {
- "name": "c21",
- "nullable": false,
- "type": {
- "name": "interval",
- "unit": "MONTH_DAY_NANO"
- },
- "children": []
- },
- {
- "name": "c22",
- "nullable": false,
- "type": {
- "name": "list"
- },
- "children": [
- {
- "name": "item",
- "nullable": true,
- "type": {
- "name": "bool"
- },
- "children": []
- }
- ]
- },
- {
- "name": "c23",
- "nullable": false,
- "type": {
- "name": "fixedsizelist",
- "listSize": 5
- },
- "children": [
- {
- "name": "bools",
- "nullable": false,
- "type": {
- "name": "bool"
- },
- "children": []
- }
- ]
- },
- {
- "name": "c24",
- "nullable": true,
- "type": {
- "name": "list"
- },
- "children": [
- {
- "name": "inner_list",
- "nullable": false,
- "type": {
- "name": "list"
- },
- "children": [
- {
- "name": "struct",
- "nullable": true,
- "type": {
- "name": "struct"
- },
- "children": []
- }
- ]
- }
- ]
- },
- {
- "name": "c25",
- "nullable": false,
- "type": {
- "name": "struct"
- },
- "children": [
- {
- "name": "a",
- "nullable": false,
- "type": {
- "name": "utf8"
- },
- "children": []
- },
- {
- "name": "b",
- "nullable": false,
- "type": {
- "name": "int",
- "bitWidth": 16,
- "isSigned": false
- },
- "children": []
- }
- ]
- },
- {
- "name": "c26",
- "nullable": true,
- "type": {
- "name": "interval",
- "unit": "YEAR_MONTH"
- },
- "children": []
- },
- {
- "name": "c27",
- "nullable": true,
- "type": {
- "name": "interval",
- "unit": "DAY_TIME"
- },
- "children": []
- },
- {
- "name": "c28",
- "nullable": true,
- "type": {
- "name": "interval",
- "unit": "MONTH_DAY_NANO"
- },
- "children": []
- },
- {
- "name": "c29",
- "nullable": false,
- "type": {
- "name": "duration",
- "unit": "SECOND"
- },
- "children": []
- },
- {
- "name": "c30",
- "nullable": false,
- "type": {
- "name": "duration",
- "unit": "MILLISECOND"
- },
- "children": []
- },
- {
- "name": "c31",
- "nullable": false,
- "type": {
- "name": "duration",
- "unit": "MICROSECOND"
- },
- "children": []
- },
- {
- "name": "c32",
- "nullable": false,
- "type": {
- "name": "duration",
- "unit": "NANOSECOND"
- },
- "children": []
- },
- {
- "name": "c33",
- "nullable": true,
- "children": [],
- "type": {
- "name": "utf8"
- },
- "dictionary": {
- "id": 123,
- "indexType": {
- "name": "int",
- "bitWidth": 32,
- "isSigned": true
- },
- "isOrdered": true
- }
- },
- {
- "name": "c34",
- "nullable": true,
- "type": {
- "name": "largebinary"
- },
- "children": []
- },
- {
- "name": "c35",
- "nullable": true,
- "type": {
- "name": "largeutf8"
- },
- "children": []
- },
- {
- "name": "c36",
- "nullable": true,
- "type": {
- "name": "largelist"
- },
- "children": [
- {
- "name": "inner_large_list",
- "nullable": true,
- "type": {
- "name": "largelist"
- },
- "children": [
- {
- "name": "struct",
- "nullable": false,
- "type": {
- "name": "struct"
- },
- "children": []
- }
- ]
- }
- ]
- },
- {
- "name": "c37",
- "nullable": false,
- "type": {
- "name": "map",
- "keysSorted": true
- },
- "children": [
- {
- "name": "my_entries",
- "nullable": false,
- "type": {
- "name": "struct"
- },
- "children": [
- {
- "name": "my_keys",
- "nullable": false,
- "type": {
- "name": "utf8"
- },
- "children": []
- },
- {
- "name": "my_values",
- "nullable": true,
- "type": {
- "name": "int",
- "bitWidth": 16,
- "isSigned": false
- },
- "children": []
- }
- ]
- }
- ]
- }
- ],
- "metadata" : {
- "Key": "Value"
- }
- }"#;
- let value: Value = serde_json::from_str(json).unwrap();
- assert_eq!(expected, value);
-
- // convert back to a schema
- let value: Value = serde_json::from_str(json).unwrap();
- let schema2 = Schema::from(&value).unwrap();
-
- assert_eq!(schema, schema2);
-
- // Check that empty metadata produces empty value in JSON and can be
parsed
- let json = r#"{
- "fields": [
- {
- "name": "c1",
- "nullable": false,
- "type": {
- "name": "utf8"
- },
- "children": []
- }
- ],
- "metadata": {}
- }"#;
- let value: Value = serde_json::from_str(json).unwrap();
- let schema = Schema::from(&value).unwrap();
- assert!(schema.metadata.is_empty());
-
- // Check that metadata field is not required in the JSON.
- let json = r#"{
- "fields": [
- {
- "name": "c1",
- "nullable": false,
- "type": {
- "name": "utf8"
- },
- "children": []
- }
- ]
- }"#;
- let value: Value = serde_json::from_str(json).unwrap();
- let schema = Schema::from(&value).unwrap();
- assert!(schema.metadata.is_empty());
- }
-
#[test]
fn create_schema_string() {
let schema = person_schema();
diff --git a/arrow/src/datatypes/schema.rs b/arrow/src/datatypes/schema.rs
index efde4edef..b0eca6114 100644
--- a/arrow/src/datatypes/schema.rs
+++ b/arrow/src/datatypes/schema.rs
@@ -233,80 +233,6 @@ impl Schema {
.find(|&(_, c)| c.name() == name)
}
- /// Generate a JSON representation of the `Schema`.
- #[cfg(feature = "json")]
- pub fn to_json(&self) -> serde_json::Value {
- serde_json::json!({
- "fields": self.fields.iter().map(|field|
field.to_json()).collect::<Vec<serde_json::Value>>(),
- "metadata": serde_json::to_value(&self.metadata).unwrap()
- })
- }
-
- /// Parse a `Schema` definition from a JSON representation.
- #[cfg(feature = "json")]
- pub fn from(json: &serde_json::Value) -> Result<Self> {
- use serde_json::Value;
- match *json {
- Value::Object(ref schema) => {
- let fields = if let Some(Value::Array(fields)) =
schema.get("fields") {
- fields.iter().map(Field::from).collect::<Result<_>>()?
- } else {
- return Err(ArrowError::ParseError(
- "Schema fields should be an array".to_string(),
- ));
- };
-
- let metadata = if let Some(value) = schema.get("metadata") {
- Self::from_metadata(value)?
- } else {
- HashMap::default()
- };
-
- Ok(Self { fields, metadata })
- }
- _ => Err(ArrowError::ParseError(
- "Invalid json value type for schema".to_string(),
- )),
- }
- }
-
- /// Parse a `metadata` definition from a JSON representation.
- /// The JSON can either be an Object or an Array of Objects.
- #[cfg(feature = "json")]
- fn from_metadata(json: &serde_json::Value) -> Result<HashMap<String,
String>> {
- use serde_json::Value;
- match json {
- Value::Array(_) => {
- let mut hashmap = HashMap::new();
- let values: Vec<MetadataKeyValue> =
serde_json::from_value(json.clone())
- .map_err(|_| {
- ArrowError::JsonError(
- "Unable to parse object into key-value
pair".to_string(),
- )
- })?;
- for meta in values {
- hashmap.insert(meta.key.clone(), meta.value);
- }
- Ok(hashmap)
- }
- Value::Object(md) => md
- .iter()
- .map(|(k, v)| {
- if let Value::String(v) = v {
- Ok((k.to_string(), v.to_string()))
- } else {
- Err(ArrowError::ParseError(
- "metadata `value` field must be a
string".to_string(),
- ))
- }
- })
- .collect::<Result<_>>(),
- _ => Err(ArrowError::ParseError(
- "`metadata` field must be an object".to_string(),
- )),
- }
- }
-
/// Check to see if `self` is a superset of `other` schema. Here are the
comparison rules:
///
/// * `self` and `other` should contain the same number of fields
@@ -355,13 +281,6 @@ impl Hash for Schema {
}
}
-#[cfg(feature = "json")]
-#[derive(serde::Deserialize)]
-struct MetadataKeyValue {
- key: String,
- value: String,
-}
-
#[cfg(test)]
mod tests {
use crate::datatypes::DataType;
diff --git a/integration-testing/src/lib.rs b/integration-testing/src/lib.rs
index ffe112af7..2345f1967 100644
--- a/integration-testing/src/lib.rs
+++ b/integration-testing/src/lib.rs
@@ -50,7 +50,7 @@ pub fn read_json_file(json_name: &str) -> Result<ArrowFile> {
let json_file = File::open(json_name)?;
let reader = BufReader::new(json_file);
let arrow_json: Value = serde_json::from_reader(reader).unwrap();
- let schema = Schema::from(&arrow_json["schema"])?;
+ let schema = schema_from_json(&arrow_json["schema"])?;
// read dictionaries
let mut dictionaries = HashMap::new();
if let Some(dicts) = arrow_json.get("dictionaries") {
diff --git a/integration-testing/src/util/datatype.rs
b/integration-testing/src/util/datatype.rs
new file mode 100644
index 000000000..dd0b95b0a
--- /dev/null
+++ b/integration-testing/src/util/datatype.rs
@@ -0,0 +1,383 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::datatypes::{DataType, Field, IntervalUnit, TimeUnit, UnionMode};
+use arrow::error::{ArrowError, Result};
+
+/// Parse a data type from a JSON representation.
+pub fn data_type_from_json(json: &serde_json::Value) -> Result<DataType> {
+ use serde_json::Value;
+ let default_field = Field::new("", DataType::Boolean, true);
+ match *json {
+ Value::Object(ref map) => match map.get("name") {
+ Some(s) if s == "null" => Ok(DataType::Null),
+ Some(s) if s == "bool" => Ok(DataType::Boolean),
+ Some(s) if s == "binary" => Ok(DataType::Binary),
+ Some(s) if s == "largebinary" => Ok(DataType::LargeBinary),
+ Some(s) if s == "utf8" => Ok(DataType::Utf8),
+ Some(s) if s == "largeutf8" => Ok(DataType::LargeUtf8),
+ Some(s) if s == "fixedsizebinary" => {
+ // return a list with any type as its child isn't defined in
the map
+ if let Some(Value::Number(size)) = map.get("byteWidth") {
+ Ok(DataType::FixedSizeBinary(size.as_i64().unwrap() as
i32))
+ } else {
+ Err(ArrowError::ParseError(
+ "Expecting a byteWidth for
fixedsizebinary".to_string(),
+ ))
+ }
+ }
+ Some(s) if s == "decimal" => {
+ // return a list with any type as its child isn't defined in
the map
+ let precision = match map.get("precision") {
+ Some(p) => Ok(p.as_u64().unwrap().try_into().unwrap()),
+ None => Err(ArrowError::ParseError(
+ "Expecting a precision for decimal".to_string(),
+ )),
+ }?;
+ let scale = match map.get("scale") {
+ Some(s) => Ok(s.as_u64().unwrap().try_into().unwrap()),
+ _ => Err(ArrowError::ParseError(
+ "Expecting a scale for decimal".to_string(),
+ )),
+ }?;
+ let bit_width: usize = match map.get("bitWidth") {
+ Some(b) => b.as_u64().unwrap() as usize,
+ _ => 128, // Default bit width
+ };
+
+ if bit_width == 128 {
+ Ok(DataType::Decimal128(precision, scale))
+ } else if bit_width == 256 {
+ Ok(DataType::Decimal256(precision, scale))
+ } else {
+ Err(ArrowError::ParseError(
+ "Decimal bit_width invalid".to_string(),
+ ))
+ }
+ }
+ Some(s) if s == "floatingpoint" => match map.get("precision") {
+ Some(p) if p == "HALF" => Ok(DataType::Float16),
+ Some(p) if p == "SINGLE" => Ok(DataType::Float32),
+ Some(p) if p == "DOUBLE" => Ok(DataType::Float64),
+ _ => Err(ArrowError::ParseError(
+ "floatingpoint precision missing or invalid".to_string(),
+ )),
+ },
+ Some(s) if s == "timestamp" => {
+ let unit = match map.get("unit") {
+ Some(p) if p == "SECOND" => Ok(TimeUnit::Second),
+ Some(p) if p == "MILLISECOND" => Ok(TimeUnit::Millisecond),
+ Some(p) if p == "MICROSECOND" => Ok(TimeUnit::Microsecond),
+ Some(p) if p == "NANOSECOND" => Ok(TimeUnit::Nanosecond),
+ _ => Err(ArrowError::ParseError(
+ "timestamp unit missing or invalid".to_string(),
+ )),
+ };
+ let tz = match map.get("timezone") {
+ None => Ok(None),
+ Some(serde_json::Value::String(tz)) =>
Ok(Some(tz.clone())),
+ _ => Err(ArrowError::ParseError(
+ "timezone must be a string".to_string(),
+ )),
+ };
+ Ok(DataType::Timestamp(unit?, tz?))
+ }
+ Some(s) if s == "date" => match map.get("unit") {
+ Some(p) if p == "DAY" => Ok(DataType::Date32),
+ Some(p) if p == "MILLISECOND" => Ok(DataType::Date64),
+ _ => Err(ArrowError::ParseError(
+ "date unit missing or invalid".to_string(),
+ )),
+ },
+ Some(s) if s == "time" => {
+ let unit = match map.get("unit") {
+ Some(p) if p == "SECOND" => Ok(TimeUnit::Second),
+ Some(p) if p == "MILLISECOND" => Ok(TimeUnit::Millisecond),
+ Some(p) if p == "MICROSECOND" => Ok(TimeUnit::Microsecond),
+ Some(p) if p == "NANOSECOND" => Ok(TimeUnit::Nanosecond),
+ _ => Err(ArrowError::ParseError(
+ "time unit missing or invalid".to_string(),
+ )),
+ };
+ match map.get("bitWidth") {
+ Some(p) if p == 32 => Ok(DataType::Time32(unit?)),
+ Some(p) if p == 64 => Ok(DataType::Time64(unit?)),
+ _ => Err(ArrowError::ParseError(
+ "time bitWidth missing or invalid".to_string(),
+ )),
+ }
+ }
+ Some(s) if s == "duration" => match map.get("unit") {
+ Some(p) if p == "SECOND" =>
Ok(DataType::Duration(TimeUnit::Second)),
+ Some(p) if p == "MILLISECOND" => {
+ Ok(DataType::Duration(TimeUnit::Millisecond))
+ }
+ Some(p) if p == "MICROSECOND" => {
+ Ok(DataType::Duration(TimeUnit::Microsecond))
+ }
+ Some(p) if p == "NANOSECOND" => {
+ Ok(DataType::Duration(TimeUnit::Nanosecond))
+ }
+ _ => Err(ArrowError::ParseError(
+ "time unit missing or invalid".to_string(),
+ )),
+ },
+ Some(s) if s == "interval" => match map.get("unit") {
+ Some(p) if p == "DAY_TIME" => {
+ Ok(DataType::Interval(IntervalUnit::DayTime))
+ }
+ Some(p) if p == "YEAR_MONTH" => {
+ Ok(DataType::Interval(IntervalUnit::YearMonth))
+ }
+ Some(p) if p == "MONTH_DAY_NANO" => {
+ Ok(DataType::Interval(IntervalUnit::MonthDayNano))
+ }
+ _ => Err(ArrowError::ParseError(
+ "interval unit missing or invalid".to_string(),
+ )),
+ },
+ Some(s) if s == "int" => match map.get("isSigned") {
+ Some(&Value::Bool(true)) => match map.get("bitWidth") {
+ Some(&Value::Number(ref n)) => match n.as_u64() {
+ Some(8) => Ok(DataType::Int8),
+ Some(16) => Ok(DataType::Int16),
+ Some(32) => Ok(DataType::Int32),
+ Some(64) => Ok(DataType::Int64),
+ _ => Err(ArrowError::ParseError(
+ "int bitWidth missing or invalid".to_string(),
+ )),
+ },
+ _ => Err(ArrowError::ParseError(
+ "int bitWidth missing or invalid".to_string(),
+ )),
+ },
+ Some(&Value::Bool(false)) => match map.get("bitWidth") {
+ Some(&Value::Number(ref n)) => match n.as_u64() {
+ Some(8) => Ok(DataType::UInt8),
+ Some(16) => Ok(DataType::UInt16),
+ Some(32) => Ok(DataType::UInt32),
+ Some(64) => Ok(DataType::UInt64),
+ _ => Err(ArrowError::ParseError(
+ "int bitWidth missing or invalid".to_string(),
+ )),
+ },
+ _ => Err(ArrowError::ParseError(
+ "int bitWidth missing or invalid".to_string(),
+ )),
+ },
+ _ => Err(ArrowError::ParseError(
+ "int signed missing or invalid".to_string(),
+ )),
+ },
+ Some(s) if s == "list" => {
+ // return a list with any type as its child isn't defined in
the map
+ Ok(DataType::List(Box::new(default_field)))
+ }
+ Some(s) if s == "largelist" => {
+ // return a largelist with any type as its child isn't defined
in the map
+ Ok(DataType::LargeList(Box::new(default_field)))
+ }
+ Some(s) if s == "fixedsizelist" => {
+ // return a list with any type as its child isn't defined in
the map
+ if let Some(Value::Number(size)) = map.get("listSize") {
+ Ok(DataType::FixedSizeList(
+ Box::new(default_field),
+ size.as_i64().unwrap() as i32,
+ ))
+ } else {
+ Err(ArrowError::ParseError(
+ "Expecting a listSize for fixedsizelist".to_string(),
+ ))
+ }
+ }
+ Some(s) if s == "struct" => {
+ // return an empty `struct` type as its children aren't
defined in the map
+ Ok(DataType::Struct(vec![]))
+ }
+ Some(s) if s == "map" => {
+ if let Some(Value::Bool(keys_sorted)) = map.get("keysSorted") {
+ // Return a map with an empty type as its children aren't
defined in the map
+ Ok(DataType::Map(Box::new(default_field), *keys_sorted))
+ } else {
+ Err(ArrowError::ParseError(
+ "Expecting a keysSorted for map".to_string(),
+ ))
+ }
+ }
+ Some(s) if s == "union" => {
+ if let Some(Value::String(mode)) = map.get("mode") {
+ let union_mode = if mode == "SPARSE" {
+ UnionMode::Sparse
+ } else if mode == "DENSE" {
+ UnionMode::Dense
+ } else {
+ return Err(ArrowError::ParseError(format!(
+ "Unknown union mode {:?} for union",
+ mode
+ )));
+ };
+ if let Some(type_ids) = map.get("typeIds") {
+ let type_ids = type_ids
+ .as_array()
+ .unwrap()
+ .iter()
+ .map(|t| t.as_i64().unwrap() as i8)
+ .collect::<Vec<_>>();
+
+ let default_fields = type_ids
+ .iter()
+ .map(|_| default_field.clone())
+ .collect::<Vec<_>>();
+
+ Ok(DataType::Union(default_fields, type_ids,
union_mode))
+ } else {
+ Err(ArrowError::ParseError(
+ "Expecting a typeIds for union ".to_string(),
+ ))
+ }
+ } else {
+ Err(ArrowError::ParseError(
+ "Expecting a mode for union".to_string(),
+ ))
+ }
+ }
+ Some(other) => Err(ArrowError::ParseError(format!(
+ "invalid or unsupported type name: {} in {:?}",
+ other, json
+ ))),
+ None => Err(ArrowError::ParseError("type name
missing".to_string())),
+ },
+ _ => Err(ArrowError::ParseError(
+ "invalid json value type".to_string(),
+ )),
+ }
+}
+
+/// Generate a JSON representation of the data type.
+pub fn data_type_to_json(data_type: &DataType) -> serde_json::Value {
+ use serde_json::json;
+ match data_type {
+ DataType::Null => json!({"name": "null"}),
+ DataType::Boolean => json!({"name": "bool"}),
+ DataType::Int8 => json!({"name": "int", "bitWidth": 8, "isSigned":
true}),
+ DataType::Int16 => json!({"name": "int", "bitWidth": 16, "isSigned":
true}),
+ DataType::Int32 => json!({"name": "int", "bitWidth": 32, "isSigned":
true}),
+ DataType::Int64 => json!({"name": "int", "bitWidth": 64, "isSigned":
true}),
+ DataType::UInt8 => json!({"name": "int", "bitWidth": 8, "isSigned":
false}),
+ DataType::UInt16 => json!({"name": "int", "bitWidth": 16, "isSigned":
false}),
+ DataType::UInt32 => json!({"name": "int", "bitWidth": 32, "isSigned":
false}),
+ DataType::UInt64 => json!({"name": "int", "bitWidth": 64, "isSigned":
false}),
+ DataType::Float16 => json!({"name": "floatingpoint", "precision":
"HALF"}),
+ DataType::Float32 => json!({"name": "floatingpoint", "precision":
"SINGLE"}),
+ DataType::Float64 => json!({"name": "floatingpoint", "precision":
"DOUBLE"}),
+ DataType::Utf8 => json!({"name": "utf8"}),
+ DataType::LargeUtf8 => json!({"name": "largeutf8"}),
+ DataType::Binary => json!({"name": "binary"}),
+ DataType::LargeBinary => json!({"name": "largebinary"}),
+ DataType::FixedSizeBinary(byte_width) => {
+ json!({"name": "fixedsizebinary", "byteWidth": byte_width})
+ }
+ DataType::Struct(_) => json!({"name": "struct"}),
+ DataType::Union(_, _, _) => json!({"name": "union"}),
+ DataType::List(_) => json!({ "name": "list"}),
+ DataType::LargeList(_) => json!({ "name": "largelist"}),
+ DataType::FixedSizeList(_, length) => {
+ json!({"name":"fixedsizelist", "listSize": length})
+ }
+ DataType::Time32(unit) => {
+ json!({"name": "time", "bitWidth": 32, "unit": match unit {
+ TimeUnit::Second => "SECOND",
+ TimeUnit::Millisecond => "MILLISECOND",
+ TimeUnit::Microsecond => "MICROSECOND",
+ TimeUnit::Nanosecond => "NANOSECOND",
+ }})
+ }
+ DataType::Time64(unit) => {
+ json!({"name": "time", "bitWidth": 64, "unit": match unit {
+ TimeUnit::Second => "SECOND",
+ TimeUnit::Millisecond => "MILLISECOND",
+ TimeUnit::Microsecond => "MICROSECOND",
+ TimeUnit::Nanosecond => "NANOSECOND",
+ }})
+ }
+ DataType::Date32 => {
+ json!({"name": "date", "unit": "DAY"})
+ }
+ DataType::Date64 => {
+ json!({"name": "date", "unit": "MILLISECOND"})
+ }
+ DataType::Timestamp(unit, None) => {
+ json!({"name": "timestamp", "unit": match unit {
+ TimeUnit::Second => "SECOND",
+ TimeUnit::Millisecond => "MILLISECOND",
+ TimeUnit::Microsecond => "MICROSECOND",
+ TimeUnit::Nanosecond => "NANOSECOND",
+ }})
+ }
+ DataType::Timestamp(unit, Some(tz)) => {
+ json!({"name": "timestamp", "unit": match unit {
+ TimeUnit::Second => "SECOND",
+ TimeUnit::Millisecond => "MILLISECOND",
+ TimeUnit::Microsecond => "MICROSECOND",
+ TimeUnit::Nanosecond => "NANOSECOND",
+ }, "timezone": tz})
+ }
+ DataType::Interval(unit) => json!({"name": "interval", "unit": match
unit {
+ IntervalUnit::YearMonth => "YEAR_MONTH",
+ IntervalUnit::DayTime => "DAY_TIME",
+ IntervalUnit::MonthDayNano => "MONTH_DAY_NANO",
+ }}),
+ DataType::Duration(unit) => json!({"name": "duration", "unit": match
unit {
+ TimeUnit::Second => "SECOND",
+ TimeUnit::Millisecond => "MILLISECOND",
+ TimeUnit::Microsecond => "MICROSECOND",
+ TimeUnit::Nanosecond => "NANOSECOND",
+ }}),
+ DataType::Dictionary(_, _) => json!({ "name": "dictionary"}),
+ DataType::Decimal128(precision, scale) => {
+ json!({"name": "decimal", "precision": precision, "scale": scale,
"bitWidth": 128})
+ }
+ DataType::Decimal256(precision, scale) => {
+ json!({"name": "decimal", "precision": precision, "scale": scale,
"bitWidth": 256})
+ }
+ DataType::Map(_, keys_sorted) => {
+ json!({"name": "map", "keysSorted": keys_sorted})
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use serde_json::Value;
+
+ #[test]
+ fn parse_utf8_from_json() {
+ let json = "{\"name\":\"utf8\"}";
+ let value: Value = serde_json::from_str(json).unwrap();
+ let dt = data_type_from_json(&value).unwrap();
+ assert_eq!(DataType::Utf8, dt);
+ }
+
+ #[test]
+ fn parse_int32_from_json() {
+ let json = "{\"name\": \"int\", \"isSigned\": true, \"bitWidth\": 32}";
+ let value: Value = serde_json::from_str(json).unwrap();
+ let dt = data_type_from_json(&value).unwrap();
+ assert_eq!(DataType::Int32, dt);
+ }
+}
diff --git a/integration-testing/src/util/field.rs
b/integration-testing/src/util/field.rs
new file mode 100644
index 000000000..a2becc004
--- /dev/null
+++ b/integration-testing/src/util/field.rs
@@ -0,0 +1,586 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::util::datatype::{data_type_from_json, data_type_to_json};
+use arrow::datatypes::{DataType, Field};
+use arrow::error::{ArrowError, Result};
+use std::collections::BTreeMap;
+
+/// Parse a `Field` definition from a JSON representation.
+pub fn field_from_json(json: &serde_json::Value) -> Result<Field> {
+ use serde_json::Value;
+ match *json {
+ Value::Object(ref map) => {
+ let name = match map.get("name") {
+ Some(&Value::String(ref name)) => name.to_string(),
+ _ => {
+ return Err(ArrowError::ParseError(
+ "Field missing 'name' attribute".to_string(),
+ ));
+ }
+ };
+ let nullable = match map.get("nullable") {
+ Some(&Value::Bool(b)) => b,
+ _ => {
+ return Err(ArrowError::ParseError(
+ "Field missing 'nullable' attribute".to_string(),
+ ));
+ }
+ };
+ let data_type = match map.get("type") {
+ Some(t) => data_type_from_json(t)?,
+ _ => {
+ return Err(ArrowError::ParseError(
+ "Field missing 'type' attribute".to_string(),
+ ));
+ }
+ };
+
+ // Referenced example file:
testing/data/arrow-ipc-stream/integration/1.0.0-littleendian/generated_custom_metadata.json.gz
+ let metadata = match map.get("metadata") {
+ Some(&Value::Array(ref values)) => {
+ let mut res: BTreeMap<String, String> = BTreeMap::new();
+ for value in values {
+ match value.as_object() {
+ Some(map) => {
+ if map.len() != 2 {
+ return Err(ArrowError::ParseError(
+ "Field 'metadata' must have exact two
entries for each key-value map".to_string(),
+ ));
+ }
+ if let (Some(k), Some(v)) =
+ (map.get("key"), map.get("value"))
+ {
+ if let (Some(k_str), Some(v_str)) =
+ (k.as_str(), v.as_str())
+ {
+ res.insert(
+ k_str.to_string().clone(),
+ v_str.to_string().clone(),
+ );
+ } else {
+ return
Err(ArrowError::ParseError("Field 'metadata' must have map value of string
type".to_string()));
+ }
+ } else {
+ return Err(ArrowError::ParseError("Field
'metadata' lacks map keys named \"key\" or \"value\"".to_string()));
+ }
+ }
+ _ => {
+ return Err(ArrowError::ParseError(
+ "Field 'metadata' contains non-object
key-value pair"
+ .to_string(),
+ ));
+ }
+ }
+ }
+ Some(res)
+ }
+ // We also support map format, because Schema's metadata
supports this.
+ // See https://github.com/apache/arrow/pull/5907
+ Some(&Value::Object(ref values)) => {
+ let mut res: BTreeMap<String, String> = BTreeMap::new();
+ for (k, v) in values {
+ if let Some(str_value) = v.as_str() {
+ res.insert(k.clone(),
str_value.to_string().clone());
+ } else {
+ return Err(ArrowError::ParseError(format!(
+ "Field 'metadata' contains non-string value
for key {}",
+ k
+ )));
+ }
+ }
+ Some(res)
+ }
+ Some(_) => {
+ return Err(ArrowError::ParseError(
+ "Field `metadata` is not json array".to_string(),
+ ));
+ }
+ _ => None,
+ };
+
+ // if data_type is a struct or list, get its children
+ let data_type = match data_type {
+ DataType::List(_)
+ | DataType::LargeList(_)
+ | DataType::FixedSizeList(_, _) => match map.get("children") {
+ Some(Value::Array(values)) => {
+ if values.len() != 1 {
+ return Err(ArrowError::ParseError(
+ "Field 'children' must have one element for a
list data type".to_string(),
+ ));
+ }
+ match data_type {
+ DataType::List(_) => {
+
DataType::List(Box::new(field_from_json(&values[0])?))
+ }
+ DataType::LargeList(_) =>
DataType::LargeList(Box::new(
+ field_from_json(&values[0])?,
+ )),
+ DataType::FixedSizeList(_, int) =>
DataType::FixedSizeList(
+ Box::new(field_from_json(&values[0])?),
+ int,
+ ),
+ _ => unreachable!(
+ "Data type should be a list, largelist or
fixedsizelist"
+ ),
+ }
+ }
+ Some(_) => {
+ return Err(ArrowError::ParseError(
+ "Field 'children' must be an array".to_string(),
+ ))
+ }
+ None => {
+ return Err(ArrowError::ParseError(
+ "Field missing 'children' attribute".to_string(),
+ ));
+ }
+ },
+ DataType::Struct(mut fields) => match map.get("children") {
+ Some(Value::Array(values)) => {
+ let struct_fields: Result<Vec<Field>> =
+ values.iter().map(field_from_json).collect();
+ fields.append(&mut struct_fields?);
+ DataType::Struct(fields)
+ }
+ Some(_) => {
+ return Err(ArrowError::ParseError(
+ "Field 'children' must be an array".to_string(),
+ ))
+ }
+ None => {
+ return Err(ArrowError::ParseError(
+ "Field missing 'children' attribute".to_string(),
+ ));
+ }
+ },
+ DataType::Map(_, keys_sorted) => {
+ match map.get("children") {
+ Some(Value::Array(values)) if values.len() == 1 => {
+ let child = field_from_json(&values[0])?;
+ // child must be a struct
+ match child.data_type() {
+ DataType::Struct(map_fields) if
map_fields.len() == 2 => {
+ DataType::Map(Box::new(child), keys_sorted)
+ }
+ t => {
+ return Err(ArrowError::ParseError(
+ format!("Map children should be a
struct with 2 fields, found {:?}", t)
+ ))
+ }
+ }
+ }
+ Some(_) => {
+ return Err(ArrowError::ParseError(
+ "Field 'children' must be an array with 1
element"
+ .to_string(),
+ ))
+ }
+ None => {
+ return Err(ArrowError::ParseError(
+ "Field missing 'children'
attribute".to_string(),
+ ));
+ }
+ }
+ }
+ DataType::Union(_, type_ids, mode) => match
map.get("children") {
+ Some(Value::Array(values)) => {
+ let union_fields: Vec<Field> =
+
values.iter().map(field_from_json).collect::<Result<_>>()?;
+ DataType::Union(union_fields, type_ids, mode)
+ }
+ Some(_) => {
+ return Err(ArrowError::ParseError(
+ "Field 'children' must be an array".to_string(),
+ ))
+ }
+ None => {
+ return Err(ArrowError::ParseError(
+ "Field missing 'children' attribute".to_string(),
+ ));
+ }
+ },
+ _ => data_type,
+ };
+
+ let mut dict_id = 0;
+ let mut dict_is_ordered = false;
+
+ let data_type = match map.get("dictionary") {
+ Some(dictionary) => {
+ let index_type = match dictionary.get("indexType") {
+ Some(t) => data_type_from_json(t)?,
+ _ => {
+ return Err(ArrowError::ParseError(
+ "Field missing 'indexType'
attribute".to_string(),
+ ));
+ }
+ };
+ dict_id = match dictionary.get("id") {
+ Some(Value::Number(n)) => n.as_i64().unwrap(),
+ _ => {
+ return Err(ArrowError::ParseError(
+ "Field missing 'id' attribute".to_string(),
+ ));
+ }
+ };
+ dict_is_ordered = match dictionary.get("isOrdered") {
+ Some(&Value::Bool(n)) => n,
+ _ => {
+ return Err(ArrowError::ParseError(
+ "Field missing 'isOrdered'
attribute".to_string(),
+ ));
+ }
+ };
+ DataType::Dictionary(Box::new(index_type),
Box::new(data_type))
+ }
+ _ => data_type,
+ };
+
+ let mut field =
+ Field::new_dict(&name, data_type, nullable, dict_id,
dict_is_ordered);
+ field.set_metadata(metadata);
+ Ok(field)
+ }
+ _ => Err(ArrowError::ParseError(
+ "Invalid json value type for field".to_string(),
+ )),
+ }
+}
+
+/// Generate a JSON representation of the `Field`.
+pub fn field_to_json(field: &Field) -> serde_json::Value {
+ let children: Vec<serde_json::Value> = match field.data_type() {
+ DataType::Struct(fields) => fields.iter().map(field_to_json).collect(),
+ DataType::List(field)
+ | DataType::LargeList(field)
+ | DataType::FixedSizeList(field, _)
+ | DataType::Map(field, _) => vec![field_to_json(field)],
+ _ => vec![],
+ };
+
+ match field.data_type() {
+ DataType::Dictionary(ref index_type, ref value_type) =>
serde_json::json!({
+ "name": field.name(),
+ "nullable": field.is_nullable(),
+ "type": data_type_to_json(value_type),
+ "children": children,
+ "dictionary": {
+ "id": field.dict_id().unwrap(),
+ "indexType": data_type_to_json(index_type),
+ "isOrdered": field.dict_is_ordered().unwrap(),
+ }
+ }),
+ _ => serde_json::json!({
+ "name": field.name(),
+ "nullable": field.is_nullable(),
+ "type": data_type_to_json(field.data_type()),
+ "children": children
+ }),
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use arrow::datatypes::UnionMode;
+ use serde_json::Value;
+
+ #[test]
+ fn struct_field_to_json() {
+ let f = Field::new(
+ "address",
+ DataType::Struct(vec![
+ Field::new("street", DataType::Utf8, false),
+ Field::new("zip", DataType::UInt16, false),
+ ]),
+ false,
+ );
+ let value: Value = serde_json::from_str(
+ r#"{
+ "name": "address",
+ "nullable": false,
+ "type": {
+ "name": "struct"
+ },
+ "children": [
+ {
+ "name": "street",
+ "nullable": false,
+ "type": {
+ "name": "utf8"
+ },
+ "children": []
+ },
+ {
+ "name": "zip",
+ "nullable": false,
+ "type": {
+ "name": "int",
+ "bitWidth": 16,
+ "isSigned": false
+ },
+ "children": []
+ }
+ ]
+ }"#,
+ )
+ .unwrap();
+ assert_eq!(value, field_to_json(&f));
+ }
+
+ #[test]
+ fn map_field_to_json() {
+ let f = Field::new(
+ "my_map",
+ DataType::Map(
+ Box::new(Field::new(
+ "my_entries",
+ DataType::Struct(vec![
+ Field::new("my_keys", DataType::Utf8, false),
+ Field::new("my_values", DataType::UInt16, true),
+ ]),
+ false,
+ )),
+ true,
+ ),
+ false,
+ );
+ let value: Value = serde_json::from_str(
+ r#"{
+ "name": "my_map",
+ "nullable": false,
+ "type": {
+ "name": "map",
+ "keysSorted": true
+ },
+ "children": [
+ {
+ "name": "my_entries",
+ "nullable": false,
+ "type": {
+ "name": "struct"
+ },
+ "children": [
+ {
+ "name": "my_keys",
+ "nullable": false,
+ "type": {
+ "name": "utf8"
+ },
+ "children": []
+ },
+ {
+ "name": "my_values",
+ "nullable": true,
+ "type": {
+ "name": "int",
+ "bitWidth": 16,
+ "isSigned": false
+ },
+ "children": []
+ }
+ ]
+ }
+ ]
+ }"#,
+ )
+ .unwrap();
+ assert_eq!(value, field_to_json(&f));
+ }
+
+ #[test]
+ fn primitive_field_to_json() {
+ let f = Field::new("first_name", DataType::Utf8, false);
+ let value: Value = serde_json::from_str(
+ r#"{
+ "name": "first_name",
+ "nullable": false,
+ "type": {
+ "name": "utf8"
+ },
+ "children": []
+ }"#,
+ )
+ .unwrap();
+ assert_eq!(value, field_to_json(&f));
+ }
+ #[test]
+ fn parse_struct_from_json() {
+ let json = r#"
+ {
+ "name": "address",
+ "type": {
+ "name": "struct"
+ },
+ "nullable": false,
+ "children": [
+ {
+ "name": "street",
+ "type": {
+ "name": "utf8"
+ },
+ "nullable": false,
+ "children": []
+ },
+ {
+ "name": "zip",
+ "type": {
+ "name": "int",
+ "isSigned": false,
+ "bitWidth": 16
+ },
+ "nullable": false,
+ "children": []
+ }
+ ]
+ }
+ "#;
+ let value: Value = serde_json::from_str(json).unwrap();
+ let dt = field_from_json(&value).unwrap();
+
+ let expected = Field::new(
+ "address",
+ DataType::Struct(vec![
+ Field::new("street", DataType::Utf8, false),
+ Field::new("zip", DataType::UInt16, false),
+ ]),
+ false,
+ );
+
+ assert_eq!(expected, dt);
+ }
+
+ #[test]
+ fn parse_map_from_json() {
+ let json = r#"
+ {
+ "name": "my_map",
+ "nullable": false,
+ "type": {
+ "name": "map",
+ "keysSorted": true
+ },
+ "children": [
+ {
+ "name": "my_entries",
+ "nullable": false,
+ "type": {
+ "name": "struct"
+ },
+ "children": [
+ {
+ "name": "my_keys",
+ "nullable": false,
+ "type": {
+ "name": "utf8"
+ },
+ "children": []
+ },
+ {
+ "name": "my_values",
+ "nullable": true,
+ "type": {
+ "name": "int",
+ "bitWidth": 16,
+ "isSigned": false
+ },
+ "children": []
+ }
+ ]
+ }
+ ]
+ }
+ "#;
+ let value: Value = serde_json::from_str(json).unwrap();
+ let dt = field_from_json(&value).unwrap();
+
+ let expected = Field::new(
+ "my_map",
+ DataType::Map(
+ Box::new(Field::new(
+ "my_entries",
+ DataType::Struct(vec![
+ Field::new("my_keys", DataType::Utf8, false),
+ Field::new("my_values", DataType::UInt16, true),
+ ]),
+ false,
+ )),
+ true,
+ ),
+ false,
+ );
+
+ assert_eq!(expected, dt);
+ }
+
+ #[test]
+ fn parse_union_from_json() {
+ let json = r#"
+ {
+ "name": "my_union",
+ "nullable": false,
+ "type": {
+ "name": "union",
+ "mode": "SPARSE",
+ "typeIds": [
+ 5,
+ 7
+ ]
+ },
+ "children": [
+ {
+ "name": "f1",
+ "type": {
+ "name": "int",
+ "isSigned": true,
+ "bitWidth": 32
+ },
+ "nullable": true,
+ "children": []
+ },
+ {
+ "name": "f2",
+ "type": {
+ "name": "utf8"
+ },
+ "nullable": true,
+ "children": []
+ }
+ ]
+ }
+ "#;
+ let value: Value = serde_json::from_str(json).unwrap();
+ let dt = field_from_json(&value).unwrap();
+
+ let expected = Field::new(
+ "my_union",
+ DataType::Union(
+ vec![
+ Field::new("f1", DataType::Int32, true),
+ Field::new("f2", DataType::Utf8, true),
+ ],
+ vec![5, 7],
+ UnionMode::Sparse,
+ ),
+ false,
+ );
+
+ assert_eq!(expected, dt);
+ }
+}
diff --git a/integration-testing/src/util.rs
b/integration-testing/src/util/mod.rs
similarity index 99%
rename from integration-testing/src/util.rs
rename to integration-testing/src/util/mod.rs
index e098c4e14..9ecd30136 100644
--- a/integration-testing/src/util.rs
+++ b/integration-testing/src/util/mod.rs
@@ -36,7 +36,17 @@ use arrow::record_batch::{RecordBatch, RecordBatchReader};
use arrow::util::bit_util;
use arrow::util::decimal::Decimal256;
+mod datatype;
+mod field;
+mod schema;
+
+use crate::util::datatype::data_type_to_json;
+use crate::util::field::field_from_json;
+pub use schema::*;
+
/// A struct that represents an Arrow file with a schema and record batches
+///
+/// See
<https://github.com/apache/arrow/blob/master/docs/source/format/Integration.rst#json-test-data-format>
#[derive(Deserialize, Serialize, Debug)]
pub struct ArrowJson {
pub schema: ArrowJsonSchema,
@@ -90,7 +100,7 @@ impl From<&Field> for ArrowJsonField {
Self {
name: field.name().to_string(),
- field_type: field.data_type().to_json(),
+ field_type: data_type_to_json(field.data_type()),
nullable: field.is_nullable(),
children: vec![],
dictionary: None, // TODO: not enough info
@@ -256,7 +266,7 @@ impl ArrowJsonField {
fn to_arrow_field(&self) -> Result<Field> {
// a bit regressive, but we have to convert the field to JSON in order
to convert it
let field = serde_json::to_value(self)?;
- Field::from(&field)
+ field_from_json(&field)
}
}
diff --git a/integration-testing/src/util/schema.rs
b/integration-testing/src/util/schema.rs
new file mode 100644
index 000000000..7e3475e6f
--- /dev/null
+++ b/integration-testing/src/util/schema.rs
@@ -0,0 +1,733 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::util::field::{field_from_json, field_to_json};
+use arrow::datatypes::Schema;
+use arrow::error::{ArrowError, Result};
+use std::collections::HashMap;
+
+/// Serialize a `Schema` into its JSON representation.
+///
+/// The result is an object with two entries: `"fields"`, an array holding
+/// one `field_to_json` value per schema field, and `"metadata"`, the schema
+/// metadata serialized with `serde_json::to_value`.
+pub fn schema_to_json(schema: &Schema) -> serde_json::Value {
+    let fields: Vec<_> = schema.fields().iter().map(field_to_json).collect();
+    // A serialization failure panics here, as it always has.
+    let metadata = serde_json::to_value(schema.metadata()).unwrap();
+
+    serde_json::json!({
+        "fields": fields,
+        "metadata": metadata
+    })
+}
+
+/// Build a `Schema` from its JSON representation.
+///
+/// `json` must be an object whose `"fields"` entry is an array; every element
+/// of that array is converted with `field_from_json`. An optional
+/// `"metadata"` entry is converted with `from_metadata`; when it is absent
+/// the schema is created with empty metadata.
+///
+/// # Errors
+///
+/// Returns `ArrowError::ParseError` when `json` is not an object, or when
+/// `"fields"` is missing or is not an array. Errors raised while converting
+/// a field or the metadata are returned unchanged.
+pub fn schema_from_json(json: &serde_json::Value) -> Result<Schema> {
+    use serde_json::Value;
+
+    // Guard: anything other than a JSON object cannot describe a schema.
+    let object = match json {
+        Value::Object(object) => object,
+        _ => {
+            return Err(ArrowError::ParseError(
+                "Invalid json value type for schema".to_string(),
+            ))
+        }
+    };
+
+    let fields = match object.get("fields") {
+        Some(Value::Array(items)) => {
+            items.iter().map(field_from_json).collect::<Result<_>>()?
+        }
+        _ => {
+            return Err(ArrowError::ParseError(
+                "Schema fields should be an array".to_string(),
+            ))
+        }
+    };
+
+    // Missing metadata is not an error: fall back to an empty map.
+    let metadata = object
+        .get("metadata")
+        .map(from_metadata)
+        .transpose()?
+        .unwrap_or_default();
+
+    Ok(Schema::new_with_metadata(fields, metadata))
+}
+
+/// Parse a `metadata` definition from a JSON representation.
+///
+/// Two layouts are accepted:
+///
+/// * an array of objects, each carrying a string `key` and a string `value`;
+///   when a key occurs more than once, the last entry wins
+/// * an object whose values are all strings
+///
+/// # Errors
+///
+/// Returns `ArrowError::JsonError` if an array cannot be deserialized into
+/// key-value pairs, and `ArrowError::ParseError` if an object holds a
+/// non-string value or if `json` is neither an array nor an object.
+fn from_metadata(json: &serde_json::Value) -> Result<HashMap<String, String>> {
+    use serde_json::Value;
+    match json {
+        Value::Array(_) => {
+            // `&Value` is itself a serde `Deserializer`, so the entries are
+            // decoded in place instead of deep-cloning the whole array first.
+            let entries =
+                <Vec<MetadataKeyValue> as serde::Deserialize>::deserialize(json)
+                    .map_err(|_| {
+                        ArrowError::JsonError(
+                            "Unable to parse object into key-value pair".to_string(),
+                        )
+                    })?;
+            // Entries are owned here, so key and value are moved, not cloned.
+            // Collecting inserts in order, so a later duplicate key overrides
+            // an earlier one.
+            Ok(entries
+                .into_iter()
+                .map(|entry| (entry.key, entry.value))
+                .collect())
+        }
+        Value::Object(md) => md
+            .iter()
+            .map(|(k, v)| match v {
+                Value::String(v) => Ok((k.clone(), v.clone())),
+                _ => Err(ArrowError::ParseError(
+                    "metadata `value` field must be a string".to_string(),
+                )),
+            })
+            .collect::<Result<_>>(),
+        _ => Err(ArrowError::ParseError(
+            "`metadata` field must be an object".to_string(),
+        )),
+    }
+}
+
+/// One entry of the array form of schema `metadata`: an object holding a
+/// string `key` and a string `value`. `from_metadata` deserializes arrays
+/// into a list of these before building the metadata map.
+#[derive(serde::Deserialize)]
+struct MetadataKeyValue {
+ // Metadata key; becomes the map key in `from_metadata`.
+ key: String,
+ // Metadata value stored under `key`.
+ value: String,
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use arrow::datatypes::{DataType, Field, IntervalUnit, TimeUnit};
+ use serde_json::Value;
+
+ // Serializes a schema with 37 fields and one metadata entry, compares the
+ // output with a reference JSON document, parses that document back into a
+ // schema, and finally checks that empty and missing `metadata` both parse
+ // to empty schema metadata.
+ #[test]
+ fn schema_json() {
+ // Add some custom metadata
+ let metadata: HashMap<String, String> =
+ [("Key".to_string(), "Value".to_string())]
+ .iter()
+ .cloned()
+ .collect();
+
+ // NOTE(review): c9/c10 pair Time32 with micro/nanosecond units and c11/c12
+ // pair Time64 with second/millisecond units; presumably these are here only
+ // to exercise unit (de)serialization - confirm against the Arrow format spec.
+ let schema = Schema::new_with_metadata(
+ vec![
+ Field::new("c1", DataType::Utf8, false),
+ Field::new("c2", DataType::Binary, false),
+ Field::new("c3", DataType::FixedSizeBinary(3), false),
+ Field::new("c4", DataType::Boolean, false),
+ Field::new("c5", DataType::Date32, false),
+ Field::new("c6", DataType::Date64, false),
+ Field::new("c7", DataType::Time32(TimeUnit::Second), false),
+ Field::new("c8", DataType::Time32(TimeUnit::Millisecond),
false),
+ Field::new("c9", DataType::Time32(TimeUnit::Microsecond),
false),
+ Field::new("c10", DataType::Time32(TimeUnit::Nanosecond),
false),
+ Field::new("c11", DataType::Time64(TimeUnit::Second), false),
+ Field::new("c12", DataType::Time64(TimeUnit::Millisecond),
false),
+ Field::new("c13", DataType::Time64(TimeUnit::Microsecond),
false),
+ Field::new("c14", DataType::Time64(TimeUnit::Nanosecond),
false),
+ Field::new("c15", DataType::Timestamp(TimeUnit::Second, None),
false),
+ Field::new(
+ "c16",
+ DataType::Timestamp(TimeUnit::Millisecond,
Some("UTC".to_string())),
+ false,
+ ),
+ Field::new(
+ "c17",
+ DataType::Timestamp(
+ TimeUnit::Microsecond,
+ Some("Africa/Johannesburg".to_string()),
+ ),
+ false,
+ ),
+ Field::new(
+ "c18",
+ DataType::Timestamp(TimeUnit::Nanosecond, None),
+ false,
+ ),
+ Field::new("c19", DataType::Interval(IntervalUnit::DayTime),
false),
+ Field::new("c20", DataType::Interval(IntervalUnit::YearMonth),
false),
+ Field::new("c21",
DataType::Interval(IntervalUnit::MonthDayNano), false),
+ Field::new(
+ "c22",
+ DataType::List(Box::new(Field::new("item",
DataType::Boolean, true))),
+ false,
+ ),
+ Field::new(
+ "c23",
+ DataType::FixedSizeList(
+ Box::new(Field::new("bools", DataType::Boolean,
false)),
+ 5,
+ ),
+ false,
+ ),
+ Field::new(
+ "c24",
+ DataType::List(Box::new(Field::new(
+ "inner_list",
+ DataType::List(Box::new(Field::new(
+ "struct",
+ DataType::Struct(vec![]),
+ true,
+ ))),
+ false,
+ ))),
+ true,
+ ),
+ Field::new(
+ "c25",
+ DataType::Struct(vec![
+ Field::new("a", DataType::Utf8, false),
+ Field::new("b", DataType::UInt16, false),
+ ]),
+ false,
+ ),
+ Field::new("c26", DataType::Interval(IntervalUnit::YearMonth),
true),
+ Field::new("c27", DataType::Interval(IntervalUnit::DayTime),
true),
+ Field::new("c28",
DataType::Interval(IntervalUnit::MonthDayNano), true),
+ Field::new("c29", DataType::Duration(TimeUnit::Second), false),
+ Field::new("c30", DataType::Duration(TimeUnit::Millisecond),
false),
+ Field::new("c31", DataType::Duration(TimeUnit::Microsecond),
false),
+ Field::new("c32", DataType::Duration(TimeUnit::Nanosecond),
false),
+ Field::new_dict(
+ "c33",
+ DataType::Dictionary(
+ Box::new(DataType::Int32),
+ Box::new(DataType::Utf8),
+ ),
+ true,
+ 123,
+ true,
+ ),
+ Field::new("c34", DataType::LargeBinary, true),
+ Field::new("c35", DataType::LargeUtf8, true),
+ Field::new(
+ "c36",
+ DataType::LargeList(Box::new(Field::new(
+ "inner_large_list",
+ DataType::LargeList(Box::new(Field::new(
+ "struct",
+ DataType::Struct(vec![]),
+ false,
+ ))),
+ true,
+ ))),
+ true,
+ ),
+ Field::new(
+ "c37",
+ DataType::Map(
+ Box::new(Field::new(
+ "my_entries",
+ DataType::Struct(vec![
+ Field::new("my_keys", DataType::Utf8, false),
+ Field::new("my_values", DataType::UInt16,
true),
+ ]),
+ false,
+ )),
+ true,
+ ),
+ false,
+ ),
+ ],
+ metadata,
+ );
+
+ // Despite its name, `expected` holds the serializer output; the JSON
+ // literal below is the reference document it is compared against.
+ let expected = schema_to_json(&schema);
+ let json = r#"{
+ "fields": [
+ {
+ "name": "c1",
+ "nullable": false,
+ "type": {
+ "name": "utf8"
+ },
+ "children": []
+ },
+ {
+ "name": "c2",
+ "nullable": false,
+ "type": {
+ "name": "binary"
+ },
+ "children": []
+ },
+ {
+ "name": "c3",
+ "nullable": false,
+ "type": {
+ "name": "fixedsizebinary",
+ "byteWidth": 3
+ },
+ "children": []
+ },
+ {
+ "name": "c4",
+ "nullable": false,
+ "type": {
+ "name": "bool"
+ },
+ "children": []
+ },
+ {
+ "name": "c5",
+ "nullable": false,
+ "type": {
+ "name": "date",
+ "unit": "DAY"
+ },
+ "children": []
+ },
+ {
+ "name": "c6",
+ "nullable": false,
+ "type": {
+ "name": "date",
+ "unit": "MILLISECOND"
+ },
+ "children": []
+ },
+ {
+ "name": "c7",
+ "nullable": false,
+ "type": {
+ "name": "time",
+ "bitWidth": 32,
+ "unit": "SECOND"
+ },
+ "children": []
+ },
+ {
+ "name": "c8",
+ "nullable": false,
+ "type": {
+ "name": "time",
+ "bitWidth": 32,
+ "unit": "MILLISECOND"
+ },
+ "children": []
+ },
+ {
+ "name": "c9",
+ "nullable": false,
+ "type": {
+ "name": "time",
+ "bitWidth": 32,
+ "unit": "MICROSECOND"
+ },
+ "children": []
+ },
+ {
+ "name": "c10",
+ "nullable": false,
+ "type": {
+ "name": "time",
+ "bitWidth": 32,
+ "unit": "NANOSECOND"
+ },
+ "children": []
+ },
+ {
+ "name": "c11",
+ "nullable": false,
+ "type": {
+ "name": "time",
+ "bitWidth": 64,
+ "unit": "SECOND"
+ },
+ "children": []
+ },
+ {
+ "name": "c12",
+ "nullable": false,
+ "type": {
+ "name": "time",
+ "bitWidth": 64,
+ "unit": "MILLISECOND"
+ },
+ "children": []
+ },
+ {
+ "name": "c13",
+ "nullable": false,
+ "type": {
+ "name": "time",
+ "bitWidth": 64,
+ "unit": "MICROSECOND"
+ },
+ "children": []
+ },
+ {
+ "name": "c14",
+ "nullable": false,
+ "type": {
+ "name": "time",
+ "bitWidth": 64,
+ "unit": "NANOSECOND"
+ },
+ "children": []
+ },
+ {
+ "name": "c15",
+ "nullable": false,
+ "type": {
+ "name": "timestamp",
+ "unit": "SECOND"
+ },
+ "children": []
+ },
+ {
+ "name": "c16",
+ "nullable": false,
+ "type": {
+ "name": "timestamp",
+ "unit": "MILLISECOND",
+ "timezone": "UTC"
+ },
+ "children": []
+ },
+ {
+ "name": "c17",
+ "nullable": false,
+ "type": {
+ "name": "timestamp",
+ "unit": "MICROSECOND",
+ "timezone": "Africa/Johannesburg"
+ },
+ "children": []
+ },
+ {
+ "name": "c18",
+ "nullable": false,
+ "type": {
+ "name": "timestamp",
+ "unit": "NANOSECOND"
+ },
+ "children": []
+ },
+ {
+ "name": "c19",
+ "nullable": false,
+ "type": {
+ "name": "interval",
+ "unit": "DAY_TIME"
+ },
+ "children": []
+ },
+ {
+ "name": "c20",
+ "nullable": false,
+ "type": {
+ "name": "interval",
+ "unit": "YEAR_MONTH"
+ },
+ "children": []
+ },
+ {
+ "name": "c21",
+ "nullable": false,
+ "type": {
+ "name": "interval",
+ "unit": "MONTH_DAY_NANO"
+ },
+ "children": []
+ },
+ {
+ "name": "c22",
+ "nullable": false,
+ "type": {
+ "name": "list"
+ },
+ "children": [
+ {
+ "name": "item",
+ "nullable": true,
+ "type": {
+ "name": "bool"
+ },
+ "children": []
+ }
+ ]
+ },
+ {
+ "name": "c23",
+ "nullable": false,
+ "type": {
+ "name": "fixedsizelist",
+ "listSize": 5
+ },
+ "children": [
+ {
+ "name": "bools",
+ "nullable": false,
+ "type": {
+ "name": "bool"
+ },
+ "children": []
+ }
+ ]
+ },
+ {
+ "name": "c24",
+ "nullable": true,
+ "type": {
+ "name": "list"
+ },
+ "children": [
+ {
+ "name": "inner_list",
+ "nullable": false,
+ "type": {
+ "name": "list"
+ },
+ "children": [
+ {
+ "name": "struct",
+ "nullable": true,
+ "type": {
+ "name": "struct"
+ },
+ "children": []
+ }
+ ]
+ }
+ ]
+ },
+ {
+ "name": "c25",
+ "nullable": false,
+ "type": {
+ "name": "struct"
+ },
+ "children": [
+ {
+ "name": "a",
+ "nullable": false,
+ "type": {
+ "name": "utf8"
+ },
+ "children": []
+ },
+ {
+ "name": "b",
+ "nullable": false,
+ "type": {
+ "name": "int",
+ "bitWidth": 16,
+ "isSigned": false
+ },
+ "children": []
+ }
+ ]
+ },
+ {
+ "name": "c26",
+ "nullable": true,
+ "type": {
+ "name": "interval",
+ "unit": "YEAR_MONTH"
+ },
+ "children": []
+ },
+ {
+ "name": "c27",
+ "nullable": true,
+ "type": {
+ "name": "interval",
+ "unit": "DAY_TIME"
+ },
+ "children": []
+ },
+ {
+ "name": "c28",
+ "nullable": true,
+ "type": {
+ "name": "interval",
+ "unit": "MONTH_DAY_NANO"
+ },
+ "children": []
+ },
+ {
+ "name": "c29",
+ "nullable": false,
+ "type": {
+ "name": "duration",
+ "unit": "SECOND"
+ },
+ "children": []
+ },
+ {
+ "name": "c30",
+ "nullable": false,
+ "type": {
+ "name": "duration",
+ "unit": "MILLISECOND"
+ },
+ "children": []
+ },
+ {
+ "name": "c31",
+ "nullable": false,
+ "type": {
+ "name": "duration",
+ "unit": "MICROSECOND"
+ },
+ "children": []
+ },
+ {
+ "name": "c32",
+ "nullable": false,
+ "type": {
+ "name": "duration",
+ "unit": "NANOSECOND"
+ },
+ "children": []
+ },
+ {
+ "name": "c33",
+ "nullable": true,
+ "children": [],
+ "type": {
+ "name": "utf8"
+ },
+ "dictionary": {
+ "id": 123,
+ "indexType": {
+ "name": "int",
+ "bitWidth": 32,
+ "isSigned": true
+ },
+ "isOrdered": true
+ }
+ },
+ {
+ "name": "c34",
+ "nullable": true,
+ "type": {
+ "name": "largebinary"
+ },
+ "children": []
+ },
+ {
+ "name": "c35",
+ "nullable": true,
+ "type": {
+ "name": "largeutf8"
+ },
+ "children": []
+ },
+ {
+ "name": "c36",
+ "nullable": true,
+ "type": {
+ "name": "largelist"
+ },
+ "children": [
+ {
+ "name": "inner_large_list",
+ "nullable": true,
+ "type": {
+ "name": "largelist"
+ },
+ "children": [
+ {
+ "name": "struct",
+ "nullable": false,
+ "type": {
+ "name": "struct"
+ },
+ "children": []
+ }
+ ]
+ }
+ ]
+ },
+ {
+ "name": "c37",
+ "nullable": false,
+ "type": {
+ "name": "map",
+ "keysSorted": true
+ },
+ "children": [
+ {
+ "name": "my_entries",
+ "nullable": false,
+ "type": {
+ "name": "struct"
+ },
+ "children": [
+ {
+ "name": "my_keys",
+ "nullable": false,
+ "type": {
+ "name": "utf8"
+ },
+ "children": []
+ },
+ {
+ "name": "my_values",
+ "nullable": true,
+ "type": {
+ "name": "int",
+ "bitWidth": 16,
+ "isSigned": false
+ },
+ "children": []
+ }
+ ]
+ }
+ ]
+ }
+ ],
+ "metadata" : {
+ "Key": "Value"
+ }
+ }"#;
+ // Serialization check: the generated JSON must equal the reference document.
+ let value: Value = serde_json::from_str(json).unwrap();
+ assert_eq!(expected, value);
+
+ // convert back to a schema
+ // (the same `json` text is parsed again; it shadows the `value` above)
+ let value: Value = serde_json::from_str(json).unwrap();
+ let schema2 = schema_from_json(&value).unwrap();
+
+ assert_eq!(schema, schema2);
+
+ // Check that empty metadata produces empty value in JSON and can be
parsed
+ let json = r#"{
+ "fields": [
+ {
+ "name": "c1",
+ "nullable": false,
+ "type": {
+ "name": "utf8"
+ },
+ "children": []
+ }
+ ],
+ "metadata": {}
+ }"#;
+ let value: Value = serde_json::from_str(json).unwrap();
+ let schema = schema_from_json(&value).unwrap();
+ assert!(schema.metadata.is_empty());
+
+ // Check that metadata field is not required in the JSON.
+ let json = r#"{
+ "fields": [
+ {
+ "name": "c1",
+ "nullable": false,
+ "type": {
+ "name": "utf8"
+ },
+ "children": []
+ }
+ ]
+ }"#;
+ let value: Value = serde_json::from_str(json).unwrap();
+ let schema = schema_from_json(&value).unwrap();
+ assert!(schema.metadata.is_empty());
+ }
+}