[ https://issues.apache.org/jira/browse/ARROW-2385?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16425597#comment-16425597 ]
ASF GitHub Bot commented on ARROW-2385:
---------------------------------------
xhochy closed pull request #1829: ARROW-2385: [Rust] implement to_json for
DataType and Field
URL: https://github.com/apache/arrow/pull/1829
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git a/rust/Cargo.toml b/rust/Cargo.toml
index 9b7b3102f..c3120cfdc 100644
--- a/rust/Cargo.toml
+++ b/rust/Cargo.toml
@@ -35,4 +35,5 @@ path = "src/lib.rs"
[dependencies]
bytes = "0.4"
-libc = "0.2"
\ No newline at end of file
+libc = "0.2"
+serde_json = "1.0.13"
\ No newline at end of file
diff --git a/rust/src/array.rs b/rust/src/array.rs
index 960f33e16..7fd343346 100644
--- a/rust/src/array.rs
+++ b/rust/src/array.rs
@@ -23,7 +23,6 @@ use std::string::String;
use super::bitmap::Bitmap;
use super::buffer::Buffer;
-use super::error::*;
use super::list::List;
pub enum ArrayData {
@@ -91,74 +90,6 @@ impl Array {
}
}
-/// type-safe array operations
-trait ArrayOps<T> {
- /// Get one element from an array. Note that this is an expensive call
since it
- /// will pattern match the type of the array on every invocation. We
should add
- /// other efficient iterator and map methods so we can perform columnar
operations
- /// instead.
- fn get(&self, i: usize) -> Result<T, Error>;
-
- /// Compare two same-typed arrays using a boolean closure e.g. eq, gt, lt,
and so on
- fn compare(&self, other: &Array, f: &Fn(T, T) -> bool) ->
Result<Vec<bool>, Error>;
-
- /// Perform a computation on two same-typed arrays and produce a result of
the same type e.g. c = a + b
- fn compute(&self, other: &Array, f: &Fn(T, T) -> T) -> Result<Vec<T>,
Error>;
-}
-
-macro_rules! array_ops {
- ($DT:ty, $AT:ident) => {
- impl ArrayOps<$DT> for Array {
- fn get(&self, i: usize) -> Result<$DT, Error> {
- match self.data() {
- &ArrayData::$AT(ref buf) => Ok(unsafe {
*buf.data().offset(i as isize) }),
- _ => Err(Error::from("Request for $DT but array is not
$DT")),
- }
- }
- fn compare(&self, other: &Array, f: &Fn($DT, $DT) -> bool) ->
Result<Vec<bool>, Error> {
- match (&self.data, &other.data) {
- (&ArrayData::$AT(ref l), &ArrayData::$AT(ref r)) => {
- let mut b: Vec<bool> = Vec::with_capacity(self.len as
usize);
- for i in 0..self.len as isize {
- let lv: $DT = unsafe { *l.data().offset(i) };
- let rv: $DT = unsafe { *r.data().offset(i) };
- b.push(f(lv, rv));
- }
- Ok(b)
- }
- _ => Err(Error::from("Cannot compare arrays of this
type")),
- }
- }
- fn compute(&self, other: &Array, f: &Fn($DT, $DT) -> $DT) ->
Result<Vec<$DT>, Error> {
- match (&self.data, &other.data) {
- (&ArrayData::$AT(ref l), &ArrayData::$AT(ref r)) => {
- let mut b: Vec<$DT> = Vec::with_capacity(self.len as
usize);
- for i in 0..self.len as isize {
- let lv: $DT = unsafe { *l.data().offset(i) };
- let rv: $DT = unsafe { *r.data().offset(i) };
- b.push(f(lv, rv));
- }
- Ok(b)
- }
- _ => Err(Error::from("Cannot compare arrays of this
type")),
- }
- }
- }
- };
-}
-
-array_ops!(bool, Boolean);
-array_ops!(f64, Float64);
-array_ops!(f32, Float32);
-array_ops!(u8, UInt8);
-array_ops!(u16, UInt16);
-array_ops!(u32, UInt32);
-array_ops!(u64, UInt64);
-array_ops!(i8, Int8);
-array_ops!(i16, Int16);
-array_ops!(i32, Int32);
-array_ops!(i64, Int64);
-
macro_rules! array_from_primitive {
($DT:ty) => {
impl From<Vec<$DT>> for Array {
@@ -307,12 +238,12 @@ mod tests {
fn test_from_i32() {
let a = Array::from(vec![15, 14, 13, 12, 11]);
assert_eq!(5, a.len());
-
- assert_eq!(15, a.get(0).unwrap());
- assert_eq!(14, a.get(1).unwrap());
- assert_eq!(13, a.get(2).unwrap());
- assert_eq!(12, a.get(3).unwrap());
- assert_eq!(11, a.get(4).unwrap());
+ match a.data() {
+ &ArrayData::Int32(ref b) => {
+ assert_eq!(vec![15, 14, 13, 12, 11],
b.iter().collect::<Vec<i32>>());
+ }
+ _ => panic!(),
+ }
}
#[test]
@@ -330,7 +261,7 @@ mod tests {
#[test]
fn test_struct() {
- let _schema = Schema::new(vec![
+ let _schema = DataType::Struct(vec![
Field::new("a", DataType::Int32, false),
Field::new("b", DataType::Float32, false),
]);
@@ -340,43 +271,4 @@ mod tests {
let _ = Rc::new(Array::from(vec![a, b]));
}
- #[test]
- fn test_array_eq() {
- let a = Array::from(vec![1, 2, 3, 4, 5]);
- let b = Array::from(vec![5, 4, 3, 2, 1]);
- let c = a.compare(&b, &|a: i32, b: i32| a == b).unwrap();
- assert_eq!(c, vec![false, false, true, false, false]);
- }
-
- #[test]
- fn test_array_lt() {
- let a = Array::from(vec![1, 2, 3, 4, 5]);
- let b = Array::from(vec![5, 4, 3, 2, 1]);
- let c = a.compare(&b, &|a: i32, b: i32| a < b).unwrap();
- assert_eq!(c, vec![true, true, false, false, false]);
- }
-
- #[test]
- fn test_array_gt() {
- let a = Array::from(vec![1, 2, 3, 4, 5]);
- let b = Array::from(vec![5, 4, 3, 2, 1]);
- let c = a.compare(&b, &|a: i32, b: i32| a > b).unwrap();
- assert_eq!(c, vec![false, false, false, true, true]);
- }
-
- #[test]
- fn test_array_add() {
- let a = Array::from(vec![1, 2, 3, 4, 5]);
- let b = Array::from(vec![5, 4, 3, 2, 1]);
- let c = a.compute(&b, &|a: i32, b: i32| a + b).unwrap();
- assert_eq!(c, vec![6, 6, 6, 6, 6]);
- }
-
- #[test]
- fn test_array_multiply() {
- let a = Array::from(vec![1, 2, 3, 4, 5]);
- let b = Array::from(vec![5, 4, 3, 2, 1]);
- let c = a.compute(&b, &|a: i32, b: i32| a * b).unwrap();
- assert_eq!(c, vec![5, 8, 9, 8, 5]);
- }
}
diff --git a/rust/src/buffer.rs b/rust/src/buffer.rs
index 72b2a27d8..45b728c39 100644
--- a/rust/src/buffer.rs
+++ b/rust/src/buffer.rs
@@ -145,4 +145,59 @@ mod tests {
let v: Vec<i32> = it.map(|n| n + 1).collect();
assert_eq!(vec![2, 3, 4, 5, 6], v);
}
+
+ #[test]
+ fn test_buffer_eq() {
+ let a = Buffer::from(vec![1, 2, 3, 4, 5]);
+ let b = Buffer::from(vec![5, 4, 3, 2, 1]);
+ let c = a.iter()
+ .zip(b.iter())
+ .map(|(a, b)| a == b)
+ .collect::<Vec<bool>>();
+ assert_eq!(c, vec![false, false, true, false, false]);
+ }
+
+ #[test]
+ fn test_buffer_lt() {
+ let a = Buffer::from(vec![1, 2, 3, 4, 5]);
+ let b = Buffer::from(vec![5, 4, 3, 2, 1]);
+ let c = a.iter()
+ .zip(b.iter())
+ .map(|(a, b)| a < b)
+ .collect::<Vec<bool>>();
+ assert_eq!(c, vec![true, true, false, false, false]);
+ }
+
+ #[test]
+ fn test_buffer_gt() {
+ let a = Buffer::from(vec![1, 2, 3, 4, 5]);
+ let b = Buffer::from(vec![5, 4, 3, 2, 1]);
+ let c = a.iter()
+ .zip(b.iter())
+ .map(|(a, b)| a > b)
+ .collect::<Vec<bool>>();
+ assert_eq!(c, vec![false, false, false, true, true]);
+ }
+
+ #[test]
+ fn test_buffer_add() {
+ let a = Buffer::from(vec![1, 2, 3, 4, 5]);
+ let b = Buffer::from(vec![5, 4, 3, 2, 1]);
+ let c = a.iter()
+ .zip(b.iter())
+ .map(|(a, b)| a + b)
+ .collect::<Vec<i32>>();
+ assert_eq!(c, vec![6, 6, 6, 6, 6]);
+ }
+
+ #[test]
+ fn test_buffer_multiply() {
+ let a = Buffer::from(vec![1, 2, 3, 4, 5]);
+ let b = Buffer::from(vec![5, 4, 3, 2, 1]);
+ let c = a.iter()
+ .zip(b.iter())
+ .map(|(a, b)| a * b)
+ .collect::<Vec<i32>>();
+ assert_eq!(c, vec![5, 8, 9, 8, 5]);
+ }
}
diff --git a/rust/src/datatypes.rs b/rust/src/datatypes.rs
index a812f3222..4f022ba45 100644
--- a/rust/src/datatypes.rs
+++ b/rust/src/datatypes.rs
@@ -15,7 +15,15 @@
// specific language governing permissions and limitations
// under the License.
-#[derive(Debug, Clone)]
+use serde_json;
+use serde_json::Value;
+
+#[derive(Debug, Clone, PartialEq)]
+pub enum ArrowError {
+ ParseError(String),
+}
+
+#[derive(Debug, Clone, PartialEq)]
pub enum DataType {
Boolean,
Int8,
@@ -26,13 +34,105 @@ pub enum DataType {
UInt16,
UInt32,
UInt64,
+ Float16,
Float32,
Float64,
Utf8,
Struct(Vec<Field>),
}
-#[derive(Debug, Clone)]
+impl DataType {
+ fn from(json: &Value) -> Result<DataType, ArrowError> {
+ //println!("DataType::from({:?})", json);
+ match json {
+ &Value::Object(ref map) => match map.get("name") {
+ Some(s) if s == "bool" => Ok(DataType::Boolean),
+ Some(s) if s == "utf8" => Ok(DataType::Utf8),
+ Some(s) if s == "floatingpoint" => match map.get("precision") {
+ Some(p) if p == "HALF" => Ok(DataType::Float16),
+ Some(p) if p == "SINGLE" => Ok(DataType::Float32),
+ Some(p) if p == "DOUBLE" => Ok(DataType::Float64),
+ _ => Err(ArrowError::ParseError(format!(
+ "floatingpoint precision missing or invalid"
+ ))),
+ },
+ Some(s) if s == "int" => match map.get("isSigned") {
+ Some(&Value::Bool(true)) => match map.get("bitWidth") {
+ Some(&Value::Number(ref n)) => match n.as_u64() {
+ Some(8) => Ok(DataType::Int8),
+ Some(16) => Ok(DataType::Int16),
+ Some(32) => Ok(DataType::Int32),
+ Some(64) => Ok(DataType::Int32),
+ _ => Err(ArrowError::ParseError(format!(
+ "int bitWidth missing or invalid"
+ ))),
+ },
+ _ => Err(ArrowError::ParseError(format!(
+ "int bitWidth missing or invalid"
+ ))),
+ },
+ Some(&Value::Bool(false)) => match map.get("bitWidth") {
+ Some(&Value::Number(ref n)) => match n.as_u64() {
+ Some(8) => Ok(DataType::UInt8),
+ Some(16) => Ok(DataType::UInt16),
+ Some(32) => Ok(DataType::UInt32),
+ Some(64) => Ok(DataType::UInt64),
+ _ => Err(ArrowError::ParseError(format!(
+ "int bitWidth missing or invalid"
+ ))),
+ },
+ _ => Err(ArrowError::ParseError(format!(
+ "int bitWidth missing or invalid"
+ ))),
+ },
+ _ => Err(ArrowError::ParseError(format!(
+ "int signed missing or invalid"
+ ))),
+ },
+ Some(other) => Err(ArrowError::ParseError(format!(
+ "invalid type name: {}",
+ other
+ ))),
+ None => match map.get("fields") {
+ Some(&Value::Array(ref fields_array)) => {
+ let fields = fields_array
+ .iter()
+ .map(|f| Field::from(f))
+ .collect::<Result<Vec<Field>, ArrowError>>();
+ Ok(DataType::Struct(fields?))
+ }
+ _ => Err(ArrowError::ParseError(format!("empty type"))),
+ },
+ },
+ _ => Err(ArrowError::ParseError(format!("invalid json value
type"))),
+ }
+ }
+
+ pub fn to_json(&self) -> Value {
+ match self {
+ &DataType::Boolean => json!({"name": "bool"}),
+ &DataType::Int8 => json!({"name": "int", "bitWidth": 8,
"isSigned": true}),
+ &DataType::Int16 => json!({"name": "int", "bitWidth": 16,
"isSigned": true}),
+ &DataType::Int32 => json!({"name": "int", "bitWidth": 32,
"isSigned": true}),
+ &DataType::Int64 => json!({"name": "int", "bitWidth": 64,
"isSigned": true}),
+ &DataType::UInt8 => json!({"name": "int", "bitWidth": 8,
"isSigned": false}),
+ &DataType::UInt16 => json!({"name": "int", "bitWidth": 16,
"isSigned": false}),
+ &DataType::UInt32 => json!({"name": "int", "bitWidth": 32,
"isSigned": false}),
+ &DataType::UInt64 => json!({"name": "int", "bitWidth": 64,
"isSigned": false}),
+ &DataType::Float16 => json!({"name": "floatingpoint", "precision":
"HALF"}),
+ &DataType::Float32 => json!({"name": "floatingpoint", "precision":
"SINGLE"}),
+ &DataType::Float64 => json!({"name": "floatingpoint", "precision":
"DOUBLE"}),
+ &DataType::Utf8 => json!({"name": "utf8"}),
+ &DataType::Struct(ref fields) => {
+ let field_json_array =
+ Value::Array(fields.iter().map(|f|
f.to_json()).collect::<Vec<Value>>());
+ json!({ "fields": field_json_array })
+ }
+ }
+ }
+}
+
+#[derive(Debug, Clone, PartialEq)]
pub struct Field {
pub name: String,
pub data_type: DataType,
@@ -48,6 +148,54 @@ impl Field {
}
}
+ pub fn from(json: &Value) -> Result<Self, ArrowError> {
+ //println!("Field::from({:?}", json);
+ match json {
+ &Value::Object(ref map) => {
+ let name = match map.get("name") {
+ Some(&Value::String(ref name)) => name.to_string(),
+ _ => {
+ return Err(ArrowError::ParseError(format!(
+ "Field missing 'name' attribute"
+ )))
+ }
+ };
+ let nullable = match map.get("nullable") {
+ Some(&Value::Bool(b)) => b,
+ _ => {
+ return Err(ArrowError::ParseError(format!(
+ "Field missing 'nullable' attribute"
+ )))
+ }
+ };
+ let data_type = match map.get("type") {
+ Some(t) => DataType::from(t)?,
+ _ => {
+ return Err(ArrowError::ParseError(format!(
+ "Field missing 'type' attribute"
+ )))
+ }
+ };
+ Ok(Field {
+ name,
+ nullable,
+ data_type,
+ })
+ }
+ _ => Err(ArrowError::ParseError(format!(
+ "Invalid json value type for field"
+ ))),
+ }
+ }
+
+ pub fn to_json(&self) -> Value {
+ json!({
+ "name": self.name,
+ "nullable": self.nullable,
+ "type": self.data_type.to_json(),
+ })
+ }
+
pub fn to_string(&self) -> String {
format!("{}: {:?}", self.name, self.data_type)
}
@@ -87,8 +235,8 @@ mod tests {
use super::*;
#[test]
- fn test_define_schema() {
- let _person = Schema::new(vec![
+ fn create_struct_type() {
+ let _person = DataType::Struct(vec![
Field::new("first_name", DataType::Utf8, false),
Field::new("last_name", DataType::Utf8, false),
Field::new(
@@ -101,4 +249,66 @@ mod tests {
),
]);
}
+
+ #[test]
+ fn struct_field_to_json() {
+ let f = Field::new(
+ "address",
+ DataType::Struct(vec![
+ Field::new("street", DataType::Utf8, false),
+ Field::new("zip", DataType::UInt16, false),
+ ]),
+ false,
+ );
+ assert_eq!(
+ "{\"name\":\"address\",\"nullable\":false,\"type\":{\"fields\":[\
+
{\"name\":\"street\",\"nullable\":false,\"type\":{\"name\":\"utf8\"}},\
+
{\"name\":\"zip\",\"nullable\":false,\"type\":{\"bitWidth\":16,\"isSigned\":false,\"name\":\"int\"}}]}}",
+ f.to_json().to_string()
+ );
+ }
+
+ #[test]
+ fn primitive_field_to_json() {
+ let f = Field::new("first_name", DataType::Utf8, false);
+ assert_eq!(
+
"{\"name\":\"first_name\",\"nullable\":false,\"type\":{\"name\":\"utf8\"}}",
+ f.to_json().to_string()
+ );
+ }
+ #[test]
+ fn parse_struct_from_json() {
+ let json =
"{\"name\":\"address\",\"nullable\":false,\"type\":{\"fields\":[\
+ {\"name\":\"street\",\"nullable\":false,\"type\":{\"name\":\"utf8\"}},\
+
{\"name\":\"zip\",\"nullable\":false,\"type\":{\"bitWidth\":16,\"isSigned\":false,\"name\":\"int\"}}]}}";
+ let value: Value = serde_json::from_str(json).unwrap();
+ let dt = Field::from(&value).unwrap();
+
+ let expected = Field::new(
+ "address",
+ DataType::Struct(vec![
+ Field::new("street", DataType::Utf8, false),
+ Field::new("zip", DataType::UInt16, false),
+ ]),
+ false,
+ );
+
+ assert_eq!(expected, dt);
+ }
+
+ #[test]
+ fn parse_utf8_from_json() {
+ let json = "{\"name\":\"utf8\"}";
+ let value: Value = serde_json::from_str(json).unwrap();
+ let dt = DataType::from(&value).unwrap();
+ assert_eq!(DataType::Utf8, dt);
+ }
+
+ #[test]
+ fn parse_int32_from_json() {
+ let json = "{\"name\": \"int\", \"isSigned\": true, \"bitWidth\": 32}";
+ let value: Value = serde_json::from_str(json).unwrap();
+ let dt = DataType::from(&value).unwrap();
+ assert_eq!(DataType::Int32, dt);
+ }
}
diff --git a/rust/src/lib.rs b/rust/src/lib.rs
index 8eeb34023..bfcd83dc5 100644
--- a/rust/src/lib.rs
+++ b/rust/src/lib.rs
@@ -18,6 +18,9 @@
extern crate bytes;
extern crate libc;
+#[macro_use]
+extern crate serde_json;
+
pub mod array;
pub mod bitmap;
pub mod buffer;
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
> [Rust] Implement to_json() for Field and DataType
> -------------------------------------------------
>
> Key: ARROW-2385
> URL: https://issues.apache.org/jira/browse/ARROW-2385
> Project: Apache Arrow
> Issue Type: New Feature
> Components: Rust
> Reporter: Andy Grove
> Assignee: Andy Grove
> Priority: Major
> Labels: pull-request-available
> Fix For: 0.10.0
>
>
> Implementing JSON representation of DataType and Field as per Arrow
> specifications.
>
>
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)