This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 2541e2c88 Remove preserve_order feature from serde_json (#2095) (#2098)
2541e2c88 is described below
commit 2541e2c88913d115cbe85312d1dc0202457c050c
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Mon Jul 18 17:13:41 2022 -0400
Remove preserve_order feature from serde_json (#2095) (#2098)
* Remove preserve_order feature from serde_json (#2095)
* Fix tests
---
arrow/Cargo.toml | 2 +-
arrow/src/json/writer.rs | 93 ++++++++++++++++++++++++------------------
integration-testing/Cargo.toml | 2 +-
parquet/Cargo.toml | 12 +++---
4 files changed, 62 insertions(+), 47 deletions(-)
diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml
index 7b3d4c64a..151cd2987 100644
--- a/arrow/Cargo.toml
+++ b/arrow/Cargo.toml
@@ -41,7 +41,7 @@ bench = false
ahash = { version = "0.7", default-features = false }
serde = { version = "1.0", default-features = false }
serde_derive = { version = "1.0", default-features = false }
-serde_json = { version = "1.0", default-features = false, features = ["preserve_order"] }
+serde_json = { version = "1.0", default-features = false, features = ["std"] }
indexmap = { version = "1.9", default-features = false, features = ["std"] }
rand = { version = "0.8", default-features = false, features = ["std", "std_rng"], optional = true }
num = { version = "0.4", default-features = false, features = ["std"] }
diff --git a/arrow/src/json/writer.rs b/arrow/src/json/writer.rs
index 0755a5758..f21dad043 100644
--- a/arrow/src/json/writer.rs
+++ b/arrow/src/json/writer.rs
@@ -745,6 +745,21 @@ mod tests {
use super::*;
+ /// Asserts that the NDJSON `input` is semantically identical to `expected`
+ fn assert_json_eq(input: &[u8], expected: &str) {
+ let expected: Vec<Option<Value>> = expected
+ .split('\n')
+ .map(|s| (!s.is_empty()).then(|| serde_json::from_str(s).unwrap()))
+ .collect();
+
+ let actual: Vec<Option<Value>> = input
+ .split(|b| *b == b'\n')
+ .map(|s| (!s.is_empty()).then(|| serde_json::from_slice(s).unwrap()))
+ .collect();
+
+ assert_eq!(expected, actual);
+ }
+
#[test]
fn write_simple_rows() {
let schema = Schema::new(vec![
@@ -765,14 +780,14 @@ mod tests {
writer.write_batches(&[batch]).unwrap();
}
- assert_eq!(
- String::from_utf8(buf).unwrap(),
+ assert_json_eq(
+ &buf,
r#"{"c1":1,"c2":"a"}
{"c1":2,"c2":"b"}
{"c1":3,"c2":"c"}
{"c2":"d"}
{"c1":5}
-"#
+"#,
);
}
@@ -796,14 +811,14 @@ mod tests {
writer.write_batches(&[batch]).unwrap();
}
- assert_eq!(
- String::from_utf8(buf).unwrap(),
+ assert_json_eq(
+ &buf,
r#"{"c1":"a","c2":"a"}
{"c2":"b"}
{"c1":"c"}
{"c1":"d","c2":"d"}
{}
-"#
+"#,
);
}
@@ -846,14 +861,14 @@ mod tests {
writer.write_batches(&[batch]).unwrap();
}
- assert_eq!(
- String::from_utf8(buf).unwrap(),
+ assert_json_eq(
+ &buf,
r#"{"c1":"cupcakes","c2":"sdsd"}
{"c1":"foo","c2":"sdsd"}
{"c1":"foo"}
{"c2":"sd"}
{"c1":"cupcakes","c2":"sdsd"}
-"#
+"#,
);
}
@@ -905,11 +920,11 @@ mod tests {
writer.write_batches(&[batch]).unwrap();
}
- assert_eq!(
- String::from_utf8(buf).unwrap(),
+ assert_json_eq(
+ &buf,
r#"{"nanos":"2018-11-13 17:11:10.011375885","micros":"2018-11-13 17:11:10.011375","millis":"2018-11-13 17:11:10.011","secs":"2018-11-13 17:11:10","name":"a"}
{"name":"b"}
-"#
+"#,
);
}
@@ -951,11 +966,11 @@ mod tests {
writer.write_batches(&[batch]).unwrap();
}
- assert_eq!(
- String::from_utf8(buf).unwrap(),
+ assert_json_eq(
+ &buf,
r#"{"date32":"2018-11-13","date64":"2018-11-13","name":"a"}
{"name":"b"}
-"#
+"#,
);
}
@@ -994,11 +1009,11 @@ mod tests {
writer.write_batches(&[batch]).unwrap();
}
- assert_eq!(
- String::from_utf8(buf).unwrap(),
+ assert_json_eq(
+ &buf,
r#"{"time32sec":"00:02:00","time32msec":"00:00:00.120","time64usec":"00:00:00.000120","time64nsec":"00:00:00.000000120","name":"a"}
{"name":"b"}
-"#
+"#,
);
}
@@ -1037,11 +1052,11 @@ mod tests {
writer.write_batches(&[batch]).unwrap();
}
- assert_eq!(
- String::from_utf8(buf).unwrap(),
+ assert_json_eq(
+ &buf,
r#"{"duration_sec":"PT120S","duration_msec":"PT0.120S","duration_usec":"PT0.000120S","duration_nsec":"PT0.000000120S","name":"a"}
{"name":"b"}
-"#
+"#,
);
}
@@ -1093,12 +1108,12 @@ mod tests {
writer.write_batches(&[batch]).unwrap();
}
- assert_eq!(
- String::from_utf8(buf).unwrap(),
+ assert_json_eq(
+ &buf,
r#"{"c1":{"c11":1,"c12":{"c121":"e"}},"c2":"a"}
{"c1":{"c12":{"c121":"f"}},"c2":"b"}
{"c1":{"c11":5,"c12":{"c121":"g"}},"c2":"c"}
-"#
+"#,
);
}
@@ -1136,14 +1151,14 @@ mod tests {
writer.write_batches(&[batch]).unwrap();
}
- assert_eq!(
- String::from_utf8(buf).unwrap(),
+ assert_json_eq(
+ &buf,
r#"{"c1":["a","a1"],"c2":1}
{"c1":["b"],"c2":2}
{"c1":["c"],"c2":3}
{"c1":["d"],"c2":4}
{"c1":["e"],"c2":5}
-"#
+"#,
);
}
@@ -1196,12 +1211,12 @@ mod tests {
writer.write_batches(&[batch]).unwrap();
}
- assert_eq!(
- String::from_utf8(buf).unwrap(),
+ assert_json_eq(
+ &buf,
r#"{"c1":[[1,2],[3]],"c2":"foo"}
{"c1":[],"c2":"bar"}
{"c1":[[4,5,6]]}
-"#
+"#,
);
}
@@ -1271,12 +1286,12 @@ mod tests {
writer.write_batches(&[batch]).unwrap();
}
- assert_eq!(
- String::from_utf8(buf).unwrap(),
+ assert_json_eq(
+ &buf,
r#"{"c1":[{"c11":1,"c12":{"c121":"e"}},{"c12":{"c121":"f"}}],"c2":1}
{"c2":2}
{"c1":[{"c11":5,"c12":{"c121":"g"}}],"c2":3}
-"#
+"#,
);
}
@@ -1396,15 +1411,15 @@ mod tests {
// that implementations differ on the treatment of a null struct.
// It would be more accurate to return a null struct, so this can be done
// as a follow up.
- assert_eq!(
- String::from_utf8(buf).unwrap(),
+ assert_json_eq(
+ &buf,
r#"{"list":[{"ints":1}]}
{"list":[{}]}
{"list":[]}
{}
{"list":[{}]}
{"list":[{}]}
-"#
+"#,
);
}
@@ -1455,15 +1470,15 @@ mod tests {
writer.write_batches(&[batch]).unwrap();
}
- assert_eq!(
- String::from_utf8(buf).unwrap(),
+ assert_json_eq(
+ &buf,
r#"{"map":{"foo":10}}
{"map":null}
{"map":{}}
{"map":{"bar":20,"baz":30,"qux":40}}
{"map":{"quux":50}}
{"map":{}}
-"#
+"#,
);
}
diff --git a/integration-testing/Cargo.toml b/integration-testing/Cargo.toml
index 4cff73aa7..897c7cfa5 100644
--- a/integration-testing/Cargo.toml
+++ b/integration-testing/Cargo.toml
@@ -40,7 +40,7 @@ hex = { version = "0.4", default-features = false }
prost = { version = "0.10", default-features = false }
serde = { version = "1.0", default-features = false, features = ["rc"] }
serde_derive = { version = "1.0", default-features = false }
-serde_json = { version = "1.0", default-features = false, features = ["preserve_order"] }
+serde_json = { version = "1.0", default-features = false, features = ["std"] }
tokio = { version = "1.0", default-features = false }
tonic = { version = "0.7", default-features = false }
tracing-subscriber = { version = "0.3.1", default-features = false, features = ["fmt"], optional = true }
diff --git a/parquet/Cargo.toml b/parquet/Cargo.toml
index 64819077a..498c85441 100644
--- a/parquet/Cargo.toml
+++ b/parquet/Cargo.toml
@@ -45,9 +45,9 @@ num-bigint = { version = "0.4", default-features = false }
arrow = { path = "../arrow", version = "18.0.0", optional = true, default-features = false, features = ["ipc"] }
base64 = { version = "0.13", default-features = false, features = ["std"], optional = true }
clap = { version = "3", default-features = false, features = ["std", "derive", "env"], optional = true }
-serde_json = { version = "1.0", default-features = false, optional = true }
+serde_json = { version = "1.0", default-features = false, features = ["std"], optional = true }
rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] }
-futures = { version = "0.3", default-features = false, features = ["std" ], optional = true }
+futures = { version = "0.3", default-features = false, features = ["std"], optional = true }
tokio = { version = "1.0", optional = true, default-features = false, features = ["macros", "fs", "rt", "io-util"] }
[dev-dependencies]
@@ -55,11 +55,11 @@ base64 = { version = "0.13", default-features = false, features = ["std"] }
criterion = { version = "0.3", default-features = false }
snap = { version = "1.0", default-features = false }
tempfile = { version = "3.0", default-features = false }
-brotli = { version = "3.3", default-features = false, features = [ "std" ] }
-flate2 = { version = "1.0", default-features = false, features = [ "rust_backend" ] }
+brotli = { version = "3.3", default-features = false, features = ["std"] }
+flate2 = { version = "1.0", default-features = false, features = ["rust_backend"] }
lz4 = { version = "1.23", default-features = false }
zstd = { version = "0.11", default-features = false }
-serde_json = { version = "1.0", default-features = false, features = ["preserve_order"] }
+serde_json = { version = "1.0", features = ["std"], default-features = false }
arrow = { path = "../arrow", version = "18.0.0", default-features = false, features = ["ipc", "test_utils", "prettyprint"] }
[package.metadata.docs.rs]
@@ -70,7 +70,7 @@ default = ["arrow", "snap", "brotli", "flate2", "lz4", "zstd", "base64"]
# Enable arrow reader/writer APIs
arrow = ["dep:arrow", "base64"]
# Enable CLI tools
-cli = ["serde_json", "base64", "clap","arrow/csv"]
+cli = ["serde_json", "base64", "clap", "arrow/csv"]
# Enable internal testing APIs
test_common = ["arrow/test_utils"]
# Experimental, unstable functionality primarily used for testing