This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new ef5c58ce5 Add Field Constructors for Complex Fields (#3992)
ef5c58ce5 is described below
commit ef5c58ce5d7e8a13cbd8e1fa4d6a5310951a6de0
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Fri Mar 31 19:06:01 2023 +0100
Add Field Constructors for Complex Fields (#3992)
* Improve ergonomics of declaring complex fields
* Apply suggestions from code review
Co-authored-by: Andrew Lamb <[email protected]>
* Review feedback
---------
Co-authored-by: Andrew Lamb <[email protected]>
---
arrow-array/src/record_batch.rs | 11 +-
arrow-cast/src/cast.rs | 6 +-
arrow-cast/src/pretty.rs | 93 ++++-------
arrow-csv/src/reader/mod.rs | 12 +-
arrow-csv/src/writer.rs | 6 +-
arrow-flight/tests/encode_decode.rs | 6 +-
arrow-integration-test/src/field.rs | 67 +++-----
arrow-ipc/src/convert.rs | 116 ++++++-------
arrow-ipc/src/reader.rs | 13 +-
arrow-ipc/src/writer.rs | 19 +--
arrow-json/src/raw/mod.rs | 91 +++++------
arrow-json/src/writer.rs | 12 +-
arrow-schema/src/field.rs | 113 ++++++++++++-
arrow-schema/src/schema.rs | 65 +++-----
parquet/src/arrow/arrow_writer/mod.rs | 30 ++--
parquet/src/arrow/schema/complex.rs | 6 +-
parquet/src/arrow/schema/mod.rs | 295 ++++++++++++----------------------
17 files changed, 424 insertions(+), 537 deletions(-)
diff --git a/arrow-array/src/record_batch.rs b/arrow-array/src/record_batch.rs
index 8d4d04f0f..db4bb1230 100644
--- a/arrow-array/src/record_batch.rs
+++ b/arrow-array/src/record_batch.rs
@@ -708,16 +708,11 @@ mod tests {
#[test]
fn create_record_batch_field_name_mismatch() {
- let struct_fields = vec![
+ let fields = vec![
Field::new("a1", DataType::Int32, false),
- Field::new(
- "a2",
- DataType::List(Arc::new(Field::new("item", DataType::Int8,
false))),
- false,
- ),
+ Field::new_list("a2", Field::new("item", DataType::Int8, false),
false),
];
- let struct_type = DataType::Struct(struct_fields.into());
- let schema = Arc::new(Schema::new(vec![Field::new("a", struct_type,
true)]));
+ let schema = Arc::new(Schema::new(vec![Field::new_struct("a", fields,
true)]));
let a1: ArrayRef = Arc::new(Int32Array::from(vec![1, 2]));
let a2_child = Int8Array::from(vec![1, 2, 3, 4]);
diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs
index 02b87e731..9886decd9 100644
--- a/arrow-cast/src/cast.rs
+++ b/arrow-cast/src/cast.rs
@@ -7104,12 +7104,12 @@ mod tests {
fn test_cast_null_from_and_to_nested_type() {
// Cast null from and to map
let data_type = DataType::Map(
- Arc::new(Field::new(
+ Arc::new(Field::new_struct(
"entry",
- DataType::Struct(Fields::from(vec![
+ vec![
Field::new("key", DataType::Utf8, false),
Field::new("value", DataType::Int32, true),
- ])),
+ ],
false,
)),
false,
diff --git a/arrow-cast/src/pretty.rs b/arrow-cast/src/pretty.rs
index 7aa04a2db..c75721ab8 100644
--- a/arrow-cast/src/pretty.rs
+++ b/arrow-cast/src/pretty.rs
@@ -257,9 +257,8 @@ mod tests {
#[test]
fn test_pretty_format_dictionary() {
// define a schema.
- let field_type =
- DataType::Dictionary(Box::new(DataType::Int32),
Box::new(DataType::Utf8));
- let schema = Arc::new(Schema::new(vec![Field::new("d1", field_type,
true)]));
+ let field = Field::new_dictionary("d1", DataType::Int32,
DataType::Utf8, true);
+ let schema = Arc::new(Schema::new(vec![field]));
let mut builder = StringDictionaryBuilder::<Int32Type>::new();
@@ -633,18 +632,16 @@ mod tests {
#[test]
fn test_pretty_format_struct() {
let schema = Schema::new(vec![
- Field::new(
+ Field::new_struct(
"c1",
- DataType::Struct(Fields::from(vec![
+ vec![
Field::new("c11", DataType::Int32, true),
- Field::new(
+ Field::new_struct(
"c12",
- DataType::Struct(
- vec![Field::new("c121", DataType::Utf8,
false)].into(),
- ),
+ vec![Field::new("c121", DataType::Utf8, false)],
false,
),
- ])),
+ ],
false,
),
Field::new("c2", DataType::Utf8, false),
@@ -656,11 +653,9 @@ mod tests {
Arc::new(Int32Array::from(vec![Some(1), None, Some(5)])) as
ArrayRef,
),
(
- Field::new(
+ Field::new_struct(
"c12",
- DataType::Struct(
- vec![Field::new("c121", DataType::Utf8, false)].into(),
- ),
+ vec![Field::new("c121", DataType::Utf8, false)],
false,
),
Arc::new(StructArray::from(vec![(
@@ -700,19 +695,14 @@ mod tests {
builder.append_null::<Int32Type>("a").unwrap();
let union = builder.build().unwrap();
- let schema = Schema::new(vec![Field::new(
+ let schema = Schema::new(vec![Field::new_union(
"Teamsters",
- DataType::Union(
- UnionFields::new(
- vec![0, 1],
- vec![
- Field::new("a", DataType::Int32, false),
- Field::new("b", DataType::Float64, false),
- ],
- ),
- UnionMode::Dense,
- ),
- false,
+ vec![0, 1],
+ vec![
+ Field::new("a", DataType::Int32, false),
+ Field::new("b", DataType::Float64, false),
+ ],
+ UnionMode::Dense,
)]);
let batch =
@@ -742,19 +732,14 @@ mod tests {
builder.append_null::<Int32Type>("a").unwrap();
let union = builder.build().unwrap();
- let schema = Schema::new(vec![Field::new(
+ let schema = Schema::new(vec![Field::new_union(
"Teamsters",
- DataType::Union(
- UnionFields::new(
- vec![0, 1],
- vec![
- Field::new("a", DataType::Int32, false),
- Field::new("b", DataType::Float64, false),
- ],
- ),
- UnionMode::Sparse,
- ),
- false,
+ vec![0, 1],
+ vec![
+ Field::new("a", DataType::Int32, false),
+ Field::new("b", DataType::Float64, false),
+ ],
+ UnionMode::Sparse,
)]);
let batch =
@@ -786,19 +771,14 @@ mod tests {
builder.append_null::<Float64Type>("c").unwrap();
let inner = builder.build().unwrap();
- let inner_field = Field::new(
+ let inner_field = Field::new_union(
"European Union",
- DataType::Union(
- UnionFields::new(
- vec![0, 1],
- vec![
- Field::new("b", DataType::Int32, false),
- Field::new("c", DataType::Float64, false),
- ],
- ),
- UnionMode::Dense,
- ),
- false,
+ vec![0, 1],
+ vec![
+ Field::new("b", DataType::Int32, false),
+ Field::new("c", DataType::Float64, false),
+ ],
+ UnionMode::Dense,
);
// Can't use UnionBuilder with non-primitive types, so manually build
outer UnionArray
@@ -812,16 +792,11 @@ mod tests {
let outer = UnionArray::try_new(&[0, 1], type_ids, None,
children).unwrap();
- let schema = Schema::new(vec![Field::new(
+ let schema = Schema::new(vec![Field::new_union(
"Teamsters",
- DataType::Union(
- UnionFields::new(
- vec![0, 1],
- vec![Field::new("a", DataType::Int32, true), inner_field],
- ),
- UnionMode::Sparse,
- ),
- false,
+ vec![0, 1],
+ vec![Field::new("a", DataType::Int32, true), inner_field],
+ UnionMode::Sparse,
)]);
let batch =
diff --git a/arrow-csv/src/reader/mod.rs b/arrow-csv/src/reader/mod.rs
index 7fecc1ad9..3fa712819 100644
--- a/arrow-csv/src/reader/mod.rs
+++ b/arrow-csv/src/reader/mod.rs
@@ -1485,11 +1485,7 @@ mod tests {
#[test]
fn test_csv_with_dictionary() {
let schema = Schema::new(vec![
- Field::new(
- "city",
- DataType::Dictionary(Box::new(DataType::Int32),
Box::new(DataType::Utf8)),
- false,
- ),
+ Field::new_dictionary("city", DataType::Int32, DataType::Utf8,
false),
Field::new("lat", DataType::Float64, false),
Field::new("lng", DataType::Float64, false),
]);
@@ -1507,11 +1503,7 @@ mod tests {
None,
);
let projected_schema = Arc::new(Schema::new(vec![
- Field::new(
- "city",
- DataType::Dictionary(Box::new(DataType::Int32),
Box::new(DataType::Utf8)),
- false,
- ),
+ Field::new_dictionary("city", DataType::Int32, DataType::Utf8,
false),
Field::new("lat", DataType::Float64, false),
]));
assert_eq!(projected_schema, csv.schema());
diff --git a/arrow-csv/src/writer.rs b/arrow-csv/src/writer.rs
index 946803dec..90c32832a 100644
--- a/arrow-csv/src/writer.rs
+++ b/arrow-csv/src/writer.rs
@@ -347,11 +347,7 @@ mod tests {
Field::new("c4", DataType::Boolean, true),
Field::new("c5", DataType::Timestamp(TimeUnit::Millisecond, None),
true),
Field::new("c6", DataType::Time32(TimeUnit::Second), false),
- Field::new(
- "c7",
- DataType::Dictionary(Box::new(DataType::Int32),
Box::new(DataType::Utf8)),
- false,
- ),
+ Field::new_dictionary("c7", DataType::Int32, DataType::Utf8,
false),
]);
let c1 = StringArray::from(vec![
diff --git a/arrow-flight/tests/encode_decode.rs
b/arrow-flight/tests/encode_decode.rs
index ec86fbcc0..90fa2b7a6 100644
--- a/arrow-flight/tests/encode_decode.rs
+++ b/arrow-flight/tests/encode_decode.rs
@@ -140,11 +140,7 @@ async fn test_zero_batches_schema_specified() {
async fn test_zero_batches_dictionary_schema_specified() {
let schema = Arc::new(Schema::new(vec![
Field::new("a", DataType::Int64, false),
- Field::new(
- "b",
- DataType::Dictionary(Box::new(DataType::Int32),
Box::new(DataType::Utf8)),
- false,
- ),
+ Field::new_dictionary("b", DataType::Int32, DataType::Utf8, false),
]));
// Expect dictionary to be hydrated in output (#3389)
diff --git a/arrow-integration-test/src/field.rs
b/arrow-integration-test/src/field.rs
index a0cd4adc8..f59314ca0 100644
--- a/arrow-integration-test/src/field.rs
+++ b/arrow-integration-test/src/field.rs
@@ -303,17 +303,17 @@ pub fn field_to_json(field: &Field) -> serde_json::Value {
#[cfg(test)]
mod tests {
use super::*;
- use arrow::datatypes::{Fields, UnionFields, UnionMode};
+ use arrow::datatypes::UnionMode;
use serde_json::Value;
#[test]
fn struct_field_to_json() {
- let f = Field::new(
+ let f = Field::new_struct(
"address",
- DataType::Struct(Fields::from(vec![
+ vec![
Field::new("street", DataType::Utf8, false),
Field::new("zip", DataType::UInt16, false),
- ])),
+ ],
false,
);
let value: Value = serde_json::from_str(
@@ -351,19 +351,12 @@ mod tests {
#[test]
fn map_field_to_json() {
- let f = Field::new(
+ let f = Field::new_map(
"my_map",
- DataType::Map(
- Arc::new(Field::new(
- "my_entries",
- DataType::Struct(Fields::from(vec![
- Field::new("my_keys", DataType::Utf8, false),
- Field::new("my_values", DataType::UInt16, true),
- ])),
- false,
- )),
- true,
- ),
+ "my_entries",
+ Field::new("my_keys", DataType::Utf8, false),
+ Field::new("my_values", DataType::UInt16, true),
+ true,
false,
);
let value: Value = serde_json::from_str(
@@ -459,12 +452,12 @@ mod tests {
let value: Value = serde_json::from_str(json).unwrap();
let dt = field_from_json(&value).unwrap();
- let expected = Field::new(
+ let expected = Field::new_struct(
"address",
- DataType::Struct(Fields::from(vec![
+ vec![
Field::new("street", DataType::Utf8, false),
Field::new("zip", DataType::UInt16, false),
- ])),
+ ],
false,
);
@@ -515,19 +508,12 @@ mod tests {
let value: Value = serde_json::from_str(json).unwrap();
let dt = field_from_json(&value).unwrap();
- let expected = Field::new(
+ let expected = Field::new_map(
"my_map",
- DataType::Map(
- Arc::new(Field::new(
- "my_entries",
- DataType::Struct(Fields::from(vec![
- Field::new("my_keys", DataType::Utf8, false),
- Field::new("my_values", DataType::UInt16, true),
- ])),
- false,
- )),
- true,
- ),
+ "my_entries",
+ Field::new("my_keys", DataType::Utf8, false),
+ Field::new("my_values", DataType::UInt16, true),
+ true,
false,
);
@@ -573,19 +559,14 @@ mod tests {
let value: Value = serde_json::from_str(json).unwrap();
let dt = field_from_json(&value).unwrap();
- let expected = Field::new(
+ let expected = Field::new_union(
"my_union",
- DataType::Union(
- UnionFields::new(
- vec![5, 7],
- vec![
- Field::new("f1", DataType::Int32, true),
- Field::new("f2", DataType::Utf8, true),
- ],
- ),
- UnionMode::Sparse,
- ),
- false,
+ vec![5, 7],
+ vec![
+ Field::new("f1", DataType::Int32, true),
+ Field::new("f2", DataType::Utf8, true),
+ ],
+ UnionMode::Sparse,
);
assert_eq!(expected, dt);
diff --git a/arrow-ipc/src/convert.rs b/arrow-ipc/src/convert.rs
index cc2a7786c..c7b5559fa 100644
--- a/arrow-ipc/src/convert.rs
+++ b/arrow-ipc/src/convert.rs
@@ -906,107 +906,91 @@ mod tests {
),
Field::new("utf8", DataType::Utf8, false),
Field::new("binary", DataType::Binary, false),
- Field::new(
+ Field::new_list(
"list[u8]",
- DataType::List(Arc::new(Field::new("item",
DataType::UInt8, false))),
+ Field::new("item", DataType::UInt8, false),
true,
),
- Field::new(
+ Field::new_list(
"list[struct<float32, int32, bool>]",
- List(Arc::new(Field::new(
+ Field::new_struct(
"struct",
- Struct(Fields::from(vec![
- Field::new("float32", DataType::UInt8, false),
- Field::new("int32", DataType::Int32, true),
- Field::new("bool", DataType::Boolean, true),
- ])),
+ vec![
+ Field::new("float32", UInt8, false),
+ Field::new("int32", Int32, true),
+ Field::new("bool", Boolean, true),
+ ],
true,
- ))),
+ ),
false,
),
- Field::new(
+ Field::new_struct(
"struct<dictionary<int32, utf8>>",
- Struct(Fields::from(vec![Field::new(
+ vec![Field::new(
"dictionary<int32, utf8>",
Dictionary(Box::new(DataType::Int32),
Box::new(DataType::Utf8)),
false,
- )])),
+ )],
false,
),
- Field::new(
+ Field::new_struct(
"struct<int64, list[struct<date32, list[struct<>]>]>",
- Struct(Fields::from(vec![
+ vec![
Field::new("int64", DataType::Int64, true),
- Field::new(
+ Field::new_list(
"list[struct<date32, list[struct<>]>]",
- DataType::List(Arc::new(Field::new(
+ Field::new_struct(
"struct",
- DataType::Struct(Fields::from(vec![
+ vec![
Field::new("date32", DataType::Date32,
true),
- Field::new(
+ Field::new_list(
"list[struct<>]",
- DataType::List(Arc::new(Field::new(
+ Field::new(
"struct",
DataType::Struct(Fields::empty()),
false,
- ))),
+ ),
false,
),
- ])),
+ ],
false,
- ))),
+ ),
false,
),
- ])),
+ ],
false,
),
- Field::new(
+ Field::new_union(
"union<int64, list[union<date32, list[union<>]>]>",
- DataType::Union(
- UnionFields::new(
- vec![0, 1],
- vec![
- Field::new("int64", DataType::Int64, true),
- Field::new(
- "list[union<date32, list[union<>]>]",
- DataType::List(Arc::new(Field::new(
- "union<date32, list[union<>]>",
- DataType::Union(
- UnionFields::new(
- vec![0, 1],
- vec![
- Field::new(
- "date32",
- DataType::Date32,
- true,
- ),
- Field::new(
- "list[union<>]",
-
DataType::List(Arc::new(
- Field::new(
- "union",
-
DataType::Union(
-
UnionFields::empty(),
-
UnionMode::Sparse,
- ),
- false,
- ),
- )),
- false,
- ),
- ],
+ vec![0, 1],
+ vec![
+ Field::new("int64", DataType::Int64, true),
+ Field::new_list(
+ "list[union<date32, list[union<>]>]",
+ Field::new_union(
+ "union<date32, list[union<>]>",
+ vec![0, 1],
+ vec![
+ Field::new("date32", DataType::Date32,
true),
+ Field::new_list(
+ "list[union<>]",
+ Field::new(
+ "union",
+ DataType::Union(
+ UnionFields::empty(),
+ UnionMode::Sparse,
),
- UnionMode::Dense,
+ false,
),
false,
- ))),
- false,
- ),
- ],
+ ),
+ ],
+ UnionMode::Dense,
+ ),
+ false,
),
- UnionMode::Sparse,
- ),
- false,
+ ],
+ UnionMode::Sparse,
),
Field::new("struct<>", DataType::Struct(Fields::empty()),
true),
Field::new(
diff --git a/arrow-ipc/src/reader.rs b/arrow-ipc/src/reader.rs
index c20f7bd01..75d078456 100644
--- a/arrow-ipc/src/reader.rs
+++ b/arrow-ipc/src/reader.rs
@@ -1261,11 +1261,6 @@ mod tests {
3,
);
- let key_type = DataType::Int8;
- let value_type = DataType::Utf8;
- let dict_data_type =
- DataType::Dictionary(Box::new(key_type), Box::new(value_type));
-
let union_fields = UnionFields::new(
vec![0, 1],
vec![
@@ -1278,11 +1273,7 @@ mod tests {
let struct_fields = Fields::from(vec![
Field::new("id", DataType::Int32, false),
- Field::new(
- "list",
- DataType::List(Arc::new(Field::new("item", DataType::Int8,
true))),
- false,
- ),
+ Field::new_list("list", Field::new("item", DataType::Int8, true),
false),
]);
let struct_data_type = DataType::Struct(struct_fields);
@@ -1305,7 +1296,7 @@ mod tests {
Field::new("f9", struct_data_type, false),
Field::new("f10", run_encoded_data_type, false),
Field::new("f11", DataType::Boolean, false),
- Field::new("f12", dict_data_type, false),
+ Field::new_dictionary("f12", DataType::Int8, DataType::Utf8,
false),
Field::new("f13", DataType::Utf8, false),
])
}
diff --git a/arrow-ipc/src/writer.rs b/arrow-ipc/src/writer.rs
index 0e999dc72..12c173f64 100644
--- a/arrow-ipc/src/writer.rs
+++ b/arrow-ipc/src/writer.rs
@@ -1778,19 +1778,14 @@ mod tests {
}
fn write_union_file(options: IpcWriteOptions) {
- let schema = Schema::new(vec![Field::new(
+ let schema = Schema::new(vec![Field::new_union(
"union",
- DataType::Union(
- UnionFields::new(
- vec![0, 1],
- vec![
- Field::new("a", DataType::Int32, false),
- Field::new("c", DataType::Float64, false),
- ],
- ),
- UnionMode::Sparse,
- ),
- true,
+ vec![0, 1],
+ vec![
+ Field::new("a", DataType::Int32, false),
+ Field::new("c", DataType::Float64, false),
+ ],
+ UnionMode::Sparse,
)]);
let mut builder = UnionBuilder::with_capacity_sparse(5);
builder.append::<Int32Type>("a", 1).unwrap();
diff --git a/arrow-json/src/raw/mod.rs b/arrow-json/src/raw/mod.rs
index c784bd347..1bae8ac52 100644
--- a/arrow-json/src/raw/mod.rs
+++ b/arrow-json/src/raw/mod.rs
@@ -362,7 +362,7 @@ mod tests {
use arrow_array::{Array, StructArray};
use arrow_buffer::ArrowNativeType;
use arrow_cast::display::{ArrayFormatter, FormatOptions};
- use arrow_schema::{DataType, Field, Fields, Schema};
+ use arrow_schema::{DataType, Field, Schema};
use std::fs::File;
use std::io::{BufReader, Cursor, Seek};
use std::sync::Arc;
@@ -503,32 +503,26 @@ mod tests {
"#;
let schema = Arc::new(Schema::new(vec![
- Field::new(
- "list",
- DataType::List(Arc::new(Field::new("element", DataType::Int32,
false))),
- true,
- ),
- Field::new(
+ Field::new_list("list", Field::new("element", DataType::Int32,
false), true),
+ Field::new_struct(
"nested",
- DataType::Struct(Fields::from(vec![
+ vec![
Field::new("a", DataType::Int32, true),
Field::new("b", DataType::Int32, true),
- ])),
+ ],
true,
),
- Field::new(
+ Field::new_struct(
"nested_list",
- DataType::Struct(Fields::from(vec![Field::new(
+ vec![Field::new_list(
"list2",
- DataType::List(Arc::new(Field::new(
+ Field::new_struct(
"element",
- DataType::Struct(
- vec![Field::new("c", DataType::Int32,
false)].into(),
- ),
+ vec![Field::new("c", DataType::Int32, false)],
false,
- ))),
+ ),
true,
- )])),
+ )],
true,
),
]));
@@ -582,24 +576,22 @@ mod tests {
"#;
let schema = Arc::new(Schema::new(vec![
- Field::new(
+ Field::new_struct(
"nested",
- DataType::Struct(vec![Field::new("a", DataType::Int32,
false)].into()),
+ vec![Field::new("a", DataType::Int32, false)],
true,
),
- Field::new(
+ Field::new_struct(
"nested_list",
- DataType::Struct(Fields::from(vec![Field::new(
+ vec![Field::new_list(
"list2",
- DataType::List(Arc::new(Field::new(
+ Field::new_struct(
"element",
- DataType::Struct(
- vec![Field::new("d", DataType::Int32,
true)].into(),
- ),
+ vec![Field::new("d", DataType::Int32, true)],
false,
- ))),
+ ),
true,
- )])),
+ )],
true,
),
]));
@@ -639,14 +631,16 @@ mod tests {
{"map": {"a": [null], "b": []}}
{"map": {"c": null, "a": ["baz"]}}
"#;
- let list = DataType::List(Arc::new(Field::new("element",
DataType::Utf8, true)));
- let entries = DataType::Struct(Fields::from(vec![
+ let map = Field::new_map(
+ "map",
+ "entries",
Field::new("key", DataType::Utf8, false),
- Field::new("value", list, true),
- ]));
+ Field::new_list("value", Field::new("element", DataType::Utf8,
true), true),
+ false,
+ true,
+ );
- let map = DataType::Map(Arc::new(Field::new("entries", entries,
true)), false);
- let schema = Arc::new(Schema::new(vec![Field::new("map", map, true)]));
+ let schema = Arc::new(Schema::new(vec![map]));
let batches = do_read(buf, 1024, false, schema);
assert_eq!(batches.len(), 1);
@@ -1010,31 +1004,24 @@ mod tests {
fn test_delta_checkpoint() {
let json =
"{\"protocol\":{\"minReaderVersion\":1,\"minWriterVersion\":2}}";
let schema = Arc::new(Schema::new(vec![
- Field::new(
+ Field::new_struct(
"protocol",
- DataType::Struct(Fields::from(vec![
+ vec![
Field::new("minReaderVersion", DataType::Int32, true),
Field::new("minWriterVersion", DataType::Int32, true),
- ])),
+ ],
true,
),
- Field::new(
+ Field::new_struct(
"add",
- DataType::Struct(Fields::from(vec![Field::new(
+ vec![Field::new_map(
"partitionValues",
- DataType::Map(
- Arc::new(Field::new(
- "key_value",
- DataType::Struct(Fields::from(vec![
- Field::new("key", DataType::Utf8, false),
- Field::new("value", DataType::Utf8, true),
- ])),
- false,
- )),
- false,
- ),
+ "key_value",
+ Field::new("key", DataType::Utf8, false),
+ Field::new("value", DataType::Utf8, true),
+ false,
false,
- )])),
+ )],
true,
),
]));
@@ -1056,9 +1043,9 @@ mod tests {
let do_test = |child: DataType| {
// Test correctly enforced nullability
let non_null = r#"{"foo": {}}"#;
- let schema = Arc::new(Schema::new(vec![Field::new(
+ let schema = Arc::new(Schema::new(vec![Field::new_struct(
"foo",
- DataType::Struct(vec![Field::new("bar", child, false)].into()),
+ vec![Field::new("bar", child, false)],
true,
)]));
let mut reader = RawReaderBuilder::new(schema.clone())
diff --git a/arrow-json/src/writer.rs b/arrow-json/src/writer.rs
index 1b950f794..d66d32017 100644
--- a/arrow-json/src/writer.rs
+++ b/arrow-json/src/writer.rs
@@ -680,16 +680,8 @@ mod tests {
#[test]
fn write_dictionary() {
let schema = Schema::new(vec![
- Field::new(
- "c1",
- DataType::Dictionary(Box::new(DataType::Int32),
Box::new(DataType::Utf8)),
- true,
- ),
- Field::new(
- "c2",
- DataType::Dictionary(Box::new(DataType::Int8),
Box::new(DataType::Utf8)),
- true,
- ),
+ Field::new_dictionary("c1", DataType::Int32, DataType::Utf8, true),
+ Field::new_dictionary("c2", DataType::Int8, DataType::Utf8, true),
]);
let a: DictionaryArray<Int32Type> = vec![
diff --git a/arrow-schema/src/field.rs b/arrow-schema/src/field.rs
index ac02eadd6..1af157e4d 100644
--- a/arrow-schema/src/field.rs
+++ b/arrow-schema/src/field.rs
@@ -19,12 +19,14 @@ use crate::error::ArrowError;
use std::cmp::Ordering;
use std::collections::HashMap;
use std::hash::{Hash, Hasher};
+use std::sync::Arc;
use crate::datatype::DataType;
use crate::schema::SchemaBuilder;
+use crate::{Fields, UnionFields, UnionMode};
/// A reference counted [`Field`]
-pub type FieldRef = std::sync::Arc<Field>;
+pub type FieldRef = Arc<Field>;
/// Describes a single column in a [`Schema`](super::Schema).
///
@@ -145,6 +147,115 @@ impl Field {
}
}
+ /// Create a new [`Field`] with [`DataType::Dictionary`]
+ ///
+ /// Use [`Self::new_dict`] for more advanced dictionary options
+ ///
+ /// # Panics
+ ///
+ /// Panics if
[`!key.is_dictionary_key_type`][DataType::is_dictionary_key_type]
+ pub fn new_dictionary(
+ name: impl Into<String>,
+ key: DataType,
+ value: DataType,
+ nullable: bool,
+ ) -> Self {
+ assert!(
+ key.is_dictionary_key_type(),
+ "{key} is not a valid dictionary key"
+ );
+ let data_type = DataType::Dictionary(Box::new(key), Box::new(value));
+ Self::new(name, data_type, nullable)
+ }
+
+ /// Create a new [`Field`] with [`DataType::Struct`]
+ ///
+ /// - `name`: the name of the [`DataType::Struct`] field
+ /// - `fields`: the description of each struct element
+ /// - `nullable`: if the [`DataType::Struct`] array is nullable
+ pub fn new_struct(
+ name: impl Into<String>,
+ fields: impl Into<Fields>,
+ nullable: bool,
+ ) -> Self {
+ Self::new(name, DataType::Struct(fields.into()), nullable)
+ }
+
+ /// Create a new [`Field`] with [`DataType::List`]
+ ///
+ /// - `name`: the name of the [`DataType::List`] field
+ /// - `value`: the description of each list element
+ /// - `nullable`: if the [`DataType::List`] array is nullable
+ ///
+ /// The child field, including its name, is taken from `value` as given; the
name "item" is not applied automatically
+ pub fn new_list(
+ name: impl Into<String>,
+ value: impl Into<FieldRef>,
+ nullable: bool,
+ ) -> Self {
+ Self::new(name, DataType::List(value.into()), nullable)
+ }
+
+ /// Create a new [`Field`] with [`DataType::LargeList`]
+ ///
+ /// - `name`: the name of the [`DataType::LargeList`] field
+ /// - `value`: the description of each list element
+ /// - `nullable`: if the [`DataType::LargeList`] array is nullable
+ pub fn new_large_list(
+ name: impl Into<String>,
+ value: impl Into<FieldRef>,
+ nullable: bool,
+ ) -> Self {
+ Self::new(name, DataType::LargeList(value.into()), nullable)
+ }
+
+ /// Create a new [`Field`] with [`DataType::Map`]
+ ///
+ /// - `name`: the name of the [`DataType::Map`] field
+ /// - `entries`: the name of the inner [`DataType::Struct`] field
+ /// - `keys`: the map keys
+ /// - `values`: the map values
+ /// - `sorted`: if the [`DataType::Map`] array is sorted
+ /// - `nullable`: if the [`DataType::Map`] array is nullable
+ pub fn new_map(
+ name: impl Into<String>,
+ entries: impl Into<String>,
+ keys: impl Into<FieldRef>,
+ values: impl Into<FieldRef>,
+ sorted: bool,
+ nullable: bool,
+ ) -> Self {
+ let data_type = DataType::Map(
+ Arc::new(Field::new(
+ entries.into(),
+ DataType::Struct(Fields::from([keys.into(), values.into()])),
+ false, // The inner map field is always non-nullable (#1697)
+ )),
+ sorted,
+ );
+ Self::new(name, data_type, nullable)
+ }
+
+ /// Create a new [`Field`] with [`DataType::Union`]
+ ///
+ /// - `name`: the name of the [`DataType::Union`] field
+ /// - `type_ids`: the union type ids
+ /// - `fields`: the union fields
+ /// - `mode`: the union mode
+ pub fn new_union<S, F, T>(name: S, type_ids: T, fields: F, mode:
UnionMode) -> Self
+ where
+ S: Into<String>,
+ F: IntoIterator,
+ F::Item: Into<FieldRef>,
+ T: IntoIterator<Item = i8>,
+ {
+ Self::new(
+ name,
+ DataType::Union(UnionFields::new(type_ids, fields), mode),
+ false, // Unions cannot be nullable
+ )
+ }
+
/// Sets the `Field`'s optional custom metadata.
#[inline]
pub fn set_metadata(&mut self, metadata: HashMap<String, String>) {
diff --git a/arrow-schema/src/schema.rs b/arrow-schema/src/schema.rs
index 501c5c7fd..2cc892f5a 100644
--- a/arrow-schema/src/schema.rs
+++ b/arrow-schema/src/schema.rs
@@ -365,7 +365,7 @@ impl Hash for Schema {
#[cfg(test)]
mod tests {
use crate::datatype::DataType;
- use crate::{TimeUnit, UnionFields, UnionMode};
+ use crate::{TimeUnit, UnionMode};
use super::*;
@@ -775,54 +775,35 @@ mod tests {
// support merge union fields
assert_eq!(
Schema::try_merge(vec![
- Schema::new(vec![Field::new(
+ Schema::new(vec![Field::new_union(
"c1",
- DataType::Union(
- UnionFields::new(
- vec![0, 1],
- vec![
- Field::new("c11", DataType::Utf8, true),
- Field::new("c12", DataType::Utf8, true),
- ]
- ),
- UnionMode::Dense
- ),
- false
+ vec![0, 1],
+ vec![
+ Field::new("c11", DataType::Utf8, true),
+ Field::new("c12", DataType::Utf8, true),
+ ],
+ UnionMode::Dense
),]),
- Schema::new(vec![Field::new(
+ Schema::new(vec![Field::new_union(
"c1",
- DataType::Union(
- UnionFields::new(
- vec![1, 2],
- vec![
- Field::new("c12", DataType::Utf8, true),
- Field::new(
- "c13",
- DataType::Time64(TimeUnit::Second),
- true
- ),
- ]
- ),
- UnionMode::Dense
- ),
- false
+ vec![1, 2],
+ vec![
+ Field::new("c12", DataType::Utf8, true),
+ Field::new("c13", DataType::Time64(TimeUnit::Second),
true),
+ ],
+ UnionMode::Dense
),])
])
.unwrap(),
- Schema::new(vec![Field::new(
+ Schema::new(vec![Field::new_union(
"c1",
- DataType::Union(
- UnionFields::new(
- vec![0, 1, 2],
- vec![
- Field::new("c11", DataType::Utf8, true),
- Field::new("c12", DataType::Utf8, true),
- Field::new("c13",
DataType::Time64(TimeUnit::Second), true),
- ]
- ),
- UnionMode::Dense
- ),
- false
+ vec![0, 1, 2],
+ vec![
+ Field::new("c11", DataType::Utf8, true),
+ Field::new("c12", DataType::Utf8, true),
+ Field::new("c13", DataType::Time64(TimeUnit::Second),
true),
+ ],
+ UnionMode::Dense
),]),
);
diff --git a/parquet/src/arrow/arrow_writer/mod.rs
b/parquet/src/arrow/arrow_writer/mod.rs
index 0515ed4e3..86f7764ec 100644
--- a/parquet/src/arrow/arrow_writer/mod.rs
+++ b/parquet/src/arrow/arrow_writer/mod.rs
@@ -888,36 +888,28 @@ mod tests {
// define schema
let struct_field_d = Field::new("d", DataType::Float64, true);
let struct_field_f = Field::new("f", DataType::Float32, true);
- let struct_field_g = Field::new(
- "g",
- DataType::List(Arc::new(Field::new("item", DataType::Int16,
true))),
- false,
- );
- let struct_field_h = Field::new(
- "h",
- DataType::List(Arc::new(Field::new("item", DataType::Int16,
false))),
- true,
- );
- let struct_field_e = Field::new(
+ let struct_field_g =
+ Field::new_list("g", Field::new("item", DataType::Int16, true),
false);
+ let struct_field_h =
+ Field::new_list("h", Field::new("item", DataType::Int16, false),
true);
+ let struct_field_e = Field::new_struct(
"e",
- DataType::Struct(Fields::from(vec![
+ vec![
struct_field_f.clone(),
struct_field_g.clone(),
struct_field_h.clone(),
- ])),
+ ],
false,
);
- let schema = Schema::new(Fields::from(vec![
+ let schema = Schema::new(vec![
Field::new("a", DataType::Int32, false),
Field::new("b", DataType::Int32, true),
- Field::new(
+ Field::new_struct(
"c",
- DataType::Struct(
- vec![struct_field_d.clone(),
struct_field_e.clone()].into(),
- ),
+ vec![struct_field_d.clone(), struct_field_e.clone()],
false,
),
- ]));
+ ]);
// create some data
let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
diff --git a/parquet/src/arrow/schema/complex.rs
b/parquet/src/arrow/schema/complex.rs
index 25227aeee..c1699aafc 100644
--- a/parquet/src/arrow/schema/complex.rs
+++ b/parquet/src/arrow/schema/complex.rs
@@ -24,7 +24,7 @@ use crate::basic::{ConvertedType, Repetition};
use crate::errors::ParquetError;
use crate::errors::Result;
use crate::schema::types::{SchemaDescriptor, Type, TypePtr};
-use arrow_schema::{DataType, Field, Fields, Schema, SchemaBuilder};
+use arrow_schema::{DataType, Field, Schema, SchemaBuilder};
fn get_repetition(t: &Type) -> Repetition {
let info = t.get_basic_info();
@@ -351,9 +351,9 @@ impl Visitor {
_ => HashMap::default(),
};
- let map_field = Field::new(
+ let map_field = Field::new_struct(
map_key_value.name(),
- DataType::Struct(Fields::from([key_field, value_field])),
+ [key_field, value_field],
false, // The inner map field is always non-nullable
(#1697)
)
.with_metadata(field_metadata);
diff --git a/parquet/src/arrow/schema/mod.rs b/parquet/src/arrow/schema/mod.rs
index 81ed5e817..399dcba9e 100644
--- a/parquet/src/arrow/schema/mod.rs
+++ b/parquet/src/arrow/schema/mod.rs
@@ -709,9 +709,9 @@ mod tests {
// }
// }
{
- arrow_fields.push(Field::new(
+ arrow_fields.push(Field::new_list(
"my_list",
- DataType::List(Arc::new(Field::new("element", DataType::Utf8,
true))),
+ Field::new("element", DataType::Utf8, true),
false,
));
}
@@ -723,9 +723,9 @@ mod tests {
// }
// }
{
- arrow_fields.push(Field::new(
+ arrow_fields.push(Field::new_list(
"my_list",
- DataType::List(Arc::new(Field::new("element", DataType::Utf8,
false))),
+ Field::new("element", DataType::Utf8, false),
true,
));
}
@@ -743,11 +743,10 @@ mod tests {
// }
// }
{
- let arrow_inner_list =
- DataType::List(Arc::new(Field::new("element", DataType::Int32,
false)));
- arrow_fields.push(Field::new(
+ let arrow_inner_list = Field::new("element", DataType::Int32,
false);
+ arrow_fields.push(Field::new_list(
"array_of_arrays",
- DataType::List(Arc::new(Field::new("element",
arrow_inner_list, false))),
+ Field::new_list("element", arrow_inner_list, false),
true,
));
}
@@ -759,9 +758,9 @@ mod tests {
// };
// }
{
- arrow_fields.push(Field::new(
+ arrow_fields.push(Field::new_list(
"my_list",
- DataType::List(Arc::new(Field::new("str", DataType::Utf8,
false))),
+ Field::new("str", DataType::Utf8, false),
true,
));
}
@@ -771,9 +770,9 @@ mod tests {
// repeated int32 element;
// }
{
- arrow_fields.push(Field::new(
+ arrow_fields.push(Field::new_list(
"my_list",
- DataType::List(Arc::new(Field::new("element", DataType::Int32,
false))),
+ Field::new("element", DataType::Int32, false),
true,
));
}
@@ -786,13 +785,13 @@ mod tests {
// };
// }
{
- let arrow_struct = DataType::Struct(Fields::from(vec![
+ let fields = vec![
Field::new("str", DataType::Utf8, false),
Field::new("num", DataType::Int32, false),
- ]));
- arrow_fields.push(Field::new(
+ ];
+ arrow_fields.push(Field::new_list(
"my_list",
- DataType::List(Arc::new(Field::new("element", arrow_struct,
false))),
+ Field::new_struct("element", fields, false),
true,
));
}
@@ -805,11 +804,10 @@ mod tests {
// }
// Special case: group is named array
{
- let fields = vec![Field::new("str", DataType::Utf8, false)].into();
- let arrow_struct = DataType::Struct(fields);
- arrow_fields.push(Field::new(
+ let fields = vec![Field::new("str", DataType::Utf8, false)];
+ arrow_fields.push(Field::new_list(
"my_list",
- DataType::List(Arc::new(Field::new("array", arrow_struct, false))),
+ Field::new_struct("array", fields, false),
true,
));
}
@@ -822,15 +820,10 @@ mod tests {
// }
// Special case: group named ends in _tuple
{
- let fields = vec![Field::new("str", DataType::Utf8, false)].into();
- let arrow_struct = DataType::Struct(fields);
- arrow_fields.push(Field::new(
+ let fields = vec![Field::new("str", DataType::Utf8, false)];
+ arrow_fields.push(Field::new_list(
"my_list",
- DataType::List(Arc::new(Field::new(
- "my_list_tuple",
- arrow_struct,
- false,
- ))),
+ Field::new_struct("my_list_tuple", fields, false),
true,
));
}
@@ -838,9 +831,9 @@ mod tests {
// One-level encoding: Only allows required lists with required cells
// repeated value_type name
{
- arrow_fields.push(Field::new(
+ arrow_fields.push(Field::new_list(
"name",
- DataType::List(Arc::new(Field::new("name", DataType::Int32, false))),
+ Field::new("name", DataType::Int32, false),
false,
));
}
@@ -889,9 +882,9 @@ mod tests {
// }
// }
{
- arrow_fields.push(Field::new(
+ arrow_fields.push(Field::new_list(
"my_list1",
- DataType::List(Arc::new(Field::new("element", DataType::Utf8, true))),
+ Field::new("element", DataType::Utf8, true),
false,
));
}
@@ -903,9 +896,9 @@ mod tests {
// }
// }
{
- arrow_fields.push(Field::new(
+ arrow_fields.push(Field::new_list(
"my_list2",
- DataType::List(Arc::new(Field::new("element", DataType::Utf8, false))),
+ Field::new("element", DataType::Utf8, false),
true,
));
}
@@ -917,9 +910,9 @@ mod tests {
// }
// }
{
- arrow_fields.push(Field::new(
+ arrow_fields.push(Field::new_list(
"my_list3",
- DataType::List(Arc::new(Field::new("element", DataType::Utf8, false))),
+ Field::new("element", DataType::Utf8, false),
false,
));
}
@@ -973,19 +966,12 @@ mod tests {
// }
// }
{
- arrow_fields.push(Field::new(
+ arrow_fields.push(Field::new_map(
"my_map1",
- DataType::Map(
- Arc::new(Field::new(
- "key_value",
- DataType::Struct(Fields::from(vec![
- Field::new("key", DataType::Utf8, false),
- Field::new("value", DataType::Int32, true),
- ])),
- false,
- )),
- false,
- ),
+ "key_value",
+ Field::new("key", DataType::Utf8, false),
+ Field::new("value", DataType::Int32, true),
+ false,
false,
));
}
@@ -998,19 +984,12 @@ mod tests {
// }
// }
{
- arrow_fields.push(Field::new(
+ arrow_fields.push(Field::new_map(
"my_map2",
- DataType::Map(
- Arc::new(Field::new(
- "map",
- DataType::Struct(Fields::from(vec![
- Field::new("str", DataType::Utf8, false),
- Field::new("num", DataType::Int32, false),
- ])),
- false, // (#1697)
- )),
- false,
- ),
+ "map",
+ Field::new("str", DataType::Utf8, false),
+ Field::new("num", DataType::Int32, false),
+ false,
true,
));
}
@@ -1023,19 +1002,12 @@ mod tests {
// }
// }
{
- arrow_fields.push(Field::new(
+ arrow_fields.push(Field::new_map(
"my_map3",
- DataType::Map(
- Arc::new(Field::new(
- "map",
- DataType::Struct(Fields::from(vec![
- Field::new("key", DataType::Utf8, false),
- Field::new("value", DataType::Int32, true),
- ])),
- false, // (#1697)
- )),
- false,
- ),
+ "map",
+ Field::new("key", DataType::Utf8, false),
+ Field::new("value", DataType::Int32, true),
+ false,
true,
));
}
@@ -1199,28 +1171,23 @@ mod tests {
{
arrow_fields.push(Field::new("leaf1", DataType::Int32, true));
- let inner_group_list = Field::new(
+ let inner_group_list = Field::new_list(
"innerGroup",
- DataType::List(Arc::new(Field::new(
+ Field::new_struct(
"innerGroup",
- DataType::Struct(
- vec![Field::new("leaf3", DataType::Int32, true)].into(),
- ),
+ vec![Field::new("leaf3", DataType::Int32, true)],
false,
- ))),
+ ),
false,
);
- let outer_group_list = Field::new(
+ let outer_group_list = Field::new_list(
"outerGroup",
- DataType::List(Arc::new(Field::new(
+ Field::new_struct(
"outerGroup",
- DataType::Struct(Fields::from(vec![
- Field::new("leaf2", DataType::Int32, true),
- inner_group_list,
- ])),
+ vec![Field::new("leaf2", DataType::Int32, true), inner_group_list],
false,
- ))),
+ ),
false,
);
arrow_fields.push(outer_group_list);
@@ -1300,9 +1267,9 @@ mod tests {
Field::new("double", DataType::Float64, true),
Field::new("float", DataType::Float32, true),
Field::new("string", DataType::Utf8, true),
- Field::new(
+ Field::new_list(
"bools",
- DataType::List(Arc::new(Field::new("bools", DataType::Boolean, false))),
+ Field::new("bools", DataType::Boolean, false),
false,
),
Field::new("date", DataType::Date32, true),
@@ -1324,27 +1291,19 @@ mod tests {
DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into())),
false,
),
- Field::new(
+ Field::new_list(
"int_list",
- DataType::List(Arc::new(Field::new("int_list", DataType::Int32, false))),
+ Field::new("int_list", DataType::Int32, false),
false,
),
- Field::new(
+ Field::new_list(
"byte_list",
- DataType::List(Arc::new(Field::new(
- "byte_list",
- DataType::Binary,
- false,
- ))),
+ Field::new("byte_list", DataType::Binary, false),
false,
),
- Field::new(
+ Field::new_list(
"string_list",
- DataType::List(Arc::new(Field::new(
- "string_list",
- DataType::Utf8,
- false,
- ))),
+ Field::new("string_list", DataType::Utf8, false),
false,
),
Field::new("decimal_int32", DataType::Decimal128(8, 2), false),
@@ -1415,14 +1374,14 @@ mod tests {
Field::new("double", DataType::Float64, true),
Field::new("float", DataType::Float32, true),
Field::new("string", DataType::Utf8, true),
- Field::new(
+ Field::new_list(
"bools",
- DataType::List(Arc::new(Field::new("element", DataType::Boolean, true))),
+ Field::new("element", DataType::Boolean, true),
true,
),
- Field::new(
+ Field::new_list(
"bools_non_null",
- DataType::List(Arc::new(Field::new("element", DataType::Boolean, false))),
+ Field::new("element", DataType::Boolean, false),
false,
),
Field::new("date", DataType::Date32, true),
@@ -1463,26 +1422,23 @@ mod tests {
DataType::Timestamp(TimeUnit::Microsecond, Some("+01:00".into())),
false,
),
- Field::new(
+ Field::new_struct(
"struct",
- DataType::Struct(Fields::from(vec![
+ vec![
Field::new("bools", DataType::Boolean, false),
Field::new("uint32", DataType::UInt32, false),
- Field::new(
+ Field::new_list(
"int32",
- DataType::List(Arc::new(Field::new(
- "element",
- DataType::Int32,
- true,
- ))),
+ Field::new("element", DataType::Int32, true),
false,
),
- ])),
+ ],
false,
),
- Field::new(
+ Field::new_dictionary(
"dictionary_strings",
- DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
+ DataType::Int32,
+ DataType::Utf8,
false,
),
Field::new("decimal_int32", DataType::Decimal128(8, 2), false),
@@ -1600,9 +1556,9 @@ mod tests {
),
Field::new("c19", DataType::Interval(IntervalUnit::DayTime), false),
Field::new("c20", DataType::Interval(IntervalUnit::YearMonth), false),
- Field::new(
+ Field::new_list(
"c21",
- DataType::List(Arc::new(Field::new("list", DataType::Boolean, true))),
+ Field::new("list", DataType::Boolean, true),
false,
),
// Field::new(
@@ -1660,73 +1616,40 @@ mod tests {
Field::new("c36", DataType::Decimal128(2, 1), false),
Field::new("c37", DataType::Decimal128(50, 20), false),
Field::new("c38", DataType::Decimal128(18, 12), true),
- Field::new(
+ Field::new_map(
"c39",
- DataType::Map(
- Arc::new(Field::new(
- "key_value",
- DataType::Struct(Fields::from(vec![
- Field::new("key", DataType::Utf8, false),
- Field::new(
- "value",
- DataType::List(Arc::new(Field::new(
- "element",
- DataType::Utf8,
- true,
- ))),
- true,
- ),
- ])),
- false, // #1697
- )),
- false, // fails to roundtrip keys_sorted
+ "key_value",
+ Field::new("key", DataType::Utf8, false),
+ Field::new_list(
+ "value",
+ Field::new("element", DataType::Utf8, true),
+ true,
),
+ false, // fails to roundtrip keys_sorted
true,
),
- Field::new(
+ Field::new_map(
"c40",
- DataType::Map(
- Arc::new(Field::new(
- "my_entries",
- DataType::Struct(Fields::from(vec![
- Field::new("my_key", DataType::Utf8, false),
- Field::new(
- "my_value",
- DataType::List(Arc::new(Field::new(
- "item",
- DataType::Utf8,
- true,
- ))),
- true,
- ),
- ])),
- false, // #1697
- )),
- false, // fails to roundtrip keys_sorted
+ "my_entries",
+ Field::new("my_key", DataType::Utf8, false),
+ Field::new_list(
+ "my_value",
+ Field::new("item", DataType::Utf8, true),
+ true,
),
+ false, // fails to roundtrip keys_sorted
true,
),
- Field::new(
+ Field::new_map(
"c41",
- DataType::Map(
- Arc::new(Field::new(
- "my_entries",
- DataType::Struct(Fields::from(vec![
- Field::new("my_key", DataType::Utf8, false),
- Field::new(
- "my_value",
- DataType::List(Arc::new(Field::new(
- "item",
- DataType::Utf8,
- true,
- ))),
- true,
- ),
- ])),
- false,
- )),
- false, // fails to roundtrip keys_sorted
+ "my_entries",
+ Field::new("my_key", DataType::Utf8, false),
+ Field::new_list(
+ "my_value",
+ Field::new("item", DataType::Utf8, true),
+ true,
),
+ false, // fails to roundtrip keys_sorted
false,
),
],
@@ -1760,13 +1683,9 @@ mod tests {
let schema = Schema::new_with_metadata(
vec![
- Field::new(
+ Field::new_list(
"c21",
- DataType::List(Arc::new(Field::new(
- "array",
- DataType::Boolean,
- true,
- ))),
+ Field::new("array", DataType::Boolean, true),
false,
),
Field::new(
@@ -1777,20 +1696,20 @@ mod tests {
),
false,
),
- Field::new(
+ Field::new_list(
"c23",
- DataType::List(Arc::new(Field::new(
+ Field::new_large_list(
"items",
- DataType::LargeList(Arc::new(Field::new(
+ Field::new_struct(
"items",
- DataType::Struct(Fields::from(vec![
+ vec![
Field::new("a", DataType::Int16, true),
Field::new("b", DataType::Float64, false),
- ])),
+ ],
true,
- ))),
+ ),
true,
- ))),
+ ),
true,
),
],