This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new ed9efe78e4 Fix regression caused by changes in Display for DataType -
display (`List(non-null Int64)` instead of `List(nullable Int64)` (#8890)
ed9efe78e4 is described below
commit ed9efe78e4cc958cc96707557818e754419debb0
Author: Ed Seidl <[email protected]>
AuthorDate: Thu Nov 20 12:38:22 2025 -0800
Fix regression caused by changes in Display for DataType - display
(`List(non-null Int64)` instead of `List(nullable Int64)` (#8890)
# Which issue does this PR close?
- Closes #8883.
# Rationale for this change
Second attempt at fixing #8883.
# What changes are included in this PR?
This changes `Display` for `DataType` to indicate non-nullable fields as
"non-null", and removes the "nullable" indicator for nullable fields.
This is to remain backwards compatible with previous behavior.
# Are these changes tested?
Should be handled by existing tests
# Are there any user-facing changes?
No, this changes un-released behavior
---------
Co-authored-by: Andrew Lamb <[email protected]>
---
arrow-cast/src/cast/mod.rs | 8 ++--
arrow-schema/src/datatype_display.rs | 73 ++++++++++++++++++-----------------
arrow-schema/src/datatype_parse.rs | 48 ++++++++++++++---------
arrow-schema/src/schema.rs | 2 +-
parquet/src/arrow/arrow_reader/mod.rs | 4 +-
5 files changed, 74 insertions(+), 61 deletions(-)
diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs
index 47fdb01a09..897a9153cb 100644
--- a/arrow-cast/src/cast/mod.rs
+++ b/arrow-cast/src/cast/mod.rs
@@ -8805,7 +8805,7 @@ mod tests {
};
assert_eq!(
t,
- r#"Casting from Map("entries": Struct("key": Utf8, "value":
nullable Utf8), unsorted) to Map("entries": Struct("key": Utf8, "value": Utf8),
sorted) not supported"#
+ r#"Casting from Map("entries": non-null Struct("key": non-null
Utf8, "value": Utf8), unsorted) to Map("entries": non-null Struct("key":
non-null Utf8, "value": non-null Utf8), sorted) not supported"#
);
}
@@ -8856,7 +8856,7 @@ mod tests {
};
assert_eq!(
t,
- r#"Casting from Map("entries": Struct("key": Utf8, "value":
nullable Interval(DayTime)), unsorted) to Map("entries": Struct("key": Utf8,
"value": Duration(s)), sorted) not supported"#
+ r#"Casting from Map("entries": non-null Struct("key": non-null
Utf8, "value": Interval(DayTime)), unsorted) to Map("entries": non-null
Struct("key": non-null Utf8, "value": non-null Duration(s)), sorted) not
supported"#
);
}
@@ -10961,7 +10961,7 @@ mod tests {
let to_type = DataType::Utf8;
let result = cast(&struct_array, &to_type);
assert_eq!(
- r#"Cast error: Casting from Struct("a": Boolean) to Utf8 not
supported"#,
+ r#"Cast error: Casting from Struct("a": non-null Boolean) to Utf8
not supported"#,
result.unwrap_err().to_string()
);
}
@@ -10972,7 +10972,7 @@ mod tests {
let to_type = DataType::Struct(vec![Field::new("a", DataType::Boolean,
false)].into());
let result = cast(&array, &to_type);
assert_eq!(
- r#"Cast error: Casting from Utf8 to Struct("a": Boolean) not
supported"#,
+ r#"Cast error: Casting from Utf8 to Struct("a": non-null Boolean)
not supported"#,
result.unwrap_err().to_string()
);
}
diff --git a/arrow-schema/src/datatype_display.rs
b/arrow-schema/src/datatype_display.rs
index af36c0cb2c..cca7cf254f 100644
--- a/arrow-schema/src/datatype_display.rs
+++ b/arrow-schema/src/datatype_display.rs
@@ -15,19 +15,23 @@
// specific language governing permissions and limitations
// under the License.
-use std::{collections::HashMap, fmt};
-
use crate::DataType;
+use std::fmt::Display;
+use std::{collections::HashMap, fmt};
-impl fmt::Display for DataType {
+impl Display for DataType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fn format_metadata(metadata: &HashMap<String, String>) -> String {
format!("{}", FormatMetadata(metadata))
}
+ fn format_nullability(field: &crate::Field) -> &str {
+ if field.is_nullable() { "" } else { "non-null " }
+ }
+
fn format_field(field: &crate::Field) -> String {
let name = field.name();
- let maybe_nullable = if field.is_nullable() { "nullable " } else {
"" };
+ let maybe_nullable = format_nullability(field);
let data_type = field.data_type();
let metadata_str = format_metadata(field.metadata());
format!("{name:?}: {maybe_nullable}{data_type}{metadata_str}")
@@ -90,7 +94,7 @@ impl fmt::Display for DataType {
};
let name = field.name();
- let maybe_nullable = if field.is_nullable() { "nullable " }
else { "" };
+ let maybe_nullable = format_nullability(field);
let data_type = field.data_type();
let field_name_str = if name == "item" {
String::default()
@@ -99,7 +103,7 @@ impl fmt::Display for DataType {
};
let metadata_str = format_metadata(field.metadata());
- // e.g. `LargeList(nullable Uint32)
+ // e.g. `LargeList(non-null Uint32)
write!(
f,
"{type_name}({maybe_nullable}{data_type}{field_name_str}{metadata_str})"
@@ -107,7 +111,7 @@ impl fmt::Display for DataType {
}
Self::FixedSizeList(field, size) => {
let name = field.name();
- let maybe_nullable = if field.is_nullable() { "nullable " }
else { "" };
+ let maybe_nullable = format_nullability(field);
let data_type = field.data_type();
let field_name_str = if name == "item" {
String::default()
@@ -209,7 +213,7 @@ mod tests {
fn test_display_list() {
let list_data_type =
DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
let list_data_type_string = list_data_type.to_string();
- let expected_string = "List(nullable Int32)";
+ let expected_string = "List(Int32)";
assert_eq!(list_data_type_string, expected_string);
}
@@ -218,7 +222,7 @@ mod tests {
let list_view_data_type =
DataType::ListView(Arc::new(Field::new("item", DataType::Int32,
true)));
let list_view_data_type_string = list_view_data_type.to_string();
- let expected_string = "ListView(nullable Int32)";
+ let expected_string = "ListView(Int32)";
assert_eq!(list_view_data_type_string, expected_string);
}
@@ -226,7 +230,7 @@ mod tests {
fn test_display_list_with_named_field() {
let list_data_type = DataType::List(Arc::new(Field::new("foo",
DataType::UInt64, false)));
let list_data_type_string = list_data_type.to_string();
- let expected_string = "List(UInt64, field: 'foo')";
+ let expected_string = "List(non-null UInt64, field: 'foo')";
assert_eq!(list_data_type_string, expected_string);
}
@@ -235,7 +239,7 @@ mod tests {
let list_view_data_type =
DataType::ListView(Arc::new(Field::new("bar", DataType::UInt64,
false)));
let list_view_data_type_string = list_view_data_type.to_string();
- let expected_string = "ListView(UInt64, field: 'bar')";
+ let expected_string = "ListView(non-null UInt64, field: 'bar')";
assert_eq!(list_view_data_type_string, expected_string);
}
@@ -246,7 +250,7 @@ mod tests {
false,
)));
let nested_data_type_string = nested_data_type.to_string();
- let nested_expected_string = "List(List(UInt64))";
+ let nested_expected_string = "List(non-null List(non-null UInt64))";
assert_eq!(nested_data_type_string, nested_expected_string);
}
@@ -257,7 +261,7 @@ mod tests {
false,
)));
let nested_view_data_type_string = nested_view_data_type.to_string();
- let nested_view_expected_string = "ListView(ListView(UInt64))";
+ let nested_view_expected_string = "ListView(non-null ListView(non-null
UInt64))";
assert_eq!(nested_view_data_type_string, nested_view_expected_string);
}
@@ -268,7 +272,7 @@ mod tests {
field.set_metadata(metadata);
let list_data_type = DataType::List(Arc::new(field));
let list_data_type_string = list_data_type.to_string();
- let expected_string = "List(nullable Int32, metadata: {\"foo1\":
\"value1\"})";
+ let expected_string = "List(Int32, metadata: {\"foo1\": \"value1\"})";
assert_eq!(list_data_type_string, expected_string);
}
@@ -280,7 +284,7 @@ mod tests {
field.set_metadata(metadata);
let list_view_data_type = DataType::ListView(Arc::new(field));
let list_view_data_type_string = list_view_data_type.to_string();
- let expected_string = "ListView(nullable Int32, metadata: {\"foo2\":
\"value2\"})";
+ let expected_string = "ListView(Int32, metadata: {\"foo2\":
\"value2\"})";
assert_eq!(list_view_data_type_string, expected_string);
}
@@ -289,14 +293,14 @@ mod tests {
let large_list_data_type =
DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int32, true)));
let large_list_data_type_string = large_list_data_type.to_string();
- let expected_string = "LargeList(nullable Int32)";
+ let expected_string = "LargeList(Int32)";
assert_eq!(large_list_data_type_string, expected_string);
// Test with named field
let large_list_named =
DataType::LargeList(Arc::new(Field::new("bar", DataType::UInt64,
false)));
let large_list_named_string = large_list_named.to_string();
- let expected_named_string = "LargeList(UInt64, field: 'bar')";
+ let expected_named_string = "LargeList(non-null UInt64, field: 'bar')";
assert_eq!(large_list_named_string, expected_named_string);
// Test with metadata
@@ -305,8 +309,7 @@ mod tests {
field.set_metadata(metadata);
let large_list_metadata = DataType::LargeList(Arc::new(field));
let large_list_metadata_string = large_list_metadata.to_string();
- let expected_metadata_string =
- "LargeList(nullable Int32, metadata: {\"key1\": \"value1\"})";
+ let expected_metadata_string = "LargeList(Int32, metadata: {\"key1\":
\"value1\"})";
assert_eq!(large_list_metadata_string, expected_metadata_string);
}
@@ -315,14 +318,14 @@ mod tests {
let large_list_view_data_type =
DataType::LargeListView(Arc::new(Field::new("item",
DataType::Int32, true)));
let large_list_view_data_type_string =
large_list_view_data_type.to_string();
- let expected_string = "LargeListView(nullable Int32)";
+ let expected_string = "LargeListView(Int32)";
assert_eq!(large_list_view_data_type_string, expected_string);
// Test with named field
let large_list_view_named =
DataType::LargeListView(Arc::new(Field::new("bar",
DataType::UInt64, false)));
let large_list_view_named_string = large_list_view_named.to_string();
- let expected_named_string = "LargeListView(UInt64, field: 'bar')";
+ let expected_named_string = "LargeListView(non-null UInt64, field:
'bar')";
assert_eq!(large_list_view_named_string, expected_named_string);
// Test with metadata
@@ -331,8 +334,7 @@ mod tests {
field.set_metadata(metadata);
let large_list_view_metadata =
DataType::LargeListView(Arc::new(field));
let large_list_view_metadata_string =
large_list_view_metadata.to_string();
- let expected_metadata_string =
- "LargeListView(nullable Int32, metadata: {\"key1\": \"value1\"})";
+ let expected_metadata_string = "LargeListView(Int32, metadata:
{\"key1\": \"value1\"})";
assert_eq!(large_list_view_metadata_string, expected_metadata_string);
}
@@ -341,14 +343,14 @@ mod tests {
let fixed_size_list =
DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, true)),
5);
let fixed_size_list_string = fixed_size_list.to_string();
- let expected_string = "FixedSizeList(5 x nullable Int32)";
+ let expected_string = "FixedSizeList(5 x Int32)";
assert_eq!(fixed_size_list_string, expected_string);
// Test with named field
let fixed_size_named =
DataType::FixedSizeList(Arc::new(Field::new("baz",
DataType::UInt64, false)), 3);
let fixed_size_named_string = fixed_size_named.to_string();
- let expected_named_string = "FixedSizeList(3 x UInt64, field: 'baz')";
+ let expected_named_string = "FixedSizeList(3 x non-null UInt64, field:
'baz')";
assert_eq!(fixed_size_named_string, expected_named_string);
// Test with metadata
@@ -357,8 +359,7 @@ mod tests {
field.set_metadata(metadata);
let fixed_size_metadata = DataType::FixedSizeList(Arc::new(field), 4);
let fixed_size_metadata_string = fixed_size_metadata.to_string();
- let expected_metadata_string =
- "FixedSizeList(4 x nullable Int32, metadata: {\"key2\":
\"value2\"})";
+ let expected_metadata_string = "FixedSizeList(4 x Int32, metadata:
{\"key2\": \"value2\"})";
assert_eq!(fixed_size_metadata_string, expected_metadata_string);
}
@@ -370,7 +371,7 @@ mod tests {
];
let struct_data_type = DataType::Struct(fields.into());
let struct_data_type_string = struct_data_type.to_string();
- let expected_string = "Struct(\"a\": Int32, \"b\": nullable Utf8)";
+ let expected_string = "Struct(\"a\": non-null Int32, \"b\": Utf8)";
assert_eq!(struct_data_type_string, expected_string);
// Test with metadata
@@ -384,7 +385,7 @@ mod tests {
vec![Field::new("a", DataType::Int32, false), field_with_metadata];
let struct_data_type_with_metadata =
DataType::Struct(struct_fields_with_metadata.into());
let struct_data_type_with_metadata_string =
struct_data_type_with_metadata.to_string();
- let expected_string_with_metadata = "Struct(\"a\": Int32, \"b\":
nullable Utf8, metadata: {\"key\": \"value\", \"key2\": \"value2\"})";
+ let expected_string_with_metadata = "Struct(\"a\": non-null Int32,
\"b\": Utf8, metadata: {\"key\": \"value\", \"key2\": \"value2\"})";
assert_eq!(
struct_data_type_with_metadata_string,
expected_string_with_metadata
@@ -405,7 +406,7 @@ mod tests {
let union_data_type = DataType::Union(union_fields,
crate::UnionMode::Sparse);
let union_data_type_string = union_data_type.to_string();
- let expected_string = "Union(Sparse, 0: (\"a\": Int32), 1: (\"b\":
nullable Utf8))";
+ let expected_string = "Union(Sparse, 0: (\"a\": non-null Int32), 1:
(\"b\": Utf8))";
assert_eq!(union_data_type_string, expected_string);
// Test with metadata
@@ -421,7 +422,7 @@ mod tests {
let union_data_type_with_metadata =
DataType::Union(union_fields_with_metadata,
crate::UnionMode::Sparse);
let union_data_type_with_metadata_string =
union_data_type_with_metadata.to_string();
- let expected_string_with_metadata = "Union(Sparse, 0: (\"a\": Int32),
1: (\"b\": nullable Utf8, metadata: {\"key\": \"value\"}))";
+ let expected_string_with_metadata = "Union(Sparse, 0: (\"a\": non-null
Int32), 1: (\"b\": Utf8, metadata: {\"key\": \"value\"}))";
assert_eq!(
union_data_type_with_metadata_string,
expected_string_with_metadata
@@ -444,7 +445,7 @@ mod tests {
let map_data_type = DataType::Map(Arc::new(entry_field), true);
let map_data_type_string = map_data_type.to_string();
let expected_string =
- "Map(\"entries\": Struct(\"key\": Utf8, \"value\": nullable
Int32), sorted)";
+ "Map(\"entries\": non-null Struct(\"key\": non-null Utf8,
\"value\": Int32), sorted)";
assert_eq!(map_data_type_string, expected_string);
// Test with metadata
@@ -463,7 +464,7 @@ mod tests {
entry_field_with_metadata.set_metadata(metadata);
let map_data_type_with_metadata =
DataType::Map(Arc::new(entry_field_with_metadata), true);
let map_data_type_with_metadata_string =
map_data_type_with_metadata.to_string();
- let expected_string_with_metadata = "Map(\"entries\": Struct(\"key\":
Utf8, \"value\": nullable Int32), metadata: {\"key\": \"value\"}, sorted)";
+ let expected_string_with_metadata = "Map(\"entries\": non-null
Struct(\"key\": non-null Utf8, \"value\": Int32), metadata: {\"key\":
\"value\"}, sorted)";
assert_eq!(
map_data_type_with_metadata_string,
expected_string_with_metadata
@@ -476,7 +477,7 @@ mod tests {
let values_field = Arc::new(Field::new("values", DataType::Int32,
true));
let ree_data_type = DataType::RunEndEncoded(run_ends_field.clone(),
values_field.clone());
let ree_data_type_string = ree_data_type.to_string();
- let expected_string = "RunEndEncoded(\"run_ends\": UInt32, \"values\":
nullable Int32)";
+ let expected_string = "RunEndEncoded(\"run_ends\": non-null UInt32,
\"values\": Int32)";
assert_eq!(ree_data_type_string, expected_string);
// Test with metadata
@@ -486,7 +487,7 @@ mod tests {
let ree_data_type_with_metadata =
DataType::RunEndEncoded(Arc::new(run_ends_field_with_metadata),
values_field.clone());
let ree_data_type_with_metadata_string =
ree_data_type_with_metadata.to_string();
- let expected_string_with_metadata = "RunEndEncoded(\"run_ends\":
UInt32, metadata: {\"key\": \"value\"}, \"values\": nullable Int32)";
+ let expected_string_with_metadata = "RunEndEncoded(\"run_ends\":
non-null UInt32, metadata: {\"key\": \"value\"}, \"values\": Int32)";
assert_eq!(
ree_data_type_with_metadata_string,
expected_string_with_metadata
@@ -514,7 +515,7 @@ mod tests {
);
let complex_dict_data_type_string = complex_dict_data_type.to_string();
let expected_complex_string =
- "Dictionary(Int16, Struct(\"a\": Int32, \"b\": nullable Utf8))";
+ "Dictionary(Int16, Struct(\"a\": non-null Int32, \"b\": Utf8))";
assert_eq!(complex_dict_data_type_string, expected_complex_string);
}
diff --git a/arrow-schema/src/datatype_parse.rs
b/arrow-schema/src/datatype_parse.rs
index 0bb8d629e2..68775f9d5b 100644
--- a/arrow-schema/src/datatype_parse.rs
+++ b/arrow-schema/src/datatype_parse.rs
@@ -99,9 +99,9 @@ impl<'a> Parser<'a> {
}
/// parses Field, this is the inversion of `format_field` in
`datatype_display.rs`.
- /// E.g: "a": nullable Int64
+ /// E.g: "a": non-null Int64
///
- /// TODO: support metadata: `"a": nullable Int64 metadata: {"foo":
"value"}`
+ /// TODO: support metadata: `"a": non-null Int64 metadata: {"foo":
"value"}`
fn parse_field(&mut self) -> ArrowResult<Field> {
let name = self.parse_double_quoted_string("Field")?;
self.expect_token(Token::Colon)?;
@@ -112,9 +112,9 @@ impl<'a> Parser<'a> {
/// Parses field inside a list. Use `Field::LIST_FIELD_DEFAULT_NAME`
/// if no field name is specified.
- /// E.g: `nullable Int64, field: 'foo'` or `nullable Int64`
+ /// E.g: `non-null Int64, field: 'foo'` or `non-null Int64`
///
- /// TODO: support metadata: `nullable Int64, metadata: {"foo2": "value"}`
+ /// TODO: support metadata: `non-null Int64, metadata: {"foo2": "value"}`
fn parse_list_field(&mut self, context: &str) -> ArrowResult<Field> {
let nullable = self.parse_opt_nullable();
let data_type = self.parse_next_type()?;
@@ -137,7 +137,7 @@ impl<'a> Parser<'a> {
}
/// Parses the List type (called after `List` has been consumed)
- /// E.g: List(nullable Int64, field: 'foo')
+ /// E.g: List(non-null Int64, field: 'foo')
fn parse_list(&mut self) -> ArrowResult<DataType> {
self.expect_token(Token::LParen)?;
let field = self.parse_list_field("List")?;
@@ -146,7 +146,7 @@ impl<'a> Parser<'a> {
}
/// Parses the ListView type (called after `ListView` has been consumed)
- /// E.g: ListView(nullable Int64, field: 'foo')
+ /// E.g: ListView(non-null Int64, field: 'foo')
fn parse_list_view(&mut self) -> ArrowResult<DataType> {
self.expect_token(Token::LParen)?;
let field = self.parse_list_field("ListView")?;
@@ -155,7 +155,7 @@ impl<'a> Parser<'a> {
}
/// Parses the LargeList type (called after `LargeList` has been consumed)
- /// E.g: LargeList(nullable Int64, field: 'foo')
+ /// E.g: LargeList(non-null Int64, field: 'foo')
fn parse_large_list(&mut self) -> ArrowResult<DataType> {
self.expect_token(Token::LParen)?;
let field = self.parse_list_field("LargeList")?;
@@ -164,7 +164,7 @@ impl<'a> Parser<'a> {
}
/// Parses the LargeListView type (called after `LargeListView` has been
consumed)
- /// E.g: LargeListView(nullable Int64, field: 'foo')
+ /// E.g: LargeListView(non-null Int64, field: 'foo')
fn parse_large_list_view(&mut self) -> ArrowResult<DataType> {
self.expect_token(Token::LParen)?;
let field = self.parse_list_field("LargeListView")?;
@@ -175,14 +175,14 @@ impl<'a> Parser<'a> {
/// Parses the FixedSizeList type (called after `FixedSizeList` has been
consumed)
///
/// Examples:
- /// * `FixedSizeList(5 x nullable Int64, field: 'foo')`
+ /// * `FixedSizeList(5 x non-null Int64, field: 'foo')`
/// * `FixedSizeList(4, Int64)`
///
fn parse_fixed_size_list(&mut self) -> ArrowResult<DataType> {
self.expect_token(Token::LParen)?;
let length = self.parse_i32("FixedSizeList")?;
match self.next_token()? {
- // `FixedSizeList(5 x nullable Int64, field: 'foo')` format
+ // `FixedSizeList(5 x non-null Int64, field: 'foo')` format
Token::X => {
let field = self.parse_list_field("FixedSizeList")?;
self.expect_token(Token::RParen)?;
@@ -464,7 +464,7 @@ impl<'a> Parser<'a> {
}
/// Parses the next Union (called after `Union` has been consumed)
- /// E.g: Union(Sparse, 0: ("a": Int32), 1: ("b": nullable Utf8))
+ /// E.g: Union(Sparse, 0: ("a": Int32), 1: ("b": non-null Utf8))
fn parse_union(&mut self) -> ArrowResult<DataType> {
self.expect_token(Token::LParen)?;
let union_mode = self.parse_union_mode()?;
@@ -501,7 +501,7 @@ impl<'a> Parser<'a> {
}
/// Parses the next UnionField
- /// 0: ("a": nullable Int32)
+ /// 0: ("a": non-null Int32)
fn parse_union_field(&mut self) -> ArrowResult<(i8, Field)> {
let type_id = self.parse_i8("UnionField")?;
self.expect_token(Token::Colon)?;
@@ -512,7 +512,7 @@ impl<'a> Parser<'a> {
}
/// Parses the next Map (called after `Map` has been consumed)
- /// E.g: Map("entries": Struct("key": Utf8, "value": nullable Int32),
sorted)
+ /// E.g: Map("entries": Struct("key": Utf8, "value": non-null Int32),
sorted)
fn parse_map(&mut self) -> ArrowResult<DataType> {
self.expect_token(Token::LParen)?;
let field = self.parse_field()?;
@@ -534,7 +534,7 @@ impl<'a> Parser<'a> {
}
/// Parses the next RunEndEncoded (called after `RunEndEncoded` has been
consumed)
- /// E.g: RunEndEncoded("run_ends": UInt32, "values": nullable Int32)
+ /// E.g: RunEndEncoded("run_ends": UInt32, "values": non-null Int32)
fn parse_run_end_encoded(&mut self) -> ArrowResult<DataType> {
self.expect_token(Token::LParen)?;
let run_ends = self.parse_field()?;
@@ -547,11 +547,12 @@ impl<'a> Parser<'a> {
))
}
- /// return and consume if the next token is `Token::Nullable`
+ /// consume the next token and return `false` if the field is `non-null`.
fn parse_opt_nullable(&mut self) -> bool {
- self.tokenizer
- .next_if(|next| matches!(next, Ok(Token::Nullable)))
- .is_some()
+ let tok = self
+ .tokenizer
+ .next_if(|next| matches!(next, Ok(Token::NonNull |
Token::Nullable)));
+ !matches!(tok, Some(Ok(Token::NonNull)))
}
/// return the next token, or an error if there are none left
@@ -705,6 +706,7 @@ impl<'a> Tokenizer<'a> {
"Some" => Token::Some,
"None" => Token::None,
+ "non-null" => Token::NonNull,
"nullable" => Token::Nullable,
"field" => Token::Field,
"x" => Token::X,
@@ -866,6 +868,7 @@ enum Token {
Map,
MapSorted(bool),
RunEndEncoded,
+ NonNull,
Nullable,
Field,
X,
@@ -910,6 +913,7 @@ impl Display for Token {
write!(f, "{}", if *sorted { "sorted" } else { "unsorted" })
}
Token::RunEndEncoded => write!(f, "RunEndEncoded"),
+ Token::NonNull => write!(f, "non-null"),
Token::Nullable => write!(f, "nullable"),
Token::Field => write!(f, "field"),
Token::X => write!(f, "x"),
@@ -1385,6 +1389,14 @@ mod test {
"FixedSizeList(4, Int64)",
FixedSizeList(Arc::new(Field::new_list_field(Int64, true)), 4),
),
+ (
+ "List(Int64)",
+ List(Arc::new(Field::new_list_field(Int64, true))),
+ ),
+ (
+ "LargeList(Int64)",
+ LargeList(Arc::new(Field::new_list_field(Int64, true))),
+ ),
];
for (data_type_string, expected_data_type) in cases {
diff --git a/arrow-schema/src/schema.rs b/arrow-schema/src/schema.rs
index 63be333a0f..0c7db39dfb 100644
--- a/arrow-schema/src/schema.rs
+++ b/arrow-schema/src/schema.rs
@@ -726,7 +726,7 @@ mod tests {
schema.to_string(),
"Field { \"first_name\": Utf8, metadata: {\"k\": \"v\"} }, \
Field { \"last_name\": Utf8 }, \
- Field { \"address\": Struct(\"street\": Utf8, \"zip\": UInt16) },
\
+ Field { \"address\": Struct(\"street\": non-null Utf8, \"zip\":
non-null UInt16) }, \
Field { \"interests\": nullable Dictionary(Int32, Utf8), dict_id:
123, dict_is_ordered }"
)
}
diff --git a/parquet/src/arrow/arrow_reader/mod.rs
b/parquet/src/arrow/arrow_reader/mod.rs
index f314df1284..2b806db896 100644
--- a/parquet/src/arrow/arrow_reader/mod.rs
+++ b/parquet/src/arrow/arrow_reader/mod.rs
@@ -4008,8 +4008,8 @@ pub(crate) mod tests {
),
])),
"Arrow: Incompatible supplied Arrow schema: data type mismatch for
field nested: \
- requested Struct(\"nested1_valid\": Utf8, \"nested1_invalid\":
Int32) \
- but found Struct(\"nested1_valid\": Utf8, \"nested1_invalid\":
Int64)",
+ requested Struct(\"nested1_valid\": non-null Utf8,
\"nested1_invalid\": non-null Int32) \
+ but found Struct(\"nested1_valid\": non-null Utf8,
\"nested1_invalid\": non-null Int64)",
);
}