tustvold commented on code in PR #1588:
URL: https://github.com/apache/arrow-rs/pull/1588#discussion_r864741142
##########
parquet/src/arrow/array_reader/builder.rs:
##########
@@ -208,42 +191,61 @@ impl<'a> TypeVisitor<Option<Box<dyn ArrayReader>>, &'a
ArrayReaderBuilderContext
// Add map type to context
let mut new_context = context.clone();
new_context.path.append(vec![map_type.name().to_string()]);
- if let Repetition::OPTIONAL = map_type.get_basic_info().repetition() {
- new_context.def_level += 1;
+
+ match map_type.get_basic_info().repetition() {
+ Repetition::REQUIRED => {}
+ Repetition::OPTIONAL => {
+ new_context.def_level += 1;
+ }
+ Repetition::REPEATED => {
+ return Err(ArrowError("Map cannot be repeated".to_string()))
+ }
+ }
+
+ if map_type.get_fields().len() != 1 {
+ return Err(ArrowError(format!(
+ "Map field must have exactly one key_value child, found {}",
+ map_type.get_fields().len()
+ )));
}
// Add map entry (key_value) to context
- let map_key_value = map_type.get_fields().first().ok_or_else(|| {
- ArrowError("Map field must have a key_value entry".to_string())
- })?;
+ let map_key_value = &map_type.get_fields()[0];
+ if map_key_value.get_basic_info().repetition() != Repetition::REPEATED
{
+ return Err(ArrowError(
+ "Child of map field must be repeated".to_string(),
+ ));
+ }
+
new_context
.path
.append(vec![map_key_value.name().to_string()]);
+
new_context.rep_level += 1;
+ new_context.def_level += 1;
+
+ if map_key_value.get_fields().len() != 2 {
+ // According to the specification the values are optional (#1642)
+ return Err(ArrowError(format!(
+ "Child of map field must have two children, found {}",
+ map_key_value.get_fields().len()
+ )));
+ }
// Get key and value, and create context for each
- let map_key = map_key_value
- .get_fields()
- .first()
- .ok_or_else(|| ArrowError("Map entry must have a
key".to_string()))?;
- let map_value = map_key_value
- .get_fields()
- .get(1)
- .ok_or_else(|| ArrowError("Map entry must have a
value".to_string()))?;
-
- let key_reader = {
- let mut key_context = new_context.clone();
- key_context.def_level += 1;
- key_context.path.append(vec![map_key.name().to_string()]);
Review Comment:
This is a drive-by fix, the context is the context of the parent, not the
value being dispatched. This would result in map_key appearing twice
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]