brunal commented on code in PR #9486:
URL: https://github.com/apache/arrow-rs/pull/9486#discussion_r2867201958
##########
arrow-row/src/lib.rs:
##########
@@ -639,6 +643,33 @@ impl Codec {
let converter = RowConverter::new(vec![field])?;
Ok(Self::List(converter))
}
+ DataType::Map(f, _) => {
+ // The encoded contents will be inverted if descending is set
to true
Review Comment:
Nit: backticks around `descending` (for consitency) + period.
##########
arrow-row/src/lib.rs:
##########
@@ -335,11 +335,13 @@ mod variable;
/// └───────┴───────────────┴───────┴─────────┴───────┘
/// ```
///
-/// ## List Encoding
+/// ## List and Map Encoding
///
/// Lists are encoded by first encoding all child elements to the row format.
///
-/// A list value is then encoded as the concatenation of each of the child
elements,
+/// the Map encoding is the same with the only difference being that the child
elements are key-value pairs
Review Comment:
Nit: capitalization & period.
##########
arrow-row/src/lib.rs:
##########
@@ -639,6 +643,33 @@ impl Codec {
let converter = RowConverter::new(vec![field])?;
Ok(Self::List(converter))
}
+ DataType::Map(f, _) => {
+ // The encoded contents will be inverted if descending is set
to true
+ // As such we set `descending` to false and negate nulls first
if it
+ // it set to true
Review Comment:
"it" seems ambiguous -- is it `nulls_first` or `descending`? Suggestion: "if
`descending` was set to true".
##########
arrow-row/src/lib.rs:
##########
@@ -4055,6 +4544,51 @@ mod tests {
.collect()
}
+ #[derive(Clone, Copy)]
+ enum GenerateAllUniqueNullBehavior {
+ AllowUpToSingleNull { valid_percent: f64 },
+ AllValid,
+ }
+
+ fn generate_all_unique_primitive_array<K>(
+ rng: &mut impl RngCore,
+ len: usize,
+ null_behavior: GenerateAllUniqueNullBehavior,
+ ) -> PrimitiveArray<K>
+ where
+ K: ArrowPrimitiveType,
+ K::Native: Hash + Eq,
+ StandardUniform: Distribution<K::Native>,
+ {
+ let mut seen = std::collections::HashSet::new();
+ (0..len)
+ .map(|_| {
+ let mut value;
+
+ loop {
+ match null_behavior {
+ GenerateAllUniqueNullBehavior::AllValid => {
+ value = Some(rng.random());
Review Comment:
you can factor out `value =` outside the match.
##########
arrow-row/src/lib.rs:
##########
@@ -639,6 +643,33 @@ impl Codec {
let converter = RowConverter::new(vec![field])?;
Ok(Self::List(converter))
}
+ DataType::Map(f, _) => {
+ // The encoded contents will be inverted if descending is set
to true
+ // As such we set `descending` to false and negate nulls first
if it
+ // it set to true
+ let options = SortOptions {
+ descending: false,
+ nulls_first: sort_field.options.nulls_first !=
sort_field.options.descending,
+ };
+
+ let DataType::Struct(fields) = f.data_type() else {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "expected struct field in map, got {:?}",
+ f.data_type()
+ )));
+ };
+
+ // For Map type we unwrap the intermediate struct type to
avoid going through Struct codec to improve performance
+ let fields = fields
+ .iter()
+ .map(|struct_field| {
+
SortField::new_with_options(struct_field.data_type().clone(), options)
+ })
+ .collect::<Vec<_>>();
+ assert_eq!(fields.len(), 2);
Review Comment:
Why is this safe? (in contrast with returning an Err)
##########
arrow-row/src/lib.rs:
##########
@@ -4055,6 +4544,51 @@ mod tests {
.collect()
}
+ #[derive(Clone, Copy)]
+ enum GenerateAllUniqueNullBehavior {
+ AllowUpToSingleNull { valid_percent: f64 },
+ AllValid,
+ }
+
+ fn generate_all_unique_primitive_array<K>(
+ rng: &mut impl RngCore,
+ len: usize,
+ null_behavior: GenerateAllUniqueNullBehavior,
+ ) -> PrimitiveArray<K>
+ where
+ K: ArrowPrimitiveType,
+ K::Native: Hash + Eq,
+ StandardUniform: Distribution<K::Native>,
+ {
+ let mut seen = std::collections::HashSet::new();
+ (0..len)
+ .map(|_| {
+ let mut value;
+
+ loop {
Review Comment:
If you pass `K=Int8Type` with `len=257` then you end up with an infinite
loop.
##########
arrow-row/src/lib.rs:
##########
@@ -4055,6 +4544,51 @@ mod tests {
.collect()
}
+ #[derive(Clone, Copy)]
+ enum GenerateAllUniqueNullBehavior {
+ AllowUpToSingleNull { valid_percent: f64 },
Review Comment:
Is probalistically adding a Null actually better than always inserting one?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]