Rich-T-kid commented on code in PR #9865:
URL: https://github.com/apache/arrow-rs/pull/9865#discussion_r3209911188
##########
arrow-select/src/take.rs:
##########
@@ -2842,4 +2828,58 @@ mod tests {
assert_eq!(run_result.run_ends().len(), 0);
assert_eq!(run_result.values().len(), 0);
}
+
+ #[test]
+ fn test_take_run_end_encoded_merges_identical_runs() {
+ // see https://github.com/apache/arrow-rs/issues/7710
+ let mut builder = PrimitiveRunBuilder::<Int32Type, Int32Type>::new();
+ builder.extend([1, 1, 0, 0, 1, 1].into_iter().map(Some));
+ let ree = builder.finish();
+
+ let indexes = Int32Array::from_iter_values(vec![0, 1, 4, 5]);
+ let result = take(&ree, &indexes, None).unwrap();
+ let result = result.as_run::<Int32Type>();
+ assert_eq!(result.run_ends().values(), &[4]);
+ assert_eq!(result.values().as_primitive::<Int32Type>().values(), &[1]);
+ }
+
+ #[test]
+ fn test_take_run_end_encoded_merges_identical_string_runs() {
+ let mut builder = StringRunBuilder::<Int32Type>::new();
+ builder.extend(
+ ["bob", "bob", "alice", "alice", "bob", "bob"]
+ .into_iter()
+ .map(Some),
+ );
+ let ree = builder.finish();
+
+ let indexes = Int32Array::from_iter_values(vec![0, 1, 4, 5]);
+ let result = take(&ree, &indexes, None).unwrap();
+ let result = result.as_run::<Int32Type>();
+ assert_eq!(result.run_ends().values(), &[4]);
+ assert_eq!(result.values().as_string::<i32>().value(0), "bob");
+ }
+
+ #[test]
+ fn test_take_run_end_encoded_mixed_runs() {
+ // Validates that runs are merged whether the same logical value comes
+ // from the same physical index (repeated indices) or distinct ones.
+ let mut builder = StringRunBuilder::<Int32Type>::new();
+ builder.extend(
+ ["bob", "bob", "alice", "alice", "bob", "bob", "eve", "eve"]
+ .into_iter()
+ .map(Some),
+ );
+ let ree = builder.finish();
+
+ let indexes = Int32Array::from_iter_values(vec![0, 0, 1, 4, 5, 2, 3,
2, 6, 7, 6]);
+ let result = take(&ree, &indexes, None).unwrap();
+ let result = result.as_run::<Int32Type>();
+
+ assert_eq!(result.len(), 11);
+ assert_eq!(result.run_ends().values(), &[5, 8, 11]);
+ assert_eq!(result.values().as_string::<i32>().value(0), "bob");
Review Comment:
Yeah, that makes sense. I'll update the PR.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]