This is an automated email from the ASF dual-hosted git repository.
zanmato pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new d3c467625d GH-45393: [C++][Compute] Fix wrong decoding for 32-bit
column in row table (#45473)
d3c467625d is described below
commit d3c467625d83bd69e2b85a289f810ec5b2789d63
Author: Rossi Sun <[email protected]>
AuthorDate: Mon Feb 10 16:58:21 2025 +0800
GH-45393: [C++][Compute] Fix wrong decoding for 32-bit column in row table
(#45473)
### Rationale for this change
The failure reported in #45393 seems to be caused by a careless parentheses
typo introduced in #45336:
https://github.com/apache/arrow/blob/e32f56b478171fc4b53dc2042c4cf5d37c97e351/cpp/src/arrow/compute/row/encode_internal.cc#L281-L282
And unfortunately our `Grouper` UT doesn't have cases covering this
particular code path (the questioning code path is only triggered in `Grouper`
with very restrictive conditions: the row table is fixed-length, a 32-bit key
is encoded after some other keys).
### What changes are included in this PR?
An UT to reproduce the issue and the fix.
### Are these changes tested?
UT included.
### Are there any user-facing changes?
None.
* GitHub Issue: #45393
Authored-by: Rossi Sun <[email protected]>
Signed-off-by: Rossi Sun <[email protected]>
---
cpp/src/arrow/compute/row/encode_internal.cc | 2 +-
cpp/src/arrow/compute/row/grouper_test.cc | 22 ++++++++++++++++++++++
2 files changed, 23 insertions(+), 1 deletion(-)
diff --git a/cpp/src/arrow/compute/row/encode_internal.cc
b/cpp/src/arrow/compute/row/encode_internal.cc
index 0e2720a286..0d57cb83bf 100644
--- a/cpp/src/arrow/compute/row/encode_internal.cc
+++ b/cpp/src/arrow/compute/row/encode_internal.cc
@@ -279,7 +279,7 @@ void EncoderInteger::Decode(uint32_t start_row, uint32_t
num_rows,
case 4:
for (uint32_t i = 0; i < num_rows; ++i) {
reinterpret_cast<uint32_t*>(col_base)[i] = *reinterpret_cast<const
uint32_t*>(
- rows.fixed_length_rows(start_row + i + offset_within_row));
+ rows.fixed_length_rows(start_row + i) + offset_within_row);
}
break;
case 8:
diff --git a/cpp/src/arrow/compute/row/grouper_test.cc
b/cpp/src/arrow/compute/row/grouper_test.cc
index 0b8d8da0a6..3ed2fde2e9 100644
--- a/cpp/src/arrow/compute/row/grouper_test.cc
+++ b/cpp/src/arrow/compute/row/grouper_test.cc
@@ -825,6 +825,28 @@ TEST(Grouper, DictKey) {
g.grouper_->Consume(dict_span));
}
+// GH-45393: Test combinations of numeric type keys of different lengths.
+TEST(Grouper, MultipleIntKeys) {
+ auto types = NumericTypes();
+ for (auto& t0 : types) {
+ ARROW_SCOPED_TRACE("t0=", t0->ToString());
+ for (auto& t1 : types) {
+ ARROW_SCOPED_TRACE("t1=", t1->ToString());
+ for (auto& t2 : types) {
+ ARROW_SCOPED_TRACE("t2=", t2->ToString());
+ TestGrouper g({t0, t1, t2});
+
+ g.ExpectConsume(R"([[0, 1, 2], [0, 1, 2]])", "[0, 0]");
+ g.ExpectConsume(R"([[0, 1, 2], [null, 1, 2]])", "[0, 1]");
+ g.ExpectConsume(R"([[0, 1, 2], [0, null, 2]])", "[0, 2]");
+ g.ExpectConsume(R"([[0, 1, 2], [0, 1, null]])", "[0, 3]");
+
+ g.ExpectUniques("[[0, 1, 2], [null, 1, 2], [0, null, 2], [0, 1,
null]]");
+ }
+ }
+ }
+}
+
TEST(Grouper, StringInt64Key) {
TestGrouper g({utf8(), int64()});