This is an automated email from the ASF dual-hosted git repository.

zanmato pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new d3c467625d GH-45393: [C++][Compute] Fix wrong decoding for 32-bit 
column in row table (#45473)
d3c467625d is described below

commit d3c467625d83bd69e2b85a289f810ec5b2789d63
Author: Rossi Sun <[email protected]>
AuthorDate: Mon Feb 10 16:58:21 2025 +0800

    GH-45393: [C++][Compute] Fix wrong decoding for 32-bit column in row table 
(#45473)
    
    
    
    ### Rationale for this change
    
    The failure reported in #45393 seems to be caused by a careless parentheses 
typo introduced in #45336:
    
https://github.com/apache/arrow/blob/e32f56b478171fc4b53dc2042c4cf5d37c97e351/cpp/src/arrow/compute/row/encode_internal.cc#L281-L282
    
    And unfortunately our `Grouper` UT doesn't have cases covering this 
particular code path (the questioning code path is only triggered in `Grouper` 
with very restrictive conditions: the row table is fixed-length, a 32-bit key 
is encoded after some other keys).
    
    ### What changes are included in this PR?
    
    An UT to reproduce the issue and the fix.
    
    ### Are these changes tested?
    
    UT included.
    
    ### Are there any user-facing changes?
    
    None.
    
    * GitHub Issue: #45393
    
    Authored-by: Rossi Sun <[email protected]>
    Signed-off-by: Rossi Sun <[email protected]>
---
 cpp/src/arrow/compute/row/encode_internal.cc |  2 +-
 cpp/src/arrow/compute/row/grouper_test.cc    | 22 ++++++++++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/cpp/src/arrow/compute/row/encode_internal.cc 
b/cpp/src/arrow/compute/row/encode_internal.cc
index 0e2720a286..0d57cb83bf 100644
--- a/cpp/src/arrow/compute/row/encode_internal.cc
+++ b/cpp/src/arrow/compute/row/encode_internal.cc
@@ -279,7 +279,7 @@ void EncoderInteger::Decode(uint32_t start_row, uint32_t 
num_rows,
       case 4:
         for (uint32_t i = 0; i < num_rows; ++i) {
           reinterpret_cast<uint32_t*>(col_base)[i] = *reinterpret_cast<const 
uint32_t*>(
-              rows.fixed_length_rows(start_row + i + offset_within_row));
+              rows.fixed_length_rows(start_row + i) + offset_within_row);
         }
         break;
       case 8:
diff --git a/cpp/src/arrow/compute/row/grouper_test.cc 
b/cpp/src/arrow/compute/row/grouper_test.cc
index 0b8d8da0a6..3ed2fde2e9 100644
--- a/cpp/src/arrow/compute/row/grouper_test.cc
+++ b/cpp/src/arrow/compute/row/grouper_test.cc
@@ -825,6 +825,28 @@ TEST(Grouper, DictKey) {
                                   g.grouper_->Consume(dict_span));
 }
 
+// GH-45393: Test combinations of numeric type keys of different lengths.
+TEST(Grouper, MultipleIntKeys) {
+  auto types = NumericTypes();
+  for (auto& t0 : types) {
+    ARROW_SCOPED_TRACE("t0=", t0->ToString());
+    for (auto& t1 : types) {
+      ARROW_SCOPED_TRACE("t1=", t1->ToString());
+      for (auto& t2 : types) {
+        ARROW_SCOPED_TRACE("t2=", t2->ToString());
+        TestGrouper g({t0, t1, t2});
+
+        g.ExpectConsume(R"([[0, 1, 2], [0, 1, 2]])", "[0, 0]");
+        g.ExpectConsume(R"([[0, 1, 2], [null, 1, 2]])", "[0, 1]");
+        g.ExpectConsume(R"([[0, 1, 2], [0, null, 2]])", "[0, 2]");
+        g.ExpectConsume(R"([[0, 1, 2], [0, 1, null]])", "[0, 3]");
+
+        g.ExpectUniques("[[0, 1, 2], [null, 1, 2], [0, null, 2], [0, 1, 
null]]");
+      }
+    }
+  }
+}
+
 TEST(Grouper, StringInt64Key) {
   TestGrouper g({utf8(), int64()});
 

Reply via email to