(arrow) branch main updated: GH-48160: [C++][Gandiva] Pass CPU attributes to LLVM (#48161)

kou Sat, 24 Jan 2026 23:45:48 -0800

This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git



The following commit(s) were added to refs/heads/main by this push:
     new 7bc47c0650 GH-48160: [C++][Gandiva] Pass CPU attributes to LLVM 
(#48161)
7bc47c0650 is described below

commit 7bc47c06509601e09c83177bda061c4faca84809
Author: Logan Riggs <[email protected]>
AuthorDate: Sat Jan 24 23:45:30 2026 -0800

    GH-48160: [C++][Gandiva] Pass CPU attributes to LLVM (#48161)
    
    ### Rationale for this change
    
    The CPU attributes are not passed to the LLVM layer, which means potential 
optimizations could be missed leading to inefficient code. This feature was 
lost as part of the refactoring in 
https://github.com/apache/arrow/commit/83cba25017a5c3a03e47f1851f242fa284f93533
    
    I also discovered a bug with decimal alignments that was exposed by this 
change and was only reproducible in our test environment.
    
    ### What changes are included in this PR?
    
    Pass the CPU attributes to the LLVM code generation, and a unit test.
    Fix the 16 bit vs 8 bit decimal alignment problem. This was causing a crash 
sometimes on certain architectures with certain queries. Added a unit test.
    
    ### Are these changes tested?
    Yes.
    
    ### Are there any user-facing changes?
    No.
    
    * GitHub Issue: #48160
    
    Lead-authored-by: Logan Riggs <[email protected]>
    Co-authored-by: lriggs <[email protected]>
    Signed-off-by: Sutou Kouhei <[email protected]>
---
 cpp/src/gandiva/CMakeLists.txt                  |   1 +
 cpp/src/gandiva/engine.cc                       |   1 +
 cpp/src/gandiva/llvm_generator.cc               |  16 +-
 cpp/src/gandiva/target_datalayout_test.cc       |  39 ++++
 cpp/src/gandiva/tests/CMakeLists.txt            |   1 +
 cpp/src/gandiva/tests/decimal_alignment_test.cc | 252 ++++++++++++++++++++++++
 6 files changed, 307 insertions(+), 3 deletions(-)

diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt
index e5760243b3..31a86d5da9 100644
--- a/cpp/src/gandiva/CMakeLists.txt
+++ b/cpp/src/gandiva/CMakeLists.txt
@@ -274,6 +274,7 @@ add_gandiva_test(internals-test
                  hash_utils_test.cc
                  gdv_function_stubs_test.cc
                  interval_holder_test.cc
+                 target_datalayout_test.cc
                  tests/test_util.cc
                  EXTRA_LINK_LIBS
                  re2::re2
diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc
index 64ed433a68..a718a80060 100644
--- a/cpp/src/gandiva/engine.cc
+++ b/cpp/src/gandiva/engine.cc
@@ -330,6 +330,7 @@ Engine::Engine(const std::shared_ptr<Configuration>& conf,
   // LLVM 10 doesn't like the expr function name to be the same as the module 
name
   auto module_id = "gdv_module_" + 
std::to_string(reinterpret_cast<uintptr_t>(this));
   module_ = std::make_unique<llvm::Module>(module_id, *context_);
+  module_->setDataLayout(target_machine_->createDataLayout());
 }
 
 Engine::~Engine() {}
diff --git a/cpp/src/gandiva/llvm_generator.cc 
b/cpp/src/gandiva/llvm_generator.cc
index 4e6480fa16..0f0918b3a1 100644
--- a/cpp/src/gandiva/llvm_generator.cc
+++ b/cpp/src/gandiva/llvm_generator.cc
@@ -399,8 +399,13 @@ Status LLVMGenerator::CodeGenExprValue(DexPtr value_expr, 
int buffer_count,
 
   if (output_type_id == arrow::Type::BOOL) {
     SetPackedBitValue(output_ref, loop_var, output_value->data());
-  } else if (arrow::is_primitive(output_type_id) ||
-             output_type_id == arrow::Type::DECIMAL) {
+  } else if (output_type_id == arrow::Type::DECIMAL) {
+    // Arrow decimal128 data is only 8-byte aligned, not 16-byte aligned.
+    // Use CreateAlignedStore with 8-byte alignment to match Arrow's actual 
alignment.
+    auto slot_offset =
+        builder->CreateGEP(types()->IRType(output_type_id), output_ref, 
loop_var);
+    builder->CreateAlignedStore(output_value->data(), slot_offset, 
llvm::MaybeAlign(8));
+  } else if (arrow::is_primitive(output_type_id)) {
     auto slot_offset =
         builder->CreateGEP(types()->IRType(output_type_id), output_ref, 
loop_var);
     builder->CreateStore(output_value->data(), slot_offset);
@@ -602,7 +607,12 @@ void LLVMGenerator::Visitor::Visit(const 
VectorReadFixedLenValueDex& dex) {
 
     case arrow::Type::DECIMAL: {
       auto slot_offset = builder->CreateGEP(types->i128_type(), slot_ref, 
slot_index);
-      slot_value = builder->CreateLoad(types->i128_type(), slot_offset, 
dex.FieldName());
+      // Arrow decimal128 data is only 8-byte aligned, not 16-byte aligned.
+      // Using CreateLoad with default alignment (16 for i128) causes crashes 
on
+      // misaligned data. Use CreateAlignedLoad with 8-byte alignment to match 
Arrow's
+      // actual alignment.
+      slot_value = builder->CreateAlignedLoad(
+          types->i128_type(), slot_offset, llvm::MaybeAlign(8), false, 
dex.FieldName());
       lvalue = generator_->BuildDecimalLValue(slot_value, dex.FieldType());
       break;
     }
diff --git a/cpp/src/gandiva/target_datalayout_test.cc 
b/cpp/src/gandiva/target_datalayout_test.cc
new file mode 100644
index 0000000000..0b32c6caf9
--- /dev/null
+++ b/cpp/src/gandiva/target_datalayout_test.cc
@@ -0,0 +1,39 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include <llvm/IR/DataLayout.h>
+
+#include "gandiva/llvm_generator.h"
+#include "gandiva/tests/test_util.h"
+
+namespace gandiva {
+
+// Test that verifies the target data layout string representation
+// is populated.
+TEST(TestTargetDataLayout, VerifyDataLayoutForArchitecture) {
+  ASSERT_OK_AND_ASSIGN(auto generator, 
LLVMGenerator::Make(TestConfiguration(), false));
+
+  llvm::Module* module = generator->module();
+  ASSERT_NE(module, nullptr);
+
+  const llvm::DataLayout& data_layout = module->getDataLayout();
+  std::string data_layout_str = data_layout.getStringRepresentation();
+
+  ASSERT_FALSE(data_layout_str.empty());
+}
+}  // namespace gandiva
diff --git a/cpp/src/gandiva/tests/CMakeLists.txt 
b/cpp/src/gandiva/tests/CMakeLists.txt
index 68138f50d8..356b976e00 100644
--- a/cpp/src/gandiva/tests/CMakeLists.txt
+++ b/cpp/src/gandiva/tests/CMakeLists.txt
@@ -20,6 +20,7 @@ add_gandiva_test(projector-test
                  binary_test.cc
                  boolean_expr_test.cc
                  date_time_test.cc
+                 decimal_alignment_test.cc
                  decimal_single_test.cc
                  decimal_test.cc
                  filter_project_test.cc
diff --git a/cpp/src/gandiva/tests/decimal_alignment_test.cc 
b/cpp/src/gandiva/tests/decimal_alignment_test.cc
new file mode 100644
index 0000000000..3028ec81f3
--- /dev/null
+++ b/cpp/src/gandiva/tests/decimal_alignment_test.cc
@@ -0,0 +1,252 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Test for decimal128 alignment issue fix.
+// Arrow decimal128 data may be 8-byte aligned but not 16-byte aligned.
+// This test verifies that Gandiva handles such data correctly.
+
+#include <gtest/gtest.h>
+
+#include "arrow/array/array_decimal.h"
+#include "arrow/array/builder_primitive.h"
+#include "arrow/buffer.h"
+#include "arrow/memory_pool.h"
+#include "arrow/status.h"
+#include "arrow/util/decimal.h"
+
+#include "gandiva/decimal_type_util.h"
+#include "gandiva/projector.h"
+#include "gandiva/tests/test_util.h"
+#include "gandiva/tree_expr_builder.h"
+
+using arrow::Decimal128;
+
+namespace gandiva {
+
+class TestDecimalAlignment : public ::testing::Test {
+ public:
+  void SetUp() { pool_ = arrow::default_memory_pool(); }
+
+ protected:
+  arrow::MemoryPool* pool_;
+};
+
+// Create a decimal128 array with data at a specific alignment offset
+// This simulates the real-world scenario where Arrow data from external 
sources
+// (like JNI/Java) may not be 16-byte aligned.
+std::shared_ptr<arrow::Array> MakeMisalignedDecimalArray(
+    const std::shared_ptr<arrow::Decimal128Type>& type,
+    const std::vector<Decimal128>& values, int alignment_offset) {
+  // Allocate buffer with extra space for misalignment
+  int64_t data_size = values.size() * 16;  // 16 bytes per Decimal128
+  int64_t buffer_size = data_size + 16;    // Extra space for offset
+
+  std::shared_ptr<arrow::Buffer> buffer;
+  ARROW_EXPECT_OK(arrow::AllocateBuffer(buffer_size).Value(&buffer));
+
+  // Calculate the starting offset to achieve desired alignment
+  // We want the data to be 8-byte aligned but NOT 16-byte aligned
+  uint8_t* raw_data = buffer->mutable_data();
+  uintptr_t addr = reinterpret_cast<uintptr_t>(raw_data);
+
+  // Find offset to get to 8-byte aligned but not 16-byte aligned address
+  int offset_to_8 = (8 - (addr % 8)) % 8;
+  int current_16_alignment = (addr + offset_to_8) % 16;
+
+  int final_offset;
+  if (alignment_offset == 8) {
+    // Want 8-byte aligned but NOT 16-byte aligned
+    if (current_16_alignment == 0) {
+      final_offset = offset_to_8 + 8;  // Add 8 to break 16-byte alignment
+    } else {
+      final_offset = offset_to_8;
+    }
+  } else {
+    // Want 16-byte aligned
+    final_offset = (16 - (addr % 16)) % 16;
+  }
+
+  // Copy decimal values to the offset location
+  uint8_t* data_start = raw_data + final_offset;
+  for (size_t i = 0; i < values.size(); i++) {
+    memcpy(data_start + i * 16, values[i].ToBytes().data(), 16);
+  }
+
+  // Verify alignment
+  uintptr_t data_addr = reinterpret_cast<uintptr_t>(data_start);
+  EXPECT_EQ(data_addr % 8, 0) << "Data should be 8-byte aligned";
+  if (alignment_offset == 8) {
+    EXPECT_NE(data_addr % 16, 0) << "Data should NOT be 16-byte aligned";
+  }
+
+  // Create a sliced buffer starting at our offset
+  auto sliced_buffer = arrow::SliceBuffer(buffer, final_offset, data_size);
+
+  // Create validity buffer (all valid)
+  std::shared_ptr<arrow::Buffer> validity_buffer;
+  ARROW_EXPECT_OK(arrow::AllocateBuffer((values.size() + 7) / 
8).Value(&validity_buffer));
+  memset(validity_buffer->mutable_data(), 0xFF, validity_buffer->size());
+
+  // Create the array with our misaligned data buffer
+  auto array_data = arrow::ArrayData::Make(type, 
static_cast<int64_t>(values.size()),
+                                           {validity_buffer, sliced_buffer});
+
+  return std::make_shared<arrow::Decimal128Array>(array_data);
+}
+
+// Test that decimal operations work correctly with 8-byte aligned (but not 
16-byte
+// aligned) data
+TEST_F(TestDecimalAlignment, TestMisalignedDecimalSubtract) {
+  constexpr int32_t precision = 38;
+  constexpr int32_t scale = 17;
+  auto decimal_type = std::make_shared<arrow::Decimal128Type>(precision, 
scale);
+  auto field_a = arrow::field("a", decimal_type);
+  auto field_b = arrow::field("b", decimal_type);
+  auto schema = arrow::schema({field_a, field_b});
+
+  Decimal128TypePtr output_type;
+  auto status = DecimalTypeUtil::GetResultType(
+      DecimalTypeUtil::kOpSubtract, {decimal_type, decimal_type}, 
&output_type);
+  ASSERT_OK(status);
+
+  auto res = arrow::field("res", output_type);
+  auto node_a = TreeExprBuilder::MakeField(field_a);
+  auto node_b = TreeExprBuilder::MakeField(field_b);
+  auto subtract =
+      TreeExprBuilder::MakeFunction("subtract", {node_a, node_b}, output_type);
+  auto expr = TreeExprBuilder::MakeExpression(subtract, res);
+
+  std::shared_ptr<Projector> projector;
+  status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
+  ASSERT_OK(status);
+
+  // Create test data
+  std::vector<Decimal128> values_a = {Decimal128(100), Decimal128(200), 
Decimal128(300)};
+  std::vector<Decimal128> values_b = {Decimal128(10), Decimal128(20), 
Decimal128(30)};
+
+  // Create arrays with 8-byte alignment (but NOT 16-byte aligned)
+  auto array_a = MakeMisalignedDecimalArray(decimal_type, values_a, 8);
+  auto array_b = MakeMisalignedDecimalArray(decimal_type, values_b, 8);
+
+  auto in_batch = arrow::RecordBatch::Make(schema, 3, {array_a, array_b});
+
+  // This should NOT crash even with misaligned data
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  ASSERT_OK(status);
+
+  // Verify results: 100-10=90, 200-20=180, 300-30=270
+  auto result = std::dynamic_pointer_cast<arrow::Decimal128Array>(outputs[0]);
+  ASSERT_NE(result, nullptr);
+  EXPECT_EQ(result->length(), 3);
+}
+
+// Create a misaligned output buffer for decimal128
+std::shared_ptr<arrow::ArrayData> MakeMisalignedDecimalOutput(
+    const std::shared_ptr<arrow::Decimal128Type>& type, int64_t num_records,
+    int alignment_offset) {
+  // Allocate data buffer with extra space for misalignment
+  int64_t data_size = num_records * 16;  // 16 bytes per Decimal128
+  int64_t buffer_size = data_size + 16;  // Extra space for offset
+
+  std::shared_ptr<arrow::Buffer> buffer;
+  ARROW_EXPECT_OK(arrow::AllocateBuffer(buffer_size).Value(&buffer));
+
+  uint8_t* raw_data = const_cast<uint8_t*>(buffer->data());
+  uintptr_t addr = reinterpret_cast<uintptr_t>(raw_data);
+
+  // Find offset to get to 8-byte aligned but not 16-byte aligned address
+  int offset_to_8 = (8 - (addr % 8)) % 8;
+  int current_16_alignment = (addr + offset_to_8) % 16;
+
+  int final_offset;
+  if (alignment_offset == 8) {
+    if (current_16_alignment == 0) {
+      final_offset = offset_to_8 + 8;
+    } else {
+      final_offset = offset_to_8;
+    }
+  } else {
+    final_offset = (16 - (addr % 16)) % 16;
+  }
+
+  // Verify alignment
+  uintptr_t data_addr = reinterpret_cast<uintptr_t>(raw_data + final_offset);
+  EXPECT_EQ(data_addr % 8, 0) << "Data should be 8-byte aligned";
+  if (alignment_offset == 8) {
+    EXPECT_NE(data_addr % 16, 0) << "Data should NOT be 16-byte aligned";
+  }
+
+  auto sliced_buffer = arrow::SliceBuffer(buffer, final_offset, data_size);
+
+  // Create validity buffer
+  int64_t bitmap_size = (num_records + 7) / 8;
+  std::shared_ptr<arrow::Buffer> validity_buffer;
+  ARROW_EXPECT_OK(arrow::AllocateBuffer(bitmap_size).Value(&validity_buffer));
+  memset(const_cast<uint8_t*>(validity_buffer->data()), 0xFF, 
validity_buffer->size());
+
+  return arrow::ArrayData::Make(type, num_records, {validity_buffer, 
sliced_buffer});
+}
+
+// Test that decimal STORES work correctly with 8-byte aligned (but not 
16-byte aligned)
+// output
+TEST_F(TestDecimalAlignment, TestMisalignedDecimalStore) {
+  constexpr int32_t precision = 38;
+  constexpr int32_t scale = 17;
+  auto decimal_type = std::make_shared<arrow::Decimal128Type>(precision, 
scale);
+  auto field_a = arrow::field("a", decimal_type);
+  auto field_b = arrow::field("b", decimal_type);
+  auto schema = arrow::schema({field_a, field_b});
+
+  Decimal128TypePtr output_type;
+  auto status = DecimalTypeUtil::GetResultType(
+      DecimalTypeUtil::kOpSubtract, {decimal_type, decimal_type}, 
&output_type);
+  ASSERT_OK(status);
+
+  auto res = arrow::field("res", output_type);
+  auto node_a = TreeExprBuilder::MakeField(field_a);
+  auto node_b = TreeExprBuilder::MakeField(field_b);
+  auto subtract =
+      TreeExprBuilder::MakeFunction("subtract", {node_a, node_b}, output_type);
+  auto expr = TreeExprBuilder::MakeExpression(subtract, res);
+
+  std::shared_ptr<Projector> projector;
+  status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
+  ASSERT_OK(status);
+
+  // Create ALIGNED input arrays (using standard Arrow allocation)
+  auto array_a = MakeArrowArrayDecimal(
+      decimal_type, {Decimal128(100), Decimal128(200), Decimal128(300)},
+      {true, true, true});
+  auto array_b = MakeArrowArrayDecimal(
+      decimal_type, {Decimal128(10), Decimal128(20), Decimal128(30)}, {true, 
true, true});
+
+  auto in_batch = arrow::RecordBatch::Make(schema, 3, {array_a, array_b});
+
+  // Create MISALIGNED output buffer (8-byte aligned but NOT 16-byte aligned)
+  auto output_data = MakeMisalignedDecimalOutput(output_type, 3, 8);
+
+  // This should NOT crash even with misaligned output buffer
+  status = projector->Evaluate(*in_batch, {output_data});
+  ASSERT_OK(status);
+
+  // Verify the output was written correctly
+  auto result = std::make_shared<arrow::Decimal128Array>(output_data);
+  EXPECT_EQ(result->length(), 3);
+}
+
+}  // namespace gandiva

(arrow) branch main updated: GH-48160: [C++][Gandiva] Pass CPU attributes to LLVM (#48161)

Reply via email to