This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new ae8e57c49 fix: resolve Miri UB in null struct field test, re-enable Miri on PRs (#3669)
ae8e57c49 is described below
commit ae8e57c49dfed4a0988e26e52243469843d04d60
Author: Andy Grove <[email protected]>
AuthorDate: Wed Mar 11 13:31:29 2026 -0600
fix: resolve Miri UB in null struct field test, re-enable Miri on PRs
(#3669)
Add bounds-checking debug_assert in SparkUnsafeRow::get_element_offset
to catch out-of-bounds accesses early.
Fix test_append_null_struct_field_to_struct_builder which had an
undersized 8-byte buffer (only null bitset, no field slot) with null
bit unset, causing an out-of-bounds read in get_long. Use 16 bytes
with bit 0 set to properly represent a null field.
Re-enable Miri on pull_request trigger now that the upstream cargo
nightly regression (#3499) is resolved.
---
.github/workflows/miri.yml | 18 ++++++++----------
native/core/src/execution/shuffle/spark_unsafe/row.rs | 16 +++++++++++++---
2 files changed, 21 insertions(+), 13 deletions(-)
diff --git a/.github/workflows/miri.yml b/.github/workflows/miri.yml
index c9ee6abdd..ea36e1359 100644
--- a/.github/workflows/miri.yml
+++ b/.github/workflows/miri.yml
@@ -28,16 +28,14 @@ on:
- "native/core/benches/**"
- "native/spark-expr/benches/**"
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
-# Disabled until Miri compatibility is restored
-# https://github.com/apache/datafusion-comet/issues/3499
-# pull_request:
-# paths-ignore:
-# - "doc/**"
-# - "docs/**"
-# - "**.md"
-# - "native/core/benches/**"
-# - "native/spark-expr/benches/**"
-# - "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
+ pull_request:
+ paths-ignore:
+ - "doc/**"
+ - "docs/**"
+ - "**.md"
+ - "native/core/benches/**"
+ - "native/spark-expr/benches/**"
+ - "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
# manual trigger
# https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
workflow_dispatch:
diff --git a/native/core/src/execution/shuffle/spark_unsafe/row.rs b/native/core/src/execution/shuffle/spark_unsafe/row.rs
index 6b41afae8..4d937db76 100644
--- a/native/core/src/execution/shuffle/spark_unsafe/row.rs
+++ b/native/core/src/execution/shuffle/spark_unsafe/row.rs
@@ -255,8 +255,15 @@ impl SparkUnsafeObject for SparkUnsafeRow {
self.row_addr
}
- fn get_element_offset(&self, index: usize, _: usize) -> *const u8 {
- (self.row_addr + self.row_bitset_width + (index * 8) as i64) as *const u8
+ fn get_element_offset(&self, index: usize, element_size: usize) -> *const u8 {
+ let offset = self.row_bitset_width + (index * 8) as i64;
+ debug_assert!(
+ self.row_size >= 0 && offset + element_size as i64 <= self.row_size as i64,
+ "get_element_offset: access at offset {offset} with size {element_size} \
+ exceeds row_size {} for index {index}",
+ self.row_size
+ );
+ (self.row_addr + offset) as *const u8
}
}
@@ -1659,7 +1666,10 @@ mod test {
let fields = Fields::from(vec![Field::new("st", data_type.clone(), true)]);
let mut struct_builder = StructBuilder::from_fields(fields, 1);
let mut row = SparkUnsafeRow::new_with_num_fields(1);
- let data = [0; 8];
+ // 8 bytes null bitset + 8 bytes field value = 16 bytes
+ // Set bit 0 in the null bitset to mark field 0 as null
+ let mut data = [0u8; 16];
+ data[0] = 1;
row.point_to_slice(&data);
append_field(&data_type, &mut struct_builder, &row, 0).expect("append field");
struct_builder.append_null();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]