This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new 1dc092f6 chore: Add Miri workflow (#636)
1dc092f6 is described below
commit 1dc092f67a0714fe9721a44862f4e91063a87f58
Author: Andy Grove <[email protected]>
AuthorDate: Wed Jul 10 18:33:52 2024 -0600
chore: Add Miri workflow (#636)
* add miri workflow
* fix
* ignore some tests when miri is enabled
* fix one safety warning and ignore some slow tests
* fmt
* prepare for review
* add comments
* update to reflect change in directory name
---
.github/workflows/miri.yml | 49 ++++++++++++++++++++++
native/core/Cargo.toml | 1 +
native/core/src/errors.rs | 9 ++++
.../src/execution/datafusion/expressions/cast.rs | 2 +
.../execution/datafusion/expressions/xxhash64.rs | 1 +
.../src/execution/datafusion/shuffle_writer.rs | 1 +
native/core/src/execution/datafusion/spark_hash.rs | 5 +--
native/core/src/execution/kernels/temporal.rs | 4 ++
native/core/src/execution/sort.rs | 2 +-
9 files changed, 69 insertions(+), 5 deletions(-)
diff --git a/.github/workflows/miri.yml b/.github/workflows/miri.yml
new file mode 100644
index 00000000..a07ecc35
--- /dev/null
+++ b/.github/workflows/miri.yml
@@ -0,0 +1,49 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Run Miri Safety Checks
+
+on:
+ push:
+ paths-ignore:
+ - "doc/**"
+ - "docs/**"
+ - "**.md"
+ pull_request:
+ paths-ignore:
+ - "doc/**"
+ - "docs/**"
+ - "**.md"
+ # manual trigger
+ #
https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
+ workflow_dispatch:
+
+jobs:
+ miri:
+ name: "Miri"
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v3
+ - name: Install Miri
+ run: |
+ rustup toolchain install nightly --component miri
+ rustup override set nightly
+ cargo miri setup
+ - name: Test with Miri
+ run: |
+ cd native
+ MIRIFLAGS="-Zmiri-disable-isolation" cargo miri test
diff --git a/native/core/Cargo.toml b/native/core/Cargo.toml
index 3820bdd3..6432118d 100644
--- a/native/core/Cargo.toml
+++ b/native/core/Cargo.toml
@@ -93,6 +93,7 @@ twox-hash = "1.6.3"
[features]
default = []
+nightly = []
[lib]
name = "comet"
diff --git a/native/core/src/errors.rs b/native/core/src/errors.rs
index 7b0b5744..8c02a72d 100644
--- a/native/core/src/errors.rs
+++ b/native/core/src/errors.rs
@@ -586,6 +586,7 @@ mod tests {
}
#[test]
+ #[cfg_attr(miri, ignore)] // miri can't call foreign function `dlopen`
pub fn error_from_panic() {
let _guard = attach_current_thread();
let mut env = jvm().get_env().unwrap();
@@ -604,6 +605,7 @@ mod tests {
// Verify that functions that return an object are handled correctly.
This is basically
// a test of the "happy path".
#[test]
+ #[cfg_attr(miri, ignore)] // miri can't call foreign function `dlopen`
pub fn object_result() {
let _guard = attach_current_thread();
let mut env = jvm().get_env().unwrap();
@@ -621,6 +623,7 @@ mod tests {
// Verify that functions that return an native time are handled correctly.
This is basically
// a test of the "happy path".
#[test]
+ #[cfg_attr(miri, ignore)] // miri can't call foreign function `dlopen`
pub fn jlong_result() {
let _guard = attach_current_thread();
let mut env = jvm().get_env().unwrap();
@@ -637,6 +640,7 @@ mod tests {
// Verify that functions that return an array can handle throwing
exceptions. The test
// causes an exception by dividing by zero.
#[test]
+ #[cfg_attr(miri, ignore)] // miri can't call foreign function `dlopen`
pub fn jlong_panic_exception() {
let _guard = attach_current_thread();
let mut env = jvm().get_env().unwrap();
@@ -657,6 +661,7 @@ mod tests {
// Verify that functions that return an native time are handled correctly.
This is basically
// a test of the "happy path".
#[test]
+ #[cfg_attr(miri, ignore)] // miri can't call foreign function `dlopen`
pub fn jlong_result_ok() {
let _guard = attach_current_thread();
let mut env = jvm().get_env().unwrap();
@@ -673,6 +678,7 @@ mod tests {
// Verify that functions that return an native time are handled correctly.
This is basically
// a test of the "happy path".
#[test]
+ #[cfg_attr(miri, ignore)] // miri can't call foreign function `dlopen`
pub fn jlong_result_err() {
let _guard = attach_current_thread();
let mut env = jvm().get_env().unwrap();
@@ -693,6 +699,7 @@ mod tests {
// Verify that functions that return an array are handled correctly. This
is basically
// a test of the "happy path".
#[test]
+ #[cfg_attr(miri, ignore)] // miri can't call foreign function `dlopen`
pub fn jint_array_result() {
let _guard = attach_current_thread();
let mut env = jvm().get_env().unwrap();
@@ -713,6 +720,7 @@ mod tests {
// Verify that functions that return an array can handle throwing
exceptions. The test
// causes an exception by dividing by zero.
#[test]
+ #[cfg_attr(miri, ignore)] // miri can't call foreign function `dlopen`
pub fn jint_array_panic_exception() {
let _guard = attach_current_thread();
let mut env = jvm().get_env().unwrap();
@@ -736,6 +744,7 @@ mod tests {
/// See [`object_panic_exception`] for a test which involves generating a
panic and verifying
/// that the resulting stack trace includes the offending call.
#[test]
+ #[cfg_attr(miri, ignore)] // miri can't call foreign function `dlopen`
pub fn stacktrace_string() {
// Setup: Start with a backtrace that includes all of the expected
scenarios, including
// cases where the file and location are not provided as part of the
backtrace capture
diff --git a/native/core/src/execution/datafusion/expressions/cast.rs
b/native/core/src/execution/datafusion/expressions/cast.rs
index 9e3205ce..154ff28b 100644
--- a/native/core/src/execution/datafusion/expressions/cast.rs
+++ b/native/core/src/execution/datafusion/expressions/cast.rs
@@ -1646,6 +1646,7 @@ mod tests {
use super::*;
#[test]
+ #[cfg_attr(miri, ignore)] // test takes too long with miri
fn timestamp_parser_test() {
// write for all formats
assert_eq!(
@@ -1683,6 +1684,7 @@ mod tests {
}
#[test]
+ #[cfg_attr(miri, ignore)] // test takes too long with miri
fn test_cast_string_to_timestamp() {
let array: ArrayRef = Arc::new(StringArray::from(vec![
Some("2020-01-01T12:34:56.123456"),
diff --git a/native/core/src/execution/datafusion/expressions/xxhash64.rs
b/native/core/src/execution/datafusion/expressions/xxhash64.rs
index 94b9e04b..508cfe59 100644
--- a/native/core/src/execution/datafusion/expressions/xxhash64.rs
+++ b/native/core/src/execution/datafusion/expressions/xxhash64.rs
@@ -162,6 +162,7 @@ mod test {
use twox_hash::XxHash64;
#[test]
+ #[cfg_attr(miri, ignore)] // test takes too long with miri
fn test_xxhash64_random() {
let mut rng = rand::thread_rng();
for len in 0..128 {
diff --git a/native/core/src/execution/datafusion/shuffle_writer.rs
b/native/core/src/execution/datafusion/shuffle_writer.rs
index 6e59ce53..3b934813 100644
--- a/native/core/src/execution/datafusion/shuffle_writer.rs
+++ b/native/core/src/execution/datafusion/shuffle_writer.rs
@@ -1451,6 +1451,7 @@ mod test {
}
#[test]
+ #[cfg_attr(miri, ignore)] // miri can't call foreign function
`ZSTD_createCCtx`
fn test_insert_larger_batch() {
let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Utf8,
true)]));
let mut b = StringBuilder::new();
diff --git a/native/core/src/execution/datafusion/spark_hash.rs
b/native/core/src/execution/datafusion/spark_hash.rs
index 1f0658ae..15bded3a 100644
--- a/native/core/src/execution/datafusion/spark_hash.rs
+++ b/native/core/src/execution/datafusion/spark_hash.rs
@@ -89,10 +89,7 @@ pub(crate) fn spark_compatible_murmur3_hash<T:
AsRef<[u8]>>(data: T, seed: u32)
// data is &[u8] so we do not need to check for proper alignment
unsafe {
let mut h1 = if len_aligned > 0 {
- hash_bytes_by_int(
- std::slice::from_raw_parts(data.get_unchecked(0), len_aligned),
- seed,
- )
+ hash_bytes_by_int(&data[0..len_aligned], seed)
} else {
seed as i32
};
diff --git a/native/core/src/execution/kernels/temporal.rs
b/native/core/src/execution/kernels/temporal.rs
index 1868c6fe..9cf35af1 100644
--- a/native/core/src/execution/kernels/temporal.rs
+++ b/native/core/src/execution/kernels/temporal.rs
@@ -824,6 +824,7 @@ mod tests {
use std::sync::Arc;
#[test]
+ #[cfg_attr(miri, ignore)] // test takes too long with miri
fn test_date_trunc() {
let size = 1000;
let mut vec: Vec<i32> = Vec::with_capacity(size);
@@ -962,6 +963,7 @@ mod tests {
}
#[test]
+ #[cfg_attr(miri, ignore)] // test takes too long with miri
fn test_timestamp_trunc() {
let size = 1000;
let mut vec: Vec<i64> = Vec::with_capacity(size);
@@ -998,6 +1000,8 @@ mod tests {
}
#[test]
+ // test takes too long with miri
+ #[cfg_attr(miri, ignore)]
// This test only verifies that the various input array types work.
Actually correctness to
// ensure this produces the same results as spark is verified in the JVM
tests
fn test_timestamp_trunc_array_fmt_dyn() {
diff --git a/native/core/src/execution/sort.rs
b/native/core/src/execution/sort.rs
index eeeb11d5..b8687652 100644
--- a/native/core/src/execution/sort.rs
+++ b/native/core/src/execution/sort.rs
@@ -165,7 +165,7 @@ where
// because they are defined as Vec<Vec<T>>
ptr::copy_nonoverlapping(
bucket.as_ptr(),
- self.get_unchecked_mut(pos),
+ self.as_mut_ptr().add(pos),
bucket.len(),
);
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]