This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git


The following commit(s) were added to refs/heads/main by this push:
     new 1dc092f6 chore: Add Miri workflow (#636)
1dc092f6 is described below

commit 1dc092f67a0714fe9721a44862f4e91063a87f58
Author: Andy Grove <[email protected]>
AuthorDate: Wed Jul 10 18:33:52 2024 -0600

    chore: Add Miri workflow (#636)
    
    * add miri workflow
    
    * fix
    
    * ignore some tests when miri is enabled
    
    * fix one safety warning and ignore some slow tests
    
    * fmt
    
    * prepare for review
    
    * add comments
    
    * update to reflect change in directory name
---
 .github/workflows/miri.yml                         | 49 ++++++++++++++++++++++
 native/core/Cargo.toml                             |  1 +
 native/core/src/errors.rs                          |  9 ++++
 .../src/execution/datafusion/expressions/cast.rs   |  2 +
 .../execution/datafusion/expressions/xxhash64.rs   |  1 +
 .../src/execution/datafusion/shuffle_writer.rs     |  1 +
 native/core/src/execution/datafusion/spark_hash.rs |  5 +--
 native/core/src/execution/kernels/temporal.rs      |  4 ++
 native/core/src/execution/sort.rs                  |  2 +-
 9 files changed, 69 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/miri.yml b/.github/workflows/miri.yml
new file mode 100644
index 00000000..a07ecc35
--- /dev/null
+++ b/.github/workflows/miri.yml
@@ -0,0 +1,49 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Run Miri Safety Checks
+
+on:
+  push:
+    paths-ignore:
+      - "doc/**"
+      - "docs/**"
+      - "**.md"
+  pull_request:
+    paths-ignore:
+      - "doc/**"
+      - "docs/**"
+      - "**.md"
+  # manual trigger
+  # 
https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
+  workflow_dispatch:
+
+jobs:
+  miri:
+    name: "Miri"
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - name: Install Miri
+        run: |
+          rustup toolchain install nightly --component miri
+          rustup override set nightly
+          cargo miri setup
+      - name: Test with Miri
+        run: |
+          cd native
+          MIRIFLAGS="-Zmiri-disable-isolation" cargo miri test
diff --git a/native/core/Cargo.toml b/native/core/Cargo.toml
index 3820bdd3..6432118d 100644
--- a/native/core/Cargo.toml
+++ b/native/core/Cargo.toml
@@ -93,6 +93,7 @@ twox-hash = "1.6.3"
 
 [features]
 default = []
+nightly = []
 
 [lib]
 name = "comet"
diff --git a/native/core/src/errors.rs b/native/core/src/errors.rs
index 7b0b5744..8c02a72d 100644
--- a/native/core/src/errors.rs
+++ b/native/core/src/errors.rs
@@ -586,6 +586,7 @@ mod tests {
     }
 
     #[test]
+    #[cfg_attr(miri, ignore)] // miri can't call foreign function `dlopen`
     pub fn error_from_panic() {
         let _guard = attach_current_thread();
         let mut env = jvm().get_env().unwrap();
@@ -604,6 +605,7 @@ mod tests {
     // Verify that functions that return an object are handled correctly.  
This is basically
     // a test of the "happy path".
     #[test]
+    #[cfg_attr(miri, ignore)] // miri can't call foreign function `dlopen`
     pub fn object_result() {
         let _guard = attach_current_thread();
         let mut env = jvm().get_env().unwrap();
@@ -621,6 +623,7 @@ mod tests {
     // Verify that functions that return an native time are handled correctly. 
 This is basically
     // a test of the "happy path".
     #[test]
+    #[cfg_attr(miri, ignore)] // miri can't call foreign function `dlopen`
     pub fn jlong_result() {
         let _guard = attach_current_thread();
         let mut env = jvm().get_env().unwrap();
@@ -637,6 +640,7 @@ mod tests {
     // Verify that functions that return an array can handle throwing 
exceptions.  The test
     // causes an exception by dividing by zero.
     #[test]
+    #[cfg_attr(miri, ignore)] // miri can't call foreign function `dlopen`
     pub fn jlong_panic_exception() {
         let _guard = attach_current_thread();
         let mut env = jvm().get_env().unwrap();
@@ -657,6 +661,7 @@ mod tests {
     // Verify that functions that return an native time are handled correctly. 
 This is basically
     // a test of the "happy path".
     #[test]
+    #[cfg_attr(miri, ignore)] // miri can't call foreign function `dlopen`
     pub fn jlong_result_ok() {
         let _guard = attach_current_thread();
         let mut env = jvm().get_env().unwrap();
@@ -673,6 +678,7 @@ mod tests {
     // Verify that functions that return an native time are handled correctly. 
 This is basically
     // a test of the "happy path".
     #[test]
+    #[cfg_attr(miri, ignore)] // miri can't call foreign function `dlopen`
     pub fn jlong_result_err() {
         let _guard = attach_current_thread();
         let mut env = jvm().get_env().unwrap();
@@ -693,6 +699,7 @@ mod tests {
     // Verify that functions that return an array are handled correctly.  This 
is basically
     // a test of the "happy path".
     #[test]
+    #[cfg_attr(miri, ignore)] // miri can't call foreign function `dlopen`
     pub fn jint_array_result() {
         let _guard = attach_current_thread();
         let mut env = jvm().get_env().unwrap();
@@ -713,6 +720,7 @@ mod tests {
     // Verify that functions that return an array can handle throwing 
exceptions.  The test
     // causes an exception by dividing by zero.
     #[test]
+    #[cfg_attr(miri, ignore)] // miri can't call foreign function `dlopen`
     pub fn jint_array_panic_exception() {
         let _guard = attach_current_thread();
         let mut env = jvm().get_env().unwrap();
@@ -736,6 +744,7 @@ mod tests {
     /// See [`object_panic_exception`] for a test which involves generating a 
panic and verifying
     /// that the resulting stack trace includes the offending call.
     #[test]
+    #[cfg_attr(miri, ignore)] // miri can't call foreign function `dlopen`
     pub fn stacktrace_string() {
         // Setup: Start with a backtrace that includes all of the expected 
scenarios, including
         // cases where the file and location are not provided as part of the 
backtrace capture
diff --git a/native/core/src/execution/datafusion/expressions/cast.rs 
b/native/core/src/execution/datafusion/expressions/cast.rs
index 9e3205ce..154ff28b 100644
--- a/native/core/src/execution/datafusion/expressions/cast.rs
+++ b/native/core/src/execution/datafusion/expressions/cast.rs
@@ -1646,6 +1646,7 @@ mod tests {
     use super::*;
 
     #[test]
+    #[cfg_attr(miri, ignore)] // test takes too long with miri
     fn timestamp_parser_test() {
         // write for all formats
         assert_eq!(
@@ -1683,6 +1684,7 @@ mod tests {
     }
 
     #[test]
+    #[cfg_attr(miri, ignore)] // test takes too long with miri
     fn test_cast_string_to_timestamp() {
         let array: ArrayRef = Arc::new(StringArray::from(vec![
             Some("2020-01-01T12:34:56.123456"),
diff --git a/native/core/src/execution/datafusion/expressions/xxhash64.rs 
b/native/core/src/execution/datafusion/expressions/xxhash64.rs
index 94b9e04b..508cfe59 100644
--- a/native/core/src/execution/datafusion/expressions/xxhash64.rs
+++ b/native/core/src/execution/datafusion/expressions/xxhash64.rs
@@ -162,6 +162,7 @@ mod test {
     use twox_hash::XxHash64;
 
     #[test]
+    #[cfg_attr(miri, ignore)] // test takes too long with miri
     fn test_xxhash64_random() {
         let mut rng = rand::thread_rng();
         for len in 0..128 {
diff --git a/native/core/src/execution/datafusion/shuffle_writer.rs 
b/native/core/src/execution/datafusion/shuffle_writer.rs
index 6e59ce53..3b934813 100644
--- a/native/core/src/execution/datafusion/shuffle_writer.rs
+++ b/native/core/src/execution/datafusion/shuffle_writer.rs
@@ -1451,6 +1451,7 @@ mod test {
     }
 
     #[test]
+    #[cfg_attr(miri, ignore)] // miri can't call foreign function 
`ZSTD_createCCtx`
     fn test_insert_larger_batch() {
         let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Utf8, 
true)]));
         let mut b = StringBuilder::new();
diff --git a/native/core/src/execution/datafusion/spark_hash.rs 
b/native/core/src/execution/datafusion/spark_hash.rs
index 1f0658ae..15bded3a 100644
--- a/native/core/src/execution/datafusion/spark_hash.rs
+++ b/native/core/src/execution/datafusion/spark_hash.rs
@@ -89,10 +89,7 @@ pub(crate) fn spark_compatible_murmur3_hash<T: 
AsRef<[u8]>>(data: T, seed: u32)
     // data is &[u8] so we do not need to check for proper alignment
     unsafe {
         let mut h1 = if len_aligned > 0 {
-            hash_bytes_by_int(
-                std::slice::from_raw_parts(data.get_unchecked(0), len_aligned),
-                seed,
-            )
+            hash_bytes_by_int(&data[0..len_aligned], seed)
         } else {
             seed as i32
         };
diff --git a/native/core/src/execution/kernels/temporal.rs 
b/native/core/src/execution/kernels/temporal.rs
index 1868c6fe..9cf35af1 100644
--- a/native/core/src/execution/kernels/temporal.rs
+++ b/native/core/src/execution/kernels/temporal.rs
@@ -824,6 +824,7 @@ mod tests {
     use std::sync::Arc;
 
     #[test]
+    #[cfg_attr(miri, ignore)] // test takes too long with miri
     fn test_date_trunc() {
         let size = 1000;
         let mut vec: Vec<i32> = Vec::with_capacity(size);
@@ -962,6 +963,7 @@ mod tests {
     }
 
     #[test]
+    #[cfg_attr(miri, ignore)] // test takes too long with miri
     fn test_timestamp_trunc() {
         let size = 1000;
         let mut vec: Vec<i64> = Vec::with_capacity(size);
@@ -998,6 +1000,8 @@ mod tests {
     }
 
     #[test]
+    // test takes too long with miri
+    #[cfg_attr(miri, ignore)]
     // This test only verifies that the various input array types work. 
Actually correctness to
     // ensure this produces the same results as spark is verified in the JVM 
tests
     fn test_timestamp_trunc_array_fmt_dyn() {
diff --git a/native/core/src/execution/sort.rs 
b/native/core/src/execution/sort.rs
index eeeb11d5..b8687652 100644
--- a/native/core/src/execution/sort.rs
+++ b/native/core/src/execution/sort.rs
@@ -165,7 +165,7 @@ where
                         // because they are defined as Vec<Vec<T>>
                         ptr::copy_nonoverlapping(
                             bucket.as_ptr(),
-                            self.get_unchecked_mut(pos),
+                            self.as_mut_ptr().add(pos),
                             bucket.len(),
                         );
                     }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to