This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git


The following commit(s) were added to refs/heads/main by this push:
     new bfcb96879 fix: better int96 support for experimental native scans (#1652)
bfcb96879 is described below

commit bfcb9687955ceb5e15bfb1f94491a805c8b6d931
Author: Matt Butrovich <[email protected]>
AuthorDate: Fri Apr 18 16:14:26 2025 -0400

    fix: better int96 support for experimental native scans (#1652)
---
 native/core/src/parquet/parquet_exec.rs             |  1 +
 native/core/src/parquet/parquet_support.rs          | 21 +++++++++++++--------
 .../scala/org/apache/comet/CometCastSuite.scala     |  6 ------
 3 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/native/core/src/parquet/parquet_exec.rs b/native/core/src/parquet/parquet_exec.rs
index b379d3632..fa7213fdf 100644
--- a/native/core/src/parquet/parquet_exec.rs
+++ b/native/core/src/parquet/parquet_exec.rs
@@ -118,6 +118,7 @@ fn get_options(session_timezone: &str) -> (TableParquetOptions, SparkParquetOpti
     let mut table_parquet_options = TableParquetOptions::new();
     table_parquet_options.global.pushdown_filters = true;
     table_parquet_options.global.reorder_filters = true;
+    table_parquet_options.global.coerce_int96 = Some("us".to_string());
     let mut spark_parquet_options =
         SparkParquetOptions::new(EvalMode::Legacy, session_timezone, false);
     spark_parquet_options.allow_cast_unsigned_ints = true;
diff --git a/native/core/src/parquet/parquet_support.rs b/native/core/src/parquet/parquet_support.rs
index 33abdaea5..6bf0f0fe4 100644
--- a/native/core/src/parquet/parquet_support.rs
+++ b/native/core/src/parquet/parquet_support.rs
@@ -16,18 +16,19 @@
 // under the License.
 
 use crate::execution::operators::ExecutionError;
-use arrow::array::{new_null_array, DictionaryArray, StructArray};
-use arrow::datatypes::DataType;
 use arrow::{
-    array::{cast::AsArray, types::Int32Type, Array, ArrayRef},
+    array::{
+        cast::AsArray, new_null_array, types::Int32Type, types::TimestampMicrosecondType, Array,
+        ArrayRef, DictionaryArray, StructArray,
+    },
     compute::{cast_with_options, take, CastOptions},
+    datatypes::{DataType, TimeUnit},
     util::display::FormatOptions,
 };
 use datafusion::common::{Result as DataFusionResult, ScalarValue};
 use datafusion::execution::object_store::ObjectStoreUrl;
 use datafusion::execution::runtime_env::RuntimeEnv;
 use datafusion::physical_plan::ColumnarValue;
-use datafusion_comet_spark_expr::utils::array_with_timezone;
 use datafusion_comet_spark_expr::EvalMode;
 use object_store::path::Path;
 use object_store::{parse_url, ObjectStore};
@@ -128,10 +129,6 @@ fn cast_array(
     parquet_options: &SparkParquetOptions,
 ) -> DataFusionResult<ArrayRef> {
     use DataType::*;
-    let array = match to_type {
-        Timestamp(_, None) => array, // array_with_timezone does not support to_type of NTZ.
-        _ => array_with_timezone(array, parquet_options.timezone.clone(), Some(to_type))?,
-    };
     let from_type = array.data_type().clone();
 
     let array = match &from_type {
@@ -166,6 +163,14 @@ fn cast_array(
             to_type,
             parquet_options,
         )?),
+        (Timestamp(TimeUnit::Microsecond, None), Timestamp(TimeUnit::Microsecond, Some(tz))) => {
+            Ok(Arc::new(
+                array
+                    .as_primitive::<TimestampMicrosecondType>()
+                    .reinterpret_cast::<TimestampMicrosecondType>()
+                    .with_timezone(Arc::clone(tz)),
+            ))
+        }
         _ => Ok(cast_with_options(&array, to_type, &PARQUET_OPTIONS)?),
     }
 }
diff --git a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
index 68febbab2..45b483853 100644
--- a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
@@ -900,8 +900,6 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
   }
 
   test("cast TimestampType to LongType") {
-    // https://github.com/apache/datafusion-comet/issues/1441
-    assume(!CometConf.isExperimentalNativeScan)
     castTest(generateTimestampsExtended(), DataTypes.LongType)
   }
 
@@ -924,14 +922,10 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
   }
 
   test("cast TimestampType to StringType") {
-    // https://github.com/apache/datafusion-comet/issues/1441
-    assume(!CometConf.isExperimentalNativeScan)
     castTest(generateTimestamps(), DataTypes.StringType)
   }
 
   test("cast TimestampType to DateType") {
-    // https://github.com/apache/datafusion-comet/issues/1441
-    assume(!CometConf.isExperimentalNativeScan)
     castTest(generateTimestamps(), DataTypes.DateType)
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to