This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new bfcb96879 fix: better int96 support for experimental native scans (#1652)
bfcb96879 is described below
commit bfcb9687955ceb5e15bfb1f94491a805c8b6d931
Author: Matt Butrovich <[email protected]>
AuthorDate: Fri Apr 18 16:14:26 2025 -0400
fix: better int96 support for experimental native scans (#1652)
---
native/core/src/parquet/parquet_exec.rs | 1 +
native/core/src/parquet/parquet_support.rs | 21 +++++++++++++--------
.../scala/org/apache/comet/CometCastSuite.scala | 6 ------
3 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/native/core/src/parquet/parquet_exec.rs b/native/core/src/parquet/parquet_exec.rs
index b379d3632..fa7213fdf 100644
--- a/native/core/src/parquet/parquet_exec.rs
+++ b/native/core/src/parquet/parquet_exec.rs
@@ -118,6 +118,7 @@ fn get_options(session_timezone: &str) -> (TableParquetOptions, SparkParquetOpti
let mut table_parquet_options = TableParquetOptions::new();
table_parquet_options.global.pushdown_filters = true;
table_parquet_options.global.reorder_filters = true;
+ table_parquet_options.global.coerce_int96 = Some("us".to_string());
let mut spark_parquet_options =
SparkParquetOptions::new(EvalMode::Legacy, session_timezone, false);
spark_parquet_options.allow_cast_unsigned_ints = true;
diff --git a/native/core/src/parquet/parquet_support.rs b/native/core/src/parquet/parquet_support.rs
index 33abdaea5..6bf0f0fe4 100644
--- a/native/core/src/parquet/parquet_support.rs
+++ b/native/core/src/parquet/parquet_support.rs
@@ -16,18 +16,19 @@
// under the License.
use crate::execution::operators::ExecutionError;
-use arrow::array::{new_null_array, DictionaryArray, StructArray};
-use arrow::datatypes::DataType;
use arrow::{
- array::{cast::AsArray, types::Int32Type, Array, ArrayRef},
+ array::{
+ cast::AsArray, new_null_array, types::Int32Type, types::TimestampMicrosecondType, Array,
+ ArrayRef, DictionaryArray, StructArray,
+ },
compute::{cast_with_options, take, CastOptions},
+ datatypes::{DataType, TimeUnit},
util::display::FormatOptions,
};
use datafusion::common::{Result as DataFusionResult, ScalarValue};
use datafusion::execution::object_store::ObjectStoreUrl;
use datafusion::execution::runtime_env::RuntimeEnv;
use datafusion::physical_plan::ColumnarValue;
-use datafusion_comet_spark_expr::utils::array_with_timezone;
use datafusion_comet_spark_expr::EvalMode;
use object_store::path::Path;
use object_store::{parse_url, ObjectStore};
@@ -128,10 +129,6 @@ fn cast_array(
parquet_options: &SparkParquetOptions,
) -> DataFusionResult<ArrayRef> {
use DataType::*;
- let array = match to_type {
- Timestamp(_, None) => array, // array_with_timezone does not support to_type of NTZ.
- _ => array_with_timezone(array, parquet_options.timezone.clone(), Some(to_type))?,
- };
let from_type = array.data_type().clone();
let array = match &from_type {
@@ -166,6 +163,14 @@ fn cast_array(
to_type,
parquet_options,
)?),
+ (Timestamp(TimeUnit::Microsecond, None), Timestamp(TimeUnit::Microsecond, Some(tz))) => {
+ Ok(Arc::new(
+ array
+ .as_primitive::<TimestampMicrosecondType>()
+ .reinterpret_cast::<TimestampMicrosecondType>()
+ .with_timezone(Arc::clone(tz)),
+ ))
+ }
_ => Ok(cast_with_options(&array, to_type, &PARQUET_OPTIONS)?),
}
}
diff --git a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
index 68febbab2..45b483853 100644
--- a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
@@ -900,8 +900,6 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
}
test("cast TimestampType to LongType") {
- // https://github.com/apache/datafusion-comet/issues/1441
- assume(!CometConf.isExperimentalNativeScan)
castTest(generateTimestampsExtended(), DataTypes.LongType)
}
@@ -924,14 +922,10 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
}
test("cast TimestampType to StringType") {
- // https://github.com/apache/datafusion-comet/issues/1441
- assume(!CometConf.isExperimentalNativeScan)
castTest(generateTimestamps(), DataTypes.StringType)
}
test("cast TimestampType to DateType") {
- // https://github.com/apache/datafusion-comet/issues/1441
- assume(!CometConf.isExperimentalNativeScan)
castTest(generateTimestamps(), DataTypes.DateType)
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]