This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 2e6beb434d Minor: CastExpr Ordering Handle (#10650)
2e6beb434d is described below
commit 2e6beb434d45693bc8c7ba636b0407c26c04fc37
Author: Berkay Şahin <[email protected]>
AuthorDate: Mon May 27 15:22:51 2024 +0300
Minor: CastExpr Ordering Handle (#10650)
* header option removed
* Update csv.rs
* Update path_partition.rs
* Update path_partition.rs
* adding test
* adding test
* Revert
---
datafusion/core/src/datasource/file_format/csv.rs | 2 --
.../core/src/datasource/listing_table_factory.rs | 6 +++--
.../physical-expr-common/src/expressions/cast.rs | 3 ++-
datafusion/sqllogictest/test_files/order.slt | 28 +++++++++++++++++++++-
4 files changed, 33 insertions(+), 6 deletions(-)
diff --git a/datafusion/core/src/datasource/file_format/csv.rs b/datafusion/core/src/datasource/file_format/csv.rs
index ae5ac52025..369534d620 100644
--- a/datafusion/core/src/datasource/file_format/csv.rs
+++ b/datafusion/core/src/datasource/file_format/csv.rs
@@ -734,9 +734,7 @@ mod tests {
let mut cfg = SessionConfig::new();
cfg.options_mut().catalog.has_header = true;
let session_state = SessionState::new_with_config_rt(cfg, runtime);
-
let integration =
LocalFileSystem::new_with_prefix(arrow_test_data()).unwrap();
-
let path = Path::from("csv/aggregate_test_100.csv");
let csv = CsvFormat::default().with_has_header(true);
let records_to_read = csv.options().schema_infer_max_rec;
diff --git a/datafusion/core/src/datasource/listing_table_factory.rs b/datafusion/core/src/datasource/listing_table_factory.rs
index 987b9e12a4..6e47498243 100644
--- a/datafusion/core/src/datasource/listing_table_factory.rs
+++ b/datafusion/core/src/datasource/listing_table_factory.rs
@@ -34,6 +34,7 @@ use crate::datasource::TableProvider;
use crate::execution::context::SessionState;
use arrow::datatypes::{DataType, SchemaRef};
+use datafusion_common::Result;
use datafusion_common::{arrow_datafusion_err, DataFusionError, FileType};
use datafusion_expr::CreateExternalTable;
@@ -56,13 +57,14 @@ impl TableProviderFactory for ListingTableFactory {
&self,
state: &SessionState,
cmd: &CreateExternalTable,
- ) -> datafusion_common::Result<Arc<dyn TableProvider>> {
- let mut table_options = state.default_table_options();
+ ) -> Result<Arc<dyn TableProvider>> {
let file_type = FileType::from_str(cmd.file_type.as_str()).map_err(|_|
{
DataFusionError::Execution(format!("Unknown FileType {}",
cmd.file_type))
})?;
+ let mut table_options = state.default_table_options();
table_options.set_file_format(file_type.clone());
table_options.alter_with_string_hash_map(&cmd.options)?;
+
let file_extension = get_extension(cmd.location.as_str());
let file_format: Arc<dyn FileFormat> = match file_type {
FileType::CSV => {
diff --git a/datafusion/physical-expr-common/src/expressions/cast.rs b/datafusion/physical-expr-common/src/expressions/cast.rs
index 8ef3d16f63..31b96889fd 100644
--- a/datafusion/physical-expr-common/src/expressions/cast.rs
+++ b/datafusion/physical-expr-common/src/expressions/cast.rs
@@ -169,7 +169,8 @@ impl PhysicalExpr for CastExpr {
let target_type = &self.cast_type;
let unbounded = Interval::make_unbounded(target_type)?;
- if source_datatype.is_numeric() && target_type.is_numeric()
+ if (source_datatype.is_numeric() || source_datatype == Boolean)
+ && target_type.is_numeric()
|| source_datatype.is_temporal() && target_type.is_temporal()
|| source_datatype.eq(target_type)
{
diff --git a/datafusion/sqllogictest/test_files/order.slt b/datafusion/sqllogictest/test_files/order.slt
index 066536a9d2..d7f10537d0 100644
--- a/datafusion/sqllogictest/test_files/order.slt
+++ b/datafusion/sqllogictest/test_files/order.slt
@@ -1105,4 +1105,30 @@ physical_plan
02)--SortPreservingMergeExec: [abs_c@0 ASC NULLS LAST], fetch=5
03)----ProjectionExec: expr=[abs(c@0) as abs_c]
04)------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c], output_ordering=[c@0 ASC NULLS LAST], has_header=true
\ No newline at end of file
+05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c], output_ordering=[c@0 ASC NULLS LAST], has_header=true
+
+# Boolean to integer casts preserve the order.
+statement ok
+CREATE EXTERNAL TABLE annotated_data_finite (
+ ts INTEGER,
+ inc_col INTEGER,
+ desc_col INTEGER,
+)
+STORED AS CSV
+WITH ORDER (inc_col ASC)
+WITH ORDER (desc_col DESC)
+LOCATION '../core/tests/data/window_1.csv'
+OPTIONS ('format.has_header' 'true');
+
+query TT
+EXPLAIN SELECT CAST((inc_col>desc_col) as integer) as c from annotated_data_finite order by c;
+----
+logical_plan
+01)Sort: c ASC NULLS LAST
+02)--Projection: CAST(annotated_data_finite.inc_col > annotated_data_finite.desc_col AS Int32) AS c
+03)----TableScan: annotated_data_finite projection=[inc_col, desc_col]
+physical_plan
+01)SortPreservingMergeExec: [c@0 ASC NULLS LAST]
+02)--ProjectionExec: expr=[CAST(inc_col@0 > desc_col@1 AS Int32) as c]
+03)----RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+04)------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_1.csv]]}, projection=[inc_col, desc_col], output_orderings=[[inc_col@0 ASC NULLS LAST], [desc_col@1 DESC]], has_header=true
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]