This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 9cda815a4a Minor: port more timestamp tests to sqllogictests (#5832)
9cda815a4a is described below
commit 9cda815a4afd1cb89529323258a35a6bf2013689
Author: Andrew Lamb <[email protected]>
AuthorDate: Mon Apr 3 15:06:51 2023 +0200
Minor: port more timestamp tests to sqllogictests (#5832)
* Minor: port more timestamp tests to sqllogictests
* fmt
---
datafusion/core/tests/sql/mod.rs | 18 --
datafusion/core/tests/sql/timestamp.rs | 347 ---------------------
.../tests/sqllogictests/test_files/timestamps.slt | 121 +++++++
3 files changed, 121 insertions(+), 365 deletions(-)
diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs
index 00918638e9..d9f05278c9 100644
--- a/datafusion/core/tests/sql/mod.rs
+++ b/datafusion/core/tests/sql/mod.rs
@@ -1144,20 +1144,6 @@ async fn plan_and_collect(ctx: &SessionContext, sql:
&str) -> Result<Vec<RecordB
ctx.sql(sql).await?.collect().await
}
-/// Execute query and return results as a Vec of RecordBatches or an error
-async fn try_execute_to_batches(
- ctx: &SessionContext,
- sql: &str,
-) -> Result<Vec<RecordBatch>> {
- let dataframe = ctx.sql(sql).await?;
- let logical_schema = dataframe.schema().clone();
- let (state, plan) = dataframe.into_parts();
-
- let optimized = state.optimize(&plan)?;
- assert_eq!(&logical_schema, optimized.schema().as_ref());
- DataFrame::new(state, optimized).collect().await
-}
-
/// Execute query and return results as a Vec of RecordBatches
async fn execute_to_batches(ctx: &SessionContext, sql: &str) ->
Vec<RecordBatch> {
let df = ctx.sql(sql).await.unwrap();
@@ -1379,10 +1365,6 @@ where
Ok(Arc::new(table))
}
-fn make_timestamp_nano_table() -> Result<Arc<MemTable>> {
- make_timestamp_table::<TimestampNanosecondType>()
-}
-
/// Return a new table provider that has a single Int32 column with
/// values between `seq_start` and `seq_end`
pub fn table_with_sequence(
diff --git a/datafusion/core/tests/sql/timestamp.rs
b/datafusion/core/tests/sql/timestamp.rs
index e62465203a..b4fa86d3be 100644
--- a/datafusion/core/tests/sql/timestamp.rs
+++ b/datafusion/core/tests/sql/timestamp.rs
@@ -17,28 +17,8 @@
use super::*;
use datafusion::from_slice::FromSlice;
-use datafusion_common::ScalarValue;
use std::ops::Add;
-#[tokio::test]
-async fn count_distinct_timestamps() -> Result<()> {
- let ctx = SessionContext::new();
- ctx.register_table("ts_data", make_timestamp_nano_table()?)?;
-
- let sql = "SELECT COUNT(DISTINCT(ts)) FROM ts_data";
- let actual = execute_to_batches(&ctx, sql).await;
-
- let expected = vec![
- "+----------------------------+",
- "| COUNT(DISTINCT ts_data.ts) |",
- "+----------------------------+",
- "| 3 |",
- "+----------------------------+",
- ];
- assert_batches_eq!(expected, &actual);
- Ok(())
-}
-
#[tokio::test]
async fn test_current_timestamp_expressions() -> Result<()> {
let t1 = chrono::Utc::now().timestamp();
@@ -550,333 +530,6 @@ async fn group_by_timestamp_millis() -> Result<()> {
Ok(())
}
-#[tokio::test]
-async fn interval_year() -> Result<()> {
- let ctx = SessionContext::new();
-
- let sql = "select date '1994-01-01' + interval '1' year as date;";
- let results = execute_to_batches(&ctx, sql).await;
-
- let expected = vec![
- "+------------+",
- "| date |",
- "+------------+",
- "| 1995-01-01 |",
- "+------------+",
- ];
-
- assert_batches_eq!(expected, &results);
-
- Ok(())
-}
-
-#[tokio::test]
-async fn add_interval_month() -> Result<()> {
- let ctx = SessionContext::new();
-
- let sql = "select date '1994-01-31' + interval '1' month as date;";
- let results = execute_to_batches(&ctx, sql).await;
-
- let expected = vec![
- "+------------+",
- "| date |",
- "+------------+",
- "| 1994-02-28 |",
- "+------------+",
- ];
-
- assert_batches_eq!(expected, &results);
-
- Ok(())
-}
-
-#[tokio::test]
-async fn sub_interval_month() -> Result<()> {
- let ctx = SessionContext::new();
-
- let sql = "select date '1994-03-31' - interval '1' month as date;";
- let results = execute_to_batches(&ctx, sql).await;
-
- let expected = vec![
- "+------------+",
- "| date |",
- "+------------+",
- "| 1994-02-28 |",
- "+------------+",
- ];
-
- assert_batches_eq!(expected, &results);
-
- Ok(())
-}
-
-#[tokio::test]
-async fn sub_month_wrap() -> Result<()> {
- let ctx = SessionContext::new();
-
- let sql = "select date '1994-01-15' - interval '1' month as date;";
- let results = execute_to_batches(&ctx, sql).await;
-
- let expected = vec![
- "+------------+",
- "| date |",
- "+------------+",
- "| 1993-12-15 |",
- "+------------+",
- ];
-
- assert_batches_eq!(expected, &results);
-
- Ok(())
-}
-
-#[tokio::test]
-async fn add_interval_day() -> Result<()> {
- let ctx = SessionContext::new();
-
- let sql = "select date '1994-01-15' + interval '1' day as date;";
- let results = execute_to_batches(&ctx, sql).await;
-
- let expected = vec![
- "+------------+",
- "| date |",
- "+------------+",
- "| 1994-01-16 |",
- "+------------+",
- ];
-
- assert_batches_eq!(expected, &results);
-
- Ok(())
-}
-
-#[tokio::test]
-async fn sub_interval_day() -> Result<()> {
- let ctx = SessionContext::new();
-
- let sql = "select date '1994-01-01' - interval '1' day as date;";
- let results = execute_to_batches(&ctx, sql).await;
-
- let expected = vec![
- "+------------+",
- "| date |",
- "+------------+",
- "| 1993-12-31 |",
- "+------------+",
- ];
-
- assert_batches_eq!(expected, &results);
-
- Ok(())
-}
-
-#[tokio::test]
-async fn cast_string_to_time() {
- let config = SessionConfig::new().set(
- "datafusion.optimizer.skip_failed_rules",
- ScalarValue::Boolean(Some(false)),
- );
- let ctx = SessionContext::with_config(config);
-
- let sql = "select \
- time '08:09:10.123456789' as time_nano, \
- time '13:14:15.123456' as time_micro,\
- time '13:14:15.123' as time_milli,\
- time '13:14:15' as time;";
- let results = execute_to_batches(&ctx, sql).await;
-
- let expected = vec![
- "+--------------------+-----------------+--------------+----------+",
- "| time_nano | time_micro | time_milli | time |",
- "+--------------------+-----------------+--------------+----------+",
- "| 08:09:10.123456789 | 13:14:15.123456 | 13:14:15.123 | 13:14:15 |",
- "+--------------------+-----------------+--------------+----------+",
- ];
- assert_batches_eq!(expected, &results);
-
- // Fallible cases
-
- let sql = "SELECT TIME 'not a time' as time;";
- let result = try_execute_to_batches(&ctx, sql).await;
- assert_eq!(
- result.err().unwrap().to_string(),
- "simplify_expressions\ncaused by\nInternal error: Optimizer rule
'simplify_expressions' failed due to unexpected error: \
- Arrow error: Cast error: Cannot cast string 'not a time' to value of
Time64(Nanosecond) type. \
- This was likely caused by a bug in DataFusion's code and we would
welcome that you file an bug report in our issue tracker"
- );
-
- // An invalid time
- let sql = "SELECT TIME '24:01:02' as time;";
- let result = try_execute_to_batches(&ctx, sql).await;
- assert_eq!(
- result.err().unwrap().to_string(),
- "simplify_expressions\ncaused by\nInternal error: Optimizer rule
'simplify_expressions' failed due to unexpected error: \
- Arrow error: Cast error: Cannot cast string '24:01:02' to value of
Time64(Nanosecond) type. \
- This was likely caused by a bug in DataFusion's code and we would
welcome that you file an bug report in our issue tracker"
- );
-}
-
-#[tokio::test]
-async fn cast_to_timestamp_twice() -> Result<()> {
- let ctx = SessionContext::new();
-
- let sql = "select to_timestamp(a) from (select to_timestamp(1) as a)A;";
- let results = execute_to_batches(&ctx, sql).await;
-
- let expected = vec![
- "+-------------------------------+",
- "| totimestamp(a.a) |",
- "+-------------------------------+",
- "| 1970-01-01T00:00:00.000000001 |",
- "+-------------------------------+",
- ];
-
- assert_batches_eq!(expected, &results);
-
- Ok(())
-}
-
-#[tokio::test]
-async fn cast_to_timestamp_seconds_twice() -> Result<()> {
- let ctx = SessionContext::new();
-
- let sql =
- "select to_timestamp_seconds(a) from (select to_timestamp_seconds(1)
as a)A;";
- let results = execute_to_batches(&ctx, sql).await;
-
- let expected = vec![
- "+-------------------------+",
- "| totimestampseconds(a.a) |",
- "+-------------------------+",
- "| 1970-01-01T00:00:01 |",
- "+-------------------------+",
- ];
-
- assert_batches_eq!(expected, &results);
-
- Ok(())
-}
-
-#[tokio::test]
-async fn cast_to_timestamp_millis_twice() -> Result<()> {
- let ctx = SessionContext::new();
-
- let sql = "select to_timestamp_millis(a) from (select
to_timestamp_millis(1) as a)A;";
- let results = execute_to_batches(&ctx, sql).await;
-
- let expected = vec![
- "+-------------------------+",
- "| totimestampmillis(a.a) |",
- "+-------------------------+",
- "| 1970-01-01T00:00:00.001 |",
- "+-------------------------+",
- ];
-
- assert_batches_eq!(expected, &results);
-
- Ok(())
-}
-
-#[tokio::test]
-async fn cast_to_timestamp_micros_twice() -> Result<()> {
- let ctx = SessionContext::new();
-
- let sql = "select to_timestamp_micros(a) from (select
to_timestamp_micros(1) as a)A;";
- let results = execute_to_batches(&ctx, sql).await;
-
- let expected = vec![
- "+----------------------------+",
- "| totimestampmicros(a.a) |",
- "+----------------------------+",
- "| 1970-01-01T00:00:00.000001 |",
- "+----------------------------+",
- ];
-
- assert_batches_eq!(expected, &results);
-
- Ok(())
-}
-
-#[tokio::test]
-async fn to_timestamp_i32() -> Result<()> {
- let ctx = SessionContext::new();
-
- let sql = "select to_timestamp(cast (1 as int));";
- let results = execute_to_batches(&ctx, sql).await;
-
- let expected = vec![
- "+-------------------------------+",
- "| totimestamp(Int64(1)) |",
- "+-------------------------------+",
- "| 1970-01-01T00:00:00.000000001 |",
- "+-------------------------------+",
- ];
-
- assert_batches_eq!(expected, &results);
-
- Ok(())
-}
-
-#[tokio::test]
-async fn to_timestamp_micros_i32() -> Result<()> {
- let ctx = SessionContext::new();
-
- let sql = "select to_timestamp_micros(cast (1 as int));";
- let results = execute_to_batches(&ctx, sql).await;
-
- let expected = vec![
- "+-----------------------------+",
- "| totimestampmicros(Int64(1)) |",
- "+-----------------------------+",
- "| 1970-01-01T00:00:00.000001 |",
- "+-----------------------------+",
- ];
-
- assert_batches_eq!(expected, &results);
-
- Ok(())
-}
-
-#[tokio::test]
-async fn to_timestamp_millis_i32() -> Result<()> {
- let ctx = SessionContext::new();
-
- let sql = "select to_timestamp_millis(cast (1 as int));";
- let results = execute_to_batches(&ctx, sql).await;
-
- let expected = vec![
- "+-----------------------------+",
- "| totimestampmillis(Int64(1)) |",
- "+-----------------------------+",
- "| 1970-01-01T00:00:00.001 |",
- "+-----------------------------+",
- ];
-
- assert_batches_eq!(expected, &results);
-
- Ok(())
-}
-
-#[tokio::test]
-async fn to_timestamp_seconds_i32() -> Result<()> {
- let ctx = SessionContext::new();
-
- let sql = "select to_timestamp_seconds(cast (1 as int));";
- let results = execute_to_batches(&ctx, sql).await;
-
- let expected = vec![
- "+------------------------------+",
- "| totimestampseconds(Int64(1)) |",
- "+------------------------------+",
- "| 1970-01-01T00:00:01 |",
- "+------------------------------+",
- ];
-
- assert_batches_eq!(expected, &results);
-
- Ok(())
-}
-
#[tokio::test]
async fn timestamp_add_interval_second() -> Result<()> {
let ctx = SessionContext::new();
diff --git a/datafusion/core/tests/sqllogictests/test_files/timestamps.slt
b/datafusion/core/tests/sqllogictests/test_files/timestamps.slt
index c02ce9ee04..e15730f5a7 100644
--- a/datafusion/core/tests/sqllogictests/test_files/timestamps.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/timestamps.slt
@@ -256,6 +256,127 @@ SELECT COUNT(*) FROM ts_data_secs where ts >
from_unixtime(1599566400)
----
2
+
+# count_distinct_timestamps
+query P rowsort
+SELECT DISTINCT ts FROM ts_data_nanos;
+----
+2020-09-08T11:42:29.190855
+2020-09-08T12:42:29.190855
+2020-09-08T13:42:29.190855
+
+
+query I
+SELECT COUNT(DISTINCT(ts)) FROM ts_data_nanos
+----
+3
+
+# add_interval_month
+query D
+select date '1994-01-31' + interval '1' month as date;
+----
+1994-02-28
+
+
+# sub_interval_month
+query D
+select date '1994-03-31' - interval '1' month as date;
+----
+1994-02-28
+
+
+# sub_month_wrap
+query D
+select date '1994-01-15' - interval '1' month as date;
+----
+1993-12-15
+
+# add_interval_day
+query D
+select date '1994-01-15' + interval '1' day as date;
+----
+1994-01-16
+
+# sub_interval_day
+query D
+select date '1994-01-01' - interval '1' day as date;
+----
+1993-12-31
+
+
+# cast_string_to_time()
+statement ok
+set datafusion.optimizer.skip_failed_rules = false
+
+query DDDD
+select
+ time '08:09:10.123456789' as time_nano,
+ time '13:14:15.123456' as time_micro,
+ time '13:14:15.123' as time_milli,
+ time '13:14:15' as time;
+----
+08:09:10.123456789 13:14:15.123456 13:14:15.123 13:14:15
+
+query error Cannot cast string 'not a time' to value of Time64\(Nanosecond\)
type\. This was likely caused by a bug in DataFusion's code and we would
welcome that you file an bug report in our issue tracker
+SELECT TIME 'not a time' as time;
+
+# invalid time
+query error Cannot cast string '24:01:02' to value of Time64\(Nanosecond\)
type\. This was likely caused by a bug in DataFusion's code and we would
welcome that you file an bug report in our issue tracker
+SELECT TIME '24:01:02' as time;
+
+statement ok
+set datafusion.optimizer.skip_failed_rules = true
+
+
+# cast_to_timestamp_twice
+query P
+select to_timestamp(a) from (select to_timestamp(1) as a) A;
+----
+1970-01-01T00:00:00.000000001
+
+# cast_to_timestamp_seconds_twice
+query P
+select to_timestamp_seconds(a) from (select to_timestamp_seconds(1) as a)A
+----
+1970-01-01T00:00:01
+
+
+# cast_to_timestamp_millis_twice
+query P
+select to_timestamp_millis(a) from (select to_timestamp_millis(1) as a)A;
+----
+1970-01-01T00:00:00.001
+
+# cast_to_timestamp_micros_twice
+query P
+select to_timestamp_micros(a) from (select to_timestamp_micros(1) as a)A;
+----
+1970-01-01T00:00:00.000001
+
+# to_timestamp_i32
+query P
+select to_timestamp(cast (1 as int));
+----
+1970-01-01T00:00:00.000000001
+
+# to_timestamp_micros_i32
+query P
+select to_timestamp_micros(cast (1 as int));
+----
+1970-01-01T00:00:00.000001
+
+# to_timestamp_millis_i32
+query P
+select to_timestamp_millis(cast (1 as int));
+----
+1970-01-01T00:00:00.001
+
+# to_timestamp_seconds_i32
+query P
+select to_timestamp_seconds(cast (1 as int));
+----
+1970-01-01T00:00:01
+
statement ok
drop table ts_data