This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 435f9595c9 Update TO_DATE, TO_TIMESTAMP scalar functions to support
LargeUtf8, Utf8View (#12929)
435f9595c9 is described below
commit 435f9595c95723c90cd987de1a9f75484458efbb
Author: Bruce Ritchie <[email protected]>
AuthorDate: Wed Oct 16 10:17:09 2024 -0400
Update TO_DATE, TO_TIMESTAMP scalar functions to support LargeUtf8,
Utf8View (#12929)
* Update the to_date and to_timestamp* UDFs to support LargeUtf8 and Utf8View.
The benchmark was updated as well
* datetime depends on string expressions until #12898 lands
* Update to reflect the StringArrayType move to a common path
* Update datafusion/functions/src/datetime/common.rs
---------
Co-authored-by: Andrew Lamb <[email protected]>
---
datafusion/functions/benches/to_timestamp.rs | 243 ++++++++++++++-------
datafusion/functions/src/datetime/common.rs | 203 ++++++++++-------
datafusion/functions/src/datetime/to_date.rs | 111 ++++++++--
datafusion/functions/src/datetime/to_timestamp.rs | 36 ++-
datafusion/sqllogictest/test_files/dates.slt | 62 ++++++
.../sqllogictest/test_files/string/string_view.slt | 20 ++
datafusion/sqllogictest/test_files/timestamps.slt | 99 ++++++++-
7 files changed, 584 insertions(+), 190 deletions(-)
diff --git a/datafusion/functions/benches/to_timestamp.rs
b/datafusion/functions/benches/to_timestamp.rs
index e734b6832f..5a87b34caf 100644
--- a/datafusion/functions/benches/to_timestamp.rs
+++ b/datafusion/functions/benches/to_timestamp.rs
@@ -20,27 +20,123 @@ extern crate criterion;
use std::sync::Arc;
use arrow::array::builder::StringBuilder;
-use arrow::array::ArrayRef;
+use arrow::array::{ArrayRef, StringArray};
+use arrow::compute::cast;
+use arrow::datatypes::DataType;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use datafusion_expr::ColumnarValue;
use datafusion_functions::datetime::to_timestamp;
+fn data() -> StringArray {
+ let data: Vec<&str> = vec![
+ "1997-01-31T09:26:56.123Z",
+ "1997-01-31T09:26:56.123-05:00",
+ "1997-01-31 09:26:56.123-05:00",
+ "2023-01-01 04:05:06.789 -08",
+ "1997-01-31T09:26:56.123",
+ "1997-01-31 09:26:56.123",
+ "1997-01-31 09:26:56",
+ "1997-01-31 13:26:56",
+ "1997-01-31 13:26:56+04:00",
+ "1997-01-31",
+ ];
+
+ StringArray::from(data)
+}
+
+fn data_with_formats() -> (StringArray, StringArray, StringArray, StringArray)
{
+ let mut inputs = StringBuilder::new();
+ let mut format1_builder = StringBuilder::with_capacity(2, 10);
+ let mut format2_builder = StringBuilder::with_capacity(2, 10);
+ let mut format3_builder = StringBuilder::with_capacity(2, 10);
+
+ inputs.append_value("1997-01-31T09:26:56.123Z");
+ format1_builder.append_value("%+");
+ format2_builder.append_value("%c");
+ format3_builder.append_value("%Y-%m-%dT%H:%M:%S%.f%Z");
+
+ inputs.append_value("1997-01-31T09:26:56.123-05:00");
+ format1_builder.append_value("%+");
+ format2_builder.append_value("%c");
+ format3_builder.append_value("%Y-%m-%dT%H:%M:%S%.f%z");
+
+ inputs.append_value("1997-01-31 09:26:56.123-05:00");
+ format1_builder.append_value("%+");
+ format2_builder.append_value("%c");
+ format3_builder.append_value("%Y-%m-%d %H:%M:%S%.f%Z");
+
+ inputs.append_value("2023-01-01 04:05:06.789 -08");
+ format1_builder.append_value("%+");
+ format2_builder.append_value("%c");
+ format3_builder.append_value("%Y-%m-%d %H:%M:%S%.f %#z");
+
+ inputs.append_value("1997-01-31T09:26:56.123");
+ format1_builder.append_value("%+");
+ format2_builder.append_value("%c");
+ format3_builder.append_value("%Y-%m-%dT%H:%M:%S%.f");
+
+ inputs.append_value("1997-01-31 09:26:56.123");
+ format1_builder.append_value("%+");
+ format2_builder.append_value("%c");
+ format3_builder.append_value("%Y-%m-%d %H:%M:%S%.f");
+
+ inputs.append_value("1997-01-31 09:26:56");
+ format1_builder.append_value("%+");
+ format2_builder.append_value("%c");
+ format3_builder.append_value("%Y-%m-%d %H:%M:%S");
+
+ inputs.append_value("1997-01-31 092656");
+ format1_builder.append_value("%+");
+ format2_builder.append_value("%c");
+ format3_builder.append_value("%Y-%m-%d %H%M%S");
+
+ inputs.append_value("1997-01-31 092656+04:00");
+ format1_builder.append_value("%+");
+ format2_builder.append_value("%c");
+ format3_builder.append_value("%Y-%m-%d %H%M%S%:z");
+
+ inputs.append_value("Sun Jul 8 00:34:60 2001");
+ format1_builder.append_value("%+");
+ format2_builder.append_value("%c");
+ format3_builder.append_value("%Y-%m-%d 00:00:00");
+
+ (
+ inputs.finish(),
+ format1_builder.finish(),
+ format2_builder.finish(),
+ format3_builder.finish(),
+ )
+}
fn criterion_benchmark(c: &mut Criterion) {
- c.bench_function("to_timestamp_no_formats", |b| {
- let mut inputs = StringBuilder::new();
- inputs.append_value("1997-01-31T09:26:56.123Z");
- inputs.append_value("1997-01-31T09:26:56.123-05:00");
- inputs.append_value("1997-01-31 09:26:56.123-05:00");
- inputs.append_value("2023-01-01 04:05:06.789 -08");
- inputs.append_value("1997-01-31T09:26:56.123");
- inputs.append_value("1997-01-31 09:26:56.123");
- inputs.append_value("1997-01-31 09:26:56");
- inputs.append_value("1997-01-31 13:26:56");
- inputs.append_value("1997-01-31 13:26:56+04:00");
- inputs.append_value("1997-01-31");
-
- let string_array = ColumnarValue::Array(Arc::new(inputs.finish()) as
ArrayRef);
+ c.bench_function("to_timestamp_no_formats_utf8", |b| {
+ let string_array = ColumnarValue::Array(Arc::new(data()) as ArrayRef);
+
+ b.iter(|| {
+ black_box(
+ to_timestamp()
+ .invoke(&[string_array.clone()])
+ .expect("to_timestamp should work on valid values"),
+ )
+ })
+ });
+
+ c.bench_function("to_timestamp_no_formats_largeutf8", |b| {
+ let data = cast(&data(), &DataType::LargeUtf8).unwrap();
+ let string_array = ColumnarValue::Array(Arc::new(data) as ArrayRef);
+
+ b.iter(|| {
+ black_box(
+ to_timestamp()
+ .invoke(&[string_array.clone()])
+ .expect("to_timestamp should work on valid values"),
+ )
+ })
+ });
+
+ c.bench_function("to_timestamp_no_formats_utf8view", |b| {
+ let data = cast(&data(), &DataType::Utf8View).unwrap();
+ let string_array = ColumnarValue::Array(Arc::new(data) as ArrayRef);
b.iter(|| {
black_box(
@@ -51,67 +147,66 @@ fn criterion_benchmark(c: &mut Criterion) {
})
});
- c.bench_function("to_timestamp_with_formats", |b| {
- let mut inputs = StringBuilder::new();
- let mut format1_builder = StringBuilder::with_capacity(2, 10);
- let mut format2_builder = StringBuilder::with_capacity(2, 10);
- let mut format3_builder = StringBuilder::with_capacity(2, 10);
-
- inputs.append_value("1997-01-31T09:26:56.123Z");
- format1_builder.append_value("%+");
- format2_builder.append_value("%c");
- format3_builder.append_value("%Y-%m-%dT%H:%M:%S%.f%Z");
-
- inputs.append_value("1997-01-31T09:26:56.123-05:00");
- format1_builder.append_value("%+");
- format2_builder.append_value("%c");
- format3_builder.append_value("%Y-%m-%dT%H:%M:%S%.f%z");
-
- inputs.append_value("1997-01-31 09:26:56.123-05:00");
- format1_builder.append_value("%+");
- format2_builder.append_value("%c");
- format3_builder.append_value("%Y-%m-%d %H:%M:%S%.f%Z");
-
- inputs.append_value("2023-01-01 04:05:06.789 -08");
- format1_builder.append_value("%+");
- format2_builder.append_value("%c");
- format3_builder.append_value("%Y-%m-%d %H:%M:%S%.f %#z");
-
- inputs.append_value("1997-01-31T09:26:56.123");
- format1_builder.append_value("%+");
- format2_builder.append_value("%c");
- format3_builder.append_value("%Y-%m-%dT%H:%M:%S%.f");
-
- inputs.append_value("1997-01-31 09:26:56.123");
- format1_builder.append_value("%+");
- format2_builder.append_value("%c");
- format3_builder.append_value("%Y-%m-%d %H:%M:%S%.f");
-
- inputs.append_value("1997-01-31 09:26:56");
- format1_builder.append_value("%+");
- format2_builder.append_value("%c");
- format3_builder.append_value("%Y-%m-%d %H:%M:%S");
-
- inputs.append_value("1997-01-31 092656");
- format1_builder.append_value("%+");
- format2_builder.append_value("%c");
- format3_builder.append_value("%Y-%m-%d %H%M%S");
-
- inputs.append_value("1997-01-31 092656+04:00");
- format1_builder.append_value("%+");
- format2_builder.append_value("%c");
- format3_builder.append_value("%Y-%m-%d %H%M%S%:z");
-
- inputs.append_value("Sun Jul 8 00:34:60 2001");
- format1_builder.append_value("%+");
- format2_builder.append_value("%c");
- format3_builder.append_value("%Y-%m-%d 00:00:00");
+ c.bench_function("to_timestamp_with_formats_utf8", |b| {
+ let (inputs, format1, format2, format3) = data_with_formats();
+
+ let args = [
+ ColumnarValue::Array(Arc::new(inputs) as ArrayRef),
+ ColumnarValue::Array(Arc::new(format1) as ArrayRef),
+ ColumnarValue::Array(Arc::new(format2) as ArrayRef),
+ ColumnarValue::Array(Arc::new(format3) as ArrayRef),
+ ];
+ b.iter(|| {
+ black_box(
+ to_timestamp()
+ .invoke(&args.clone())
+ .expect("to_timestamp should work on valid values"),
+ )
+ })
+ });
+
+ c.bench_function("to_timestamp_with_formats_largeutf8", |b| {
+ let (inputs, format1, format2, format3) = data_with_formats();
+
+ let args = [
+ ColumnarValue::Array(
+ Arc::new(cast(&inputs, &DataType::LargeUtf8).unwrap()) as
ArrayRef
+ ),
+ ColumnarValue::Array(
+ Arc::new(cast(&format1, &DataType::LargeUtf8).unwrap()) as
ArrayRef
+ ),
+ ColumnarValue::Array(
+ Arc::new(cast(&format2, &DataType::LargeUtf8).unwrap()) as
ArrayRef
+ ),
+ ColumnarValue::Array(
+ Arc::new(cast(&format3, &DataType::LargeUtf8).unwrap()) as
ArrayRef
+ ),
+ ];
+ b.iter(|| {
+ black_box(
+ to_timestamp()
+ .invoke(&args.clone())
+ .expect("to_timestamp should work on valid values"),
+ )
+ })
+ });
+
+ c.bench_function("to_timestamp_with_formats_utf8view", |b| {
+ let (inputs, format1, format2, format3) = data_with_formats();
let args = [
- ColumnarValue::Array(Arc::new(inputs.finish()) as ArrayRef),
- ColumnarValue::Array(Arc::new(format1_builder.finish()) as
ArrayRef),
- ColumnarValue::Array(Arc::new(format2_builder.finish()) as
ArrayRef),
- ColumnarValue::Array(Arc::new(format3_builder.finish()) as
ArrayRef),
+ ColumnarValue::Array(
+ Arc::new(cast(&inputs, &DataType::Utf8View).unwrap()) as
ArrayRef
+ ),
+ ColumnarValue::Array(
+ Arc::new(cast(&format1, &DataType::Utf8View).unwrap()) as
ArrayRef
+ ),
+ ColumnarValue::Array(
+ Arc::new(cast(&format2, &DataType::Utf8View).unwrap()) as
ArrayRef
+ ),
+ ColumnarValue::Array(
+ Arc::new(cast(&format3, &DataType::Utf8View).unwrap()) as
ArrayRef
+ ),
];
b.iter(|| {
black_box(
diff --git a/datafusion/functions/src/datetime/common.rs
b/datafusion/functions/src/datetime/common.rs
index 89b40a3534..6e3106a5bc 100644
--- a/datafusion/functions/src/datetime/common.rs
+++ b/datafusion/functions/src/datetime/common.rs
@@ -18,15 +18,16 @@
use std::sync::Arc;
use arrow::array::{
- Array, ArrowPrimitiveType, GenericStringArray, OffsetSizeTrait,
PrimitiveArray,
+ Array, ArrowPrimitiveType, AsArray, GenericStringArray, PrimitiveArray,
+ StringViewArray,
};
use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos;
use arrow::datatypes::DataType;
use chrono::format::{parse, Parsed, StrftimeItems};
use chrono::LocalResult::Single;
use chrono::{DateTime, TimeZone, Utc};
-use itertools::Either;
+use crate::strings::StringArrayType;
use datafusion_common::cast::as_generic_string_array;
use datafusion_common::{
exec_err, unwrap_or_internal_err, DataFusionError, Result, ScalarType,
ScalarValue,
@@ -41,14 +42,15 @@ pub(crate) fn string_to_timestamp_nanos_shim(s: &str) ->
Result<i64> {
string_to_timestamp_nanos(s).map_err(|e| e.into())
}
-/// Checks that all the arguments from the second are of type [Utf8] or
[LargeUtf8]
+/// Checks that all the arguments from the second are of type [Utf8],
[LargeUtf8] or [Utf8View]
///
/// [Utf8]: DataType::Utf8
/// [LargeUtf8]: DataType::LargeUtf8
+/// [Utf8View]: DataType::Utf8View
pub(crate) fn validate_data_types(args: &[ColumnarValue], name: &str) ->
Result<()> {
for (idx, a) in args.iter().skip(1).enumerate() {
match a.data_type() {
- DataType::Utf8 | DataType::LargeUtf8 => {
+ DataType::Utf8View | DataType::LargeUtf8 | DataType::Utf8 => {
// all good
}
_ => {
@@ -178,26 +180,43 @@ pub(crate) fn string_to_timestamp_millis_formatted(s:
&str, format: &str) -> Res
.timestamp_millis())
}
-pub(crate) fn handle<'a, O, F, S>(
- args: &'a [ColumnarValue],
+pub(crate) fn handle<O, F, S>(
+ args: &[ColumnarValue],
op: F,
name: &str,
) -> Result<ColumnarValue>
where
O: ArrowPrimitiveType,
S: ScalarType<O::Native>,
- F: Fn(&'a str) -> Result<O::Native>,
+ F: Fn(&str) -> Result<O::Native>,
{
match &args[0] {
ColumnarValue::Array(a) => match a.data_type() {
- DataType::Utf8 | DataType::LargeUtf8 =>
Ok(ColumnarValue::Array(Arc::new(
- unary_string_to_primitive_function::<i32, O, _>(&[a.as_ref()],
op, name)?,
+ DataType::Utf8View => Ok(ColumnarValue::Array(Arc::new(
+ unary_string_to_primitive_function::<&StringViewArray, O, _>(
+ a.as_ref().as_string_view(),
+ op,
+ )?,
+ ))),
+ DataType::LargeUtf8 => Ok(ColumnarValue::Array(Arc::new(
+ unary_string_to_primitive_function::<&GenericStringArray<i64>,
O, _>(
+ a.as_ref().as_string::<i64>(),
+ op,
+ )?,
+ ))),
+ DataType::Utf8 => Ok(ColumnarValue::Array(Arc::new(
+ unary_string_to_primitive_function::<&GenericStringArray<i32>,
O, _>(
+ a.as_ref().as_string::<i32>(),
+ op,
+ )?,
))),
other => exec_err!("Unsupported data type {other:?} for function
{name}"),
},
ColumnarValue::Scalar(scalar) => match scalar {
- ScalarValue::Utf8(a) | ScalarValue::LargeUtf8(a) => {
- let result = a.as_ref().map(|x| (op)(x)).transpose()?;
+ ScalarValue::Utf8View(a)
+ | ScalarValue::LargeUtf8(a)
+ | ScalarValue::Utf8(a) => {
+ let result = a.as_ref().map(|x| op(x)).transpose()?;
Ok(ColumnarValue::Scalar(S::scalar(result)))
}
other => exec_err!("Unsupported data type {other:?} for function
{name}"),
@@ -205,11 +224,11 @@ where
}
}
-// given an function that maps a `&str`, `&str` to an arrow native type,
+// Given a function that maps a `&str`, `&str` to an arrow native type,
// returns a `ColumnarValue` where the function is applied to either a
`ArrayRef` or `ScalarValue`
// depending on the `args`'s variant.
-pub(crate) fn handle_multiple<'a, O, F, S, M>(
- args: &'a [ColumnarValue],
+pub(crate) fn handle_multiple<O, F, S, M>(
+ args: &[ColumnarValue],
op: F,
op2: M,
name: &str,
@@ -217,24 +236,24 @@ pub(crate) fn handle_multiple<'a, O, F, S, M>(
where
O: ArrowPrimitiveType,
S: ScalarType<O::Native>,
- F: Fn(&'a str, &'a str) -> Result<O::Native>,
+ F: Fn(&str, &str) -> Result<O::Native>,
M: Fn(O::Native) -> O::Native,
{
match &args[0] {
ColumnarValue::Array(a) => match a.data_type() {
- DataType::Utf8 | DataType::LargeUtf8 => {
+ DataType::Utf8View | DataType::LargeUtf8 | DataType::Utf8 => {
// validate the column types
for (pos, arg) in args.iter().enumerate() {
match arg {
ColumnarValue::Array(arg) => match arg.data_type() {
- DataType::Utf8 | DataType::LargeUtf8 => {
+ DataType::Utf8View | DataType::LargeUtf8 |
DataType::Utf8 => {
// all good
}
other => return exec_err!("Unsupported data type
{other:?} for function {name}, arg # {pos}"),
},
ColumnarValue::Scalar(arg) => {
match arg.data_type() {
- DataType::Utf8 | DataType::LargeUtf8 => {
+ DataType::Utf8View| DataType::LargeUtf8 |
DataType::Utf8 => {
// all good
}
other => return exec_err!("Unsupported data
type {other:?} for function {name}, arg # {pos}"),
@@ -244,7 +263,7 @@ where
}
Ok(ColumnarValue::Array(Arc::new(
- strings_to_primitive_function::<i32, O, _, _>(args, op,
op2, name)?,
+ strings_to_primitive_function::<O, _, _>(args, op, op2,
name)?,
)))
}
other => {
@@ -253,7 +272,9 @@ where
},
// if the first argument is a scalar utf8 all arguments are expected
to be scalar utf8
ColumnarValue::Scalar(scalar) => match scalar {
- ScalarValue::Utf8(a) | ScalarValue::LargeUtf8(a) => {
+ ScalarValue::Utf8View(a)
+ | ScalarValue::LargeUtf8(a)
+ | ScalarValue::Utf8(a) => {
let a = a.as_ref();
// ASK: Why do we trust `a` to be non-null at this point?
let a = unwrap_or_internal_err!(a);
@@ -262,7 +283,9 @@ where
for (pos, v) in args.iter().enumerate().skip(1) {
let ColumnarValue::Scalar(
- ScalarValue::Utf8(x) | ScalarValue::LargeUtf8(x),
+ ScalarValue::Utf8View(x)
+ | ScalarValue::LargeUtf8(x)
+ | ScalarValue::Utf8(x),
) = v
else {
return exec_err!("Unsupported data type {v:?} for
function {name}, arg # {pos}");
@@ -299,18 +322,16 @@ where
/// # Errors
/// This function errors iff:
/// * the number of arguments is not > 1 or
-/// * the array arguments are not castable to a `GenericStringArray` or
/// * the function `op` errors for all input
-pub(crate) fn strings_to_primitive_function<'a, T, O, F, F2>(
- args: &'a [ColumnarValue],
+pub(crate) fn strings_to_primitive_function<O, F, F2>(
+ args: &[ColumnarValue],
op: F,
op2: F2,
name: &str,
) -> Result<PrimitiveArray<O>>
where
O: ArrowPrimitiveType,
- T: OffsetSizeTrait,
- F: Fn(&'a str, &'a str) -> Result<O::Native>,
+ F: Fn(&str, &str) -> Result<O::Native>,
F2: Fn(O::Native) -> O::Native,
{
if args.len() < 2 {
@@ -321,50 +342,90 @@ where
);
}
- // this will throw the error if any of the array args are not castable to
GenericStringArray
- let data = args
- .iter()
- .map(|a| match a {
- ColumnarValue::Array(a) => {
- Ok(Either::Left(as_generic_string_array::<T>(a.as_ref())?))
+ match &args[0] {
+ ColumnarValue::Array(a) => match a.data_type() {
+ DataType::Utf8View => {
+ let string_array = a.as_string_view();
+ handle_array_op::<O, &StringViewArray, F, F2>(
+ &string_array,
+ &args[1..],
+ op,
+ op2,
+ )
}
- ColumnarValue::Scalar(s) => match s {
- ScalarValue::Utf8(a) | ScalarValue::LargeUtf8(a) =>
Ok(Either::Right(a)),
- other => exec_err!(
- "Unexpected scalar type encountered '{other}' for function
'{name}'"
- ),
- },
- })
- .collect::<Result<Vec<Either<&GenericStringArray<T>,
&Option<String>>>>>()?;
-
- let first_arg = &data.first().unwrap().left().unwrap();
+ DataType::LargeUtf8 => {
+ let string_array = as_generic_string_array::<i64>(&a)?;
+ handle_array_op::<O, &GenericStringArray<i64>, F, F2>(
+ &string_array,
+ &args[1..],
+ op,
+ op2,
+ )
+ }
+ DataType::Utf8 => {
+ let string_array = as_generic_string_array::<i32>(&a)?;
+ handle_array_op::<O, &GenericStringArray<i32>, F, F2>(
+ &string_array,
+ &args[1..],
+ op,
+ op2,
+ )
+ }
+ other => exec_err!(
+ "Unsupported data type {other:?} for function substr,\
+ expected Utf8View, Utf8 or LargeUtf8."
+ ),
+ },
+ other => exec_err!(
+ "Received {} data type, expected only array",
+ other.data_type()
+ ),
+ }
+}
- first_arg
+fn handle_array_op<'a, O, V, F, F2>(
+ first: &V,
+ args: &[ColumnarValue],
+ op: F,
+ op2: F2,
+) -> Result<PrimitiveArray<O>>
+where
+ V: StringArrayType<'a>,
+ O: ArrowPrimitiveType,
+ F: Fn(&str, &str) -> Result<O::Native>,
+ F2: Fn(O::Native) -> O::Native,
+{
+ first
.iter()
.enumerate()
.map(|(pos, x)| {
let mut val = None;
-
if let Some(x) = x {
- let param_args = data.iter().skip(1);
-
- // go through the args and find the first successful result.
Only the last
- // failure will be returned if no successful result was
received.
- for param_arg in param_args {
- // param_arg is an array, use the corresponding index into
the array as the arg
- // we're currently parsing
- let p = *param_arg;
- let r = if p.is_left() {
- let p = p.left().unwrap();
- op(x, p.value(pos))
- }
- // args is a scalar, use it directly
- else if let Some(p) = p.right().unwrap() {
- op(x, p.as_str())
- } else {
- continue;
- };
+ for arg in args {
+ let v = match arg {
+ ColumnarValue::Array(a) => match a.data_type() {
+ DataType::Utf8View =>
Ok(a.as_string_view().value(pos)),
+ DataType::LargeUtf8 =>
Ok(a.as_string::<i64>().value(pos)),
+ DataType::Utf8 =>
Ok(a.as_string::<i32>().value(pos)),
+ other => exec_err!("Unexpected type encountered
'{other}'"),
+ },
+ ColumnarValue::Scalar(s) => match s {
+ ScalarValue::Utf8View(a)
+ | ScalarValue::LargeUtf8(a)
+ | ScalarValue::Utf8(a) => {
+ if let Some(v) = a {
+ Ok(v.as_str())
+ } else {
+ continue;
+ }
+ }
+ other => {
+ exec_err!("Unexpected scalar type encountered
'{other}'")
+ }
+ },
+ }?;
+ let r = op(x, v);
if r.is_ok() {
val = Some(Ok(op2(r.unwrap())));
break;
@@ -385,28 +446,16 @@ where
/// # Errors
/// This function errors iff:
/// * the number of arguments is not 1 or
-/// * the first argument is not castable to a `GenericStringArray` or
/// * the function `op` errors
-fn unary_string_to_primitive_function<'a, T, O, F>(
- args: &[&'a dyn Array],
+fn unary_string_to_primitive_function<'a, StringArrType, O, F>(
+ array: StringArrType,
op: F,
- name: &str,
) -> Result<PrimitiveArray<O>>
where
+ StringArrType: StringArrayType<'a>,
O: ArrowPrimitiveType,
- T: OffsetSizeTrait,
F: Fn(&'a str) -> Result<O::Native>,
{
- if args.len() != 1 {
- return exec_err!(
- "{:?} args were supplied but {} takes exactly one argument",
- args.len(),
- name
- );
- }
-
- let array = as_generic_string_array::<T>(args[0])?;
-
// first map is the iterator, second is for the `Option<_>`
array.iter().map(|x| x.map(&op).transpose()).collect()
}
diff --git a/datafusion/functions/src/datetime/to_date.rs
b/datafusion/functions/src/datetime/to_date.rs
index 2803fd042b..b21fe995ce 100644
--- a/datafusion/functions/src/datetime/to_date.rs
+++ b/datafusion/functions/src/datetime/to_date.rs
@@ -17,7 +17,7 @@
use crate::datetime::common::*;
use arrow::datatypes::DataType;
-use arrow::datatypes::DataType::Date32;
+use arrow::datatypes::DataType::*;
use arrow::error::ArrowError::ParseError;
use arrow::{array::types::Date32Type, compute::kernels::cast_utils::Parser};
use datafusion_common::error::DataFusionError;
@@ -151,13 +151,10 @@ impl ScalarUDFImpl for ToDateFunc {
}
match args[0].data_type() {
- DataType::Int32
- | DataType::Int64
- | DataType::Null
- | DataType::Float64
- | DataType::Date32
- | DataType::Date64 => args[0].cast_to(&DataType::Date32, None),
- DataType::Utf8 => self.to_date(args),
+ Int32 | Int64 | Null | Float64 | Date32 | Date64 => {
+ args[0].cast_to(&Date32, None)
+ }
+ Utf8View | LargeUtf8 | Utf8 => self.to_date(args),
other => {
exec_err!("Unsupported data type {:?} for function to_date",
other)
}
@@ -171,9 +168,11 @@ impl ScalarUDFImpl for ToDateFunc {
#[cfg(test)]
mod tests {
+ use arrow::array::{Array, Date32Array, GenericStringArray,
StringViewArray};
use arrow::{compute::kernels::cast_utils::Parser, datatypes::Date32Type};
use datafusion_common::ScalarValue;
use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
+ use std::sync::Arc;
use super::ToDateFunc;
@@ -204,9 +203,17 @@ mod tests {
];
for tc in &test_cases {
- let date_scalar = ScalarValue::Utf8(Some(tc.date_str.to_string()));
- let to_date_result =
-
ToDateFunc::new().invoke(&[ColumnarValue::Scalar(date_scalar)]);
+ test_scalar(ScalarValue::Utf8(Some(tc.date_str.to_string())), tc);
+ test_scalar(ScalarValue::LargeUtf8(Some(tc.date_str.to_string())),
tc);
+ test_scalar(ScalarValue::Utf8View(Some(tc.date_str.to_string())),
tc);
+
+ test_array::<GenericStringArray<i32>>(tc);
+ test_array::<GenericStringArray<i64>>(tc);
+ test_array::<StringViewArray>(tc);
+ }
+
+ fn test_scalar(sv: ScalarValue, tc: &TestCase) {
+ let to_date_result =
ToDateFunc::new().invoke(&[ColumnarValue::Scalar(sv)]);
match to_date_result {
Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
@@ -220,6 +227,33 @@ mod tests {
_ => panic!("Could not convert '{}' to Date", tc.date_str),
}
}
+
+ fn test_array<A>(tc: &TestCase)
+ where
+ A: From<Vec<&'static str>> + Array + 'static,
+ {
+ let date_array = A::from(vec![tc.date_str]);
+ let to_date_result =
+
ToDateFunc::new().invoke(&[ColumnarValue::Array(Arc::new(date_array))]);
+
+ match to_date_result {
+ Ok(ColumnarValue::Array(a)) => {
+ assert_eq!(a.len(), 1);
+
+ let expected = Date32Type::parse_formatted(tc.date_str,
"%Y-%m-%d");
+ let mut builder = Date32Array::builder(4);
+ builder.append_value(expected.unwrap());
+
+ assert_eq!(
+ &builder.finish() as &dyn Array,
+ a.as_ref(),
+ "{}: to_date created wrong value",
+ tc.name
+ );
+ }
+ _ => panic!("Could not convert '{}' to Date", tc.date_str),
+ }
+ }
}
#[test]
@@ -271,12 +305,26 @@ mod tests {
];
for tc in &test_cases {
- let formatted_date_scalar =
- ScalarValue::Utf8(Some(tc.formatted_date.to_string()));
+
test_scalar(ScalarValue::Utf8(Some(tc.formatted_date.to_string())), tc);
+ test_scalar(
+ ScalarValue::LargeUtf8(Some(tc.formatted_date.to_string())),
+ tc,
+ );
+ test_scalar(
+ ScalarValue::Utf8View(Some(tc.formatted_date.to_string())),
+ tc,
+ );
+
+ test_array::<GenericStringArray<i32>>(tc);
+ test_array::<GenericStringArray<i64>>(tc);
+ test_array::<StringViewArray>(tc);
+ }
+
+ fn test_scalar(sv: ScalarValue, tc: &TestCase) {
let format_scalar =
ScalarValue::Utf8(Some(tc.format_str.to_string()));
let to_date_result = ToDateFunc::new().invoke(&[
- ColumnarValue::Scalar(formatted_date_scalar),
+ ColumnarValue::Scalar(sv),
ColumnarValue::Scalar(format_scalar),
]);
@@ -291,6 +339,41 @@ mod tests {
),
}
}
+
+ fn test_array<A>(tc: &TestCase)
+ where
+ A: From<Vec<&'static str>> + Array + 'static,
+ {
+ let date_array = A::from(vec![tc.formatted_date]);
+ let format_array = A::from(vec![tc.format_str]);
+
+ let to_date_result = ToDateFunc::new().invoke(&[
+ ColumnarValue::Array(Arc::new(date_array)),
+ ColumnarValue::Array(Arc::new(format_array)),
+ ]);
+
+ match to_date_result {
+ Ok(ColumnarValue::Array(a)) => {
+ assert_eq!(a.len(), 1);
+
+ let expected = Date32Type::parse_formatted(tc.date_str,
"%Y-%m-%d");
+ let mut builder = Date32Array::builder(4);
+ builder.append_value(expected.unwrap());
+
+ assert_eq!(
+ &builder.finish() as &dyn Array, a.as_ref(),
+ "{}: to_date created wrong value for date '{}' with
format string '{}'",
+ tc.name,
+ tc.formatted_date,
+ tc.format_str
+ );
+ }
+ _ => panic!(
+ "Could not convert '{}' with format string '{}'to Date:
{:?}",
+ tc.formatted_date, tc.format_str, to_date_result
+ ),
+ }
+ }
}
#[test]
diff --git a/datafusion/functions/src/datetime/to_timestamp.rs
b/datafusion/functions/src/datetime/to_timestamp.rs
index cbb6f37603..b17c9a005d 100644
--- a/datafusion/functions/src/datetime/to_timestamp.rs
+++ b/datafusion/functions/src/datetime/to_timestamp.rs
@@ -18,7 +18,7 @@
use std::any::Any;
use std::sync::Arc;
-use arrow::datatypes::DataType::Timestamp;
+use arrow::datatypes::DataType::*;
use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second};
use arrow::datatypes::{
ArrowTimestampType, DataType, TimeUnit, TimestampMicrosecondType,
@@ -162,16 +162,16 @@ impl ScalarUDFImpl for ToTimestampFunc {
}
match args[0].data_type() {
- DataType::Int32 | DataType::Int64 => args[0]
+ Int32 | Int64 => args[0]
.cast_to(&Timestamp(Second, None), None)?
.cast_to(&Timestamp(Nanosecond, None), None),
- DataType::Null | DataType::Float64 | Timestamp(_, None) => {
+ Null | Float64 | Timestamp(_, None) => {
args[0].cast_to(&Timestamp(Nanosecond, None), None)
}
- DataType::Timestamp(_, Some(tz)) => {
+ Timestamp(_, Some(tz)) => {
args[0].cast_to(&Timestamp(Nanosecond, Some(tz)), None)
}
- DataType::Utf8 => {
+ Utf8View | LargeUtf8 | Utf8 => {
to_timestamp_impl::<TimestampNanosecondType>(args,
"to_timestamp")
}
other => {
@@ -215,13 +215,11 @@ impl ScalarUDFImpl for ToTimestampSecondsFunc {
}
match args[0].data_type() {
- DataType::Null | DataType::Int32 | DataType::Int64 | Timestamp(_,
None) => {
+ Null | Int32 | Int64 | Timestamp(_, None) => {
args[0].cast_to(&Timestamp(Second, None), None)
}
- DataType::Timestamp(_, Some(tz)) => {
- args[0].cast_to(&Timestamp(Second, Some(tz)), None)
- }
- DataType::Utf8 => {
+ Timestamp(_, Some(tz)) => args[0].cast_to(&Timestamp(Second,
Some(tz)), None),
+ Utf8View | LargeUtf8 | Utf8 => {
to_timestamp_impl::<TimestampSecondType>(args,
"to_timestamp_seconds")
}
other => {
@@ -265,13 +263,13 @@ impl ScalarUDFImpl for ToTimestampMillisFunc {
}
match args[0].data_type() {
- DataType::Null | DataType::Int32 | DataType::Int64 | Timestamp(_,
None) => {
+ Null | Int32 | Int64 | Timestamp(_, None) => {
args[0].cast_to(&Timestamp(Millisecond, None), None)
}
- DataType::Timestamp(_, Some(tz)) => {
+ Timestamp(_, Some(tz)) => {
args[0].cast_to(&Timestamp(Millisecond, Some(tz)), None)
}
- DataType::Utf8 => {
+ Utf8View | LargeUtf8 | Utf8 => {
to_timestamp_impl::<TimestampMillisecondType>(args,
"to_timestamp_millis")
}
other => {
@@ -315,13 +313,13 @@ impl ScalarUDFImpl for ToTimestampMicrosFunc {
}
match args[0].data_type() {
- DataType::Null | DataType::Int32 | DataType::Int64 | Timestamp(_,
None) => {
+ Null | Int32 | Int64 | Timestamp(_, None) => {
args[0].cast_to(&Timestamp(Microsecond, None), None)
}
- DataType::Timestamp(_, Some(tz)) => {
+ Timestamp(_, Some(tz)) => {
args[0].cast_to(&Timestamp(Microsecond, Some(tz)), None)
}
- DataType::Utf8 => {
+ Utf8View | LargeUtf8 | Utf8 => {
to_timestamp_impl::<TimestampMicrosecondType>(args,
"to_timestamp_micros")
}
other => {
@@ -365,13 +363,13 @@ impl ScalarUDFImpl for ToTimestampNanosFunc {
}
match args[0].data_type() {
- DataType::Null | DataType::Int32 | DataType::Int64 | Timestamp(_,
None) => {
+ Null | Int32 | Int64 | Timestamp(_, None) => {
args[0].cast_to(&Timestamp(Nanosecond, None), None)
}
- DataType::Timestamp(_, Some(tz)) => {
+ Timestamp(_, Some(tz)) => {
args[0].cast_to(&Timestamp(Nanosecond, Some(tz)), None)
}
- DataType::Utf8 => {
+ Utf8View | LargeUtf8 | Utf8 => {
to_timestamp_impl::<TimestampNanosecondType>(args,
"to_timestamp_nanos")
}
other => {
diff --git a/datafusion/sqllogictest/test_files/dates.slt
b/datafusion/sqllogictest/test_files/dates.slt
index 1ef56b1a7e..4425eee333 100644
--- a/datafusion/sqllogictest/test_files/dates.slt
+++ b/datafusion/sqllogictest/test_files/dates.slt
@@ -194,6 +194,14 @@ create table ts_utf8_data(ts varchar(100), format
varchar(100)) as values
('1926632005', '%s'),
('2000-01-01T01:01:01+07:00', '%+');
+statement ok
+create table ts_largeutf8_data as
+select arrow_cast(ts, 'LargeUtf8') as ts, arrow_cast(format, 'LargeUtf8') as
format from ts_utf8_data;
+
+statement ok
+create table ts_utf8view_data as
+select arrow_cast(ts, 'Utf8View') as ts, arrow_cast(format, 'Utf8View') as
format from ts_utf8_data;
+
# verify date data using tables with formatting options
query D
SELECT to_date(t.ts, t.format) from ts_utf8_data as t
@@ -204,6 +212,24 @@ SELECT to_date(t.ts, t.format) from ts_utf8_data as t
2031-01-19
1999-12-31
+query D
+SELECT to_date(t.ts, t.format) from ts_largeutf8_data as t
+----
+2020-09-08
+2031-01-19
+2020-09-08
+2031-01-19
+1999-12-31
+
+query D
+SELECT to_date(t.ts, t.format) from ts_utf8view_data as t
+----
+2020-09-08
+2031-01-19
+2020-09-08
+2031-01-19
+1999-12-31
+
# verify date data using tables with formatting options
query D
SELECT to_date(t.ts, '%Y-%m-%d %H/%M/%S%#z', '%+', '%s', '%d-%m-%Y
%H:%M:%S%#z') from ts_utf8_data as t
@@ -214,6 +240,24 @@ SELECT to_date(t.ts, '%Y-%m-%d %H/%M/%S%#z', '%+', '%s',
'%d-%m-%Y %H:%M:%S%#z')
2031-01-19
1999-12-31
+query D
+SELECT to_date(t.ts, '%Y-%m-%d %H/%M/%S%#z', '%+', '%s', '%d-%m-%Y
%H:%M:%S%#z') from ts_largeutf8_data as t
+----
+2020-09-08
+2031-01-19
+2020-09-08
+2031-01-19
+1999-12-31
+
+query D
+SELECT to_date(t.ts, '%Y-%m-%d %H/%M/%S%#z', '%+', '%s', '%d-%m-%Y
%H:%M:%S%#z') from ts_utf8view_data as t
+----
+2020-09-08
+2031-01-19
+2020-09-08
+2031-01-19
+1999-12-31
+
# verify date data using tables with formatting options where at least one
column cannot be parsed
query error Error parsing timestamp from '1926632005' using format '%d-%m-%Y
%H:%M:%S%#z': input contains invalid characters
SELECT to_date(t.ts, '%Y-%m-%d %H/%M/%S%#z', '%+', '%d-%m-%Y %H:%M:%S%#z')
from ts_utf8_data as t
@@ -228,6 +272,24 @@ SELECT to_date(t.ts, '%Y-%m-%d %H/%M/%S%#z', '%s', '%q',
'%d-%m-%Y %H:%M:%S%#z',
2031-01-19
1999-12-31
+query D
+SELECT to_date(t.ts, '%Y-%m-%d %H/%M/%S%#z', '%s', '%q', '%d-%m-%Y
%H:%M:%S%#z', '%+') from ts_largeutf8_data as t
+----
+2020-09-08
+2031-01-19
+2020-09-08
+2031-01-19
+1999-12-31
+
+query D
+SELECT to_date(t.ts, '%Y-%m-%d %H/%M/%S%#z', '%s', '%q', '%d-%m-%Y
%H:%M:%S%#z', '%+') from ts_utf8view_data as t
+----
+2020-09-08
+2031-01-19
+2020-09-08
+2031-01-19
+1999-12-31
+
# timestamp data using tables with formatting options in an array is not
supported at this time
query error function unsupported data type at index 1:
SELECT to_date(t.ts, make_array('%Y-%m-%d %H/%M/%S%#z', '%s', '%q', '%d-%m-%Y
%H:%M:%S%#z', '%+')) from ts_utf8_data as t
diff --git a/datafusion/sqllogictest/test_files/string/string_view.slt
b/datafusion/sqllogictest/test_files/string/string_view.slt
index 2f2a81eb17..997dca7191 100644
--- a/datafusion/sqllogictest/test_files/string/string_view.slt
+++ b/datafusion/sqllogictest/test_files/string/string_view.slt
@@ -901,6 +901,26 @@ logical_plan
01)Projection: find_in_set(test.column1_utf8view, Utf8View("a,b,c,d")) AS c
02)--TableScan: test projection=[column1_utf8view]
+## Ensure no casts for to_date
+query TT
+EXPLAIN SELECT
+ to_date(column1_utf8view, 'a,b,c,d') as c
+FROM test;
+----
+logical_plan
+01)Projection: to_date(test.column1_utf8view, Utf8("a,b,c,d")) AS c
+02)--TableScan: test projection=[column1_utf8view]
+
+## Ensure no casts for to_timestamp
+query TT
+EXPLAIN SELECT
+ to_timestamp(column1_utf8view, 'a,b,c,d') as c
+FROM test;
+----
+logical_plan
+01)Projection: to_timestamp(test.column1_utf8view, Utf8("a,b,c,d")) AS c
+02)--TableScan: test projection=[column1_utf8view]
+
## Ensure no casts for binary operators
# `~` operator (regex match)
query TT
diff --git a/datafusion/sqllogictest/test_files/timestamps.slt
b/datafusion/sqllogictest/test_files/timestamps.slt
index 7a7a8a8703..a680e0db52 100644
--- a/datafusion/sqllogictest/test_files/timestamps.slt
+++ b/datafusion/sqllogictest/test_files/timestamps.slt
@@ -2191,6 +2191,14 @@ create table ts_utf8_data(ts varchar(100), format
varchar(100)) as values
('1926632005', '%s'),
('2000-01-01T01:01:01+07:00', '%+');
+statement ok
+create table ts_largeutf8_data as
+select arrow_cast(ts, 'LargeUtf8') as ts, arrow_cast(format, 'LargeUtf8') as
format from ts_utf8_data;
+
+statement ok
+create table ts_utf8view_data as
+select arrow_cast(ts, 'Utf8View') as ts, arrow_cast(format, 'Utf8View') as
format from ts_utf8_data;
+
# verify timestamp data using tables with formatting options
query P
SELECT to_timestamp(t.ts, t.format) from ts_utf8_data as t
@@ -2201,9 +2209,84 @@ SELECT to_timestamp(t.ts, t.format) from ts_utf8_data as
t
2031-01-19T23:33:25
1999-12-31T18:01:01
+query PPPPP
+SELECT to_timestamp(t.ts, t.format),
+ to_timestamp_seconds(t.ts, t.format),
+ to_timestamp_millis(t.ts, t.format),
+ to_timestamp_micros(t.ts, t.format),
+ to_timestamp_nanos(t.ts, t.format)
+ from ts_largeutf8_data as t
+----
+2020-09-08T12:00:00 2020-09-08T12:00:00 2020-09-08T12:00:00
2020-09-08T12:00:00 2020-09-08T12:00:00
+2031-01-19T18:33:25 2031-01-19T18:33:25 2031-01-19T18:33:25
2031-01-19T18:33:25 2031-01-19T18:33:25
+2020-09-08T12:00:00 2020-09-08T12:00:00 2020-09-08T12:00:00
2020-09-08T12:00:00 2020-09-08T12:00:00
+2031-01-19T23:33:25 2031-01-19T23:33:25 2031-01-19T23:33:25
2031-01-19T23:33:25 2031-01-19T23:33:25
+1999-12-31T18:01:01 1999-12-31T18:01:01 1999-12-31T18:01:01
1999-12-31T18:01:01 1999-12-31T18:01:01
+
+query PPPPP
+SELECT to_timestamp(t.ts, t.format),
+ to_timestamp_seconds(t.ts, t.format),
+ to_timestamp_millis(t.ts, t.format),
+ to_timestamp_micros(t.ts, t.format),
+ to_timestamp_nanos(t.ts, t.format)
+ from ts_utf8view_data as t
+----
+2020-09-08T12:00:00 2020-09-08T12:00:00 2020-09-08T12:00:00
2020-09-08T12:00:00 2020-09-08T12:00:00
+2031-01-19T18:33:25 2031-01-19T18:33:25 2031-01-19T18:33:25
2031-01-19T18:33:25 2031-01-19T18:33:25
+2020-09-08T12:00:00 2020-09-08T12:00:00 2020-09-08T12:00:00
2020-09-08T12:00:00 2020-09-08T12:00:00
+2031-01-19T23:33:25 2031-01-19T23:33:25 2031-01-19T23:33:25
2031-01-19T23:33:25 2031-01-19T23:33:25
+1999-12-31T18:01:01 1999-12-31T18:01:01 1999-12-31T18:01:01
1999-12-31T18:01:01 1999-12-31T18:01:01
+
# verify timestamp data using tables with formatting options
+query PPPPP
+SELECT to_timestamp(t.ts, '%Y-%m-%d %H/%M/%S%#z', '%+', '%s', '%d-%m-%Y
%H:%M:%S%#z'),
+ to_timestamp_seconds(t.ts, '%Y-%m-%d %H/%M/%S%#z', '%+', '%s',
'%d-%m-%Y %H:%M:%S%#z'),
+ to_timestamp_millis(t.ts, '%Y-%m-%d %H/%M/%S%#z', '%+', '%s', '%d-%m-%Y
%H:%M:%S%#z'),
+ to_timestamp_micros(t.ts, '%Y-%m-%d %H/%M/%S%#z', '%+', '%s', '%d-%m-%Y
%H:%M:%S%#z'),
+ to_timestamp_nanos(t.ts, '%Y-%m-%d %H/%M/%S%#z', '%+', '%s', '%d-%m-%Y
%H:%M:%S%#z')
+ from ts_utf8_data as t
+----
+2020-09-08T12:00:00 2020-09-08T12:00:00 2020-09-08T12:00:00
2020-09-08T12:00:00 2020-09-08T12:00:00
+2031-01-19T18:33:25 2031-01-19T18:33:25 2031-01-19T18:33:25
2031-01-19T18:33:25 2031-01-19T18:33:25
+2020-09-08T12:00:00 2020-09-08T12:00:00 2020-09-08T12:00:00
2020-09-08T12:00:00 2020-09-08T12:00:00
+2031-01-19T23:33:25 2031-01-19T23:33:25 2031-01-19T23:33:25
2031-01-19T23:33:25 2031-01-19T23:33:25
+1999-12-31T18:01:01 1999-12-31T18:01:01 1999-12-31T18:01:01
1999-12-31T18:01:01 1999-12-31T18:01:01
+
+query PPPPP
+SELECT to_timestamp(t.ts, '%Y-%m-%d %H/%M/%S%#z', '%+', '%s', '%d-%m-%Y
%H:%M:%S%#z'),
+ to_timestamp_seconds(t.ts, '%Y-%m-%d %H/%M/%S%#z', '%+', '%s',
'%d-%m-%Y %H:%M:%S%#z'),
+ to_timestamp_millis(t.ts, '%Y-%m-%d %H/%M/%S%#z', '%+', '%s', '%d-%m-%Y
%H:%M:%S%#z'),
+ to_timestamp_micros(t.ts, '%Y-%m-%d %H/%M/%S%#z', '%+', '%s', '%d-%m-%Y
%H:%M:%S%#z'),
+ to_timestamp_nanos(t.ts, '%Y-%m-%d %H/%M/%S%#z', '%+', '%s', '%d-%m-%Y
%H:%M:%S%#z')
+ from ts_largeutf8_data as t
+----
+2020-09-08T12:00:00 2020-09-08T12:00:00 2020-09-08T12:00:00
2020-09-08T12:00:00 2020-09-08T12:00:00
+2031-01-19T18:33:25 2031-01-19T18:33:25 2031-01-19T18:33:25
2031-01-19T18:33:25 2031-01-19T18:33:25
+2020-09-08T12:00:00 2020-09-08T12:00:00 2020-09-08T12:00:00
2020-09-08T12:00:00 2020-09-08T12:00:00
+2031-01-19T23:33:25 2031-01-19T23:33:25 2031-01-19T23:33:25
2031-01-19T23:33:25 2031-01-19T23:33:25
+1999-12-31T18:01:01 1999-12-31T18:01:01 1999-12-31T18:01:01
1999-12-31T18:01:01 1999-12-31T18:01:01
+
+query PPPPP
+SELECT to_timestamp(t.ts, '%Y-%m-%d %H/%M/%S%#z', '%+', '%s', '%d-%m-%Y
%H:%M:%S%#z'),
+ to_timestamp_seconds(t.ts, '%Y-%m-%d %H/%M/%S%#z', '%+', '%s',
'%d-%m-%Y %H:%M:%S%#z'),
+ to_timestamp_millis(t.ts, '%Y-%m-%d %H/%M/%S%#z', '%+', '%s', '%d-%m-%Y
%H:%M:%S%#z'),
+ to_timestamp_micros(t.ts, '%Y-%m-%d %H/%M/%S%#z', '%+', '%s', '%d-%m-%Y
%H:%M:%S%#z'),
+ to_timestamp_nanos(t.ts, '%Y-%m-%d %H/%M/%S%#z', '%+', '%s', '%d-%m-%Y
%H:%M:%S%#z')
+ from ts_utf8view_data as t
+----
+2020-09-08T12:00:00 2020-09-08T12:00:00 2020-09-08T12:00:00
2020-09-08T12:00:00 2020-09-08T12:00:00
+2031-01-19T18:33:25 2031-01-19T18:33:25 2031-01-19T18:33:25
2031-01-19T18:33:25 2031-01-19T18:33:25
+2020-09-08T12:00:00 2020-09-08T12:00:00 2020-09-08T12:00:00
2020-09-08T12:00:00 2020-09-08T12:00:00
+2031-01-19T23:33:25 2031-01-19T23:33:25 2031-01-19T23:33:25
2031-01-19T23:33:25 2031-01-19T23:33:25
+1999-12-31T18:01:01 1999-12-31T18:01:01 1999-12-31T18:01:01
1999-12-31T18:01:01 1999-12-31T18:01:01
+
+# verify timestamp data using tables with formatting options where at least one column cannot be parsed
+query error Error parsing timestamp from '1926632005' using format '%d-%m-%Y %H:%M:%S%#z': input contains invalid characters
+SELECT to_timestamp(t.ts, '%Y-%m-%d %H/%M/%S%#z', '%+', '%d-%m-%Y %H:%M:%S%#z') from ts_utf8_data as t
+
+# verify timestamp data using tables with formatting options where one of the formats is invalid
query P
-SELECT to_timestamp(t.ts, '%Y-%m-%d %H/%M/%S%#z', '%+', '%s', '%d-%m-%Y
%H:%M:%S%#z') from ts_utf8_data as t
+SELECT to_timestamp(t.ts, '%Y-%m-%d %H/%M/%S%#z', '%s', '%q', '%d-%m-%Y
%H:%M:%S%#z', '%+') from ts_utf8_data as t
----
2020-09-08T12:00:00
2031-01-19T18:33:25
@@ -2211,13 +2294,17 @@ SELECT to_timestamp(t.ts, '%Y-%m-%d %H/%M/%S%#z', '%+',
'%s', '%d-%m-%Y %H:%M:%S
2031-01-19T23:33:25
1999-12-31T18:01:01
-# verify timestamp data using tables with formatting options where at least one column cannot be parsed
-query error Error parsing timestamp from '1926632005' using format '%d-%m-%Y %H:%M:%S%#z': input contains invalid characters
-SELECT to_timestamp(t.ts, '%Y-%m-%d %H/%M/%S%#z', '%+', '%d-%m-%Y %H:%M:%S%#z') from ts_utf8_data as t
+query P
+SELECT to_timestamp(t.ts, '%Y-%m-%d %H/%M/%S%#z', '%s', '%q', '%d-%m-%Y
%H:%M:%S%#z', '%+') from ts_largeutf8_data as t
+----
+2020-09-08T12:00:00
+2031-01-19T18:33:25
+2020-09-08T12:00:00
+2031-01-19T23:33:25
+1999-12-31T18:01:01
-# verify timestamp data using tables with formatting options where one of the formats is invalid
query P
-SELECT to_timestamp(t.ts, '%Y-%m-%d %H/%M/%S%#z', '%s', '%q', '%d-%m-%Y
%H:%M:%S%#z', '%+') from ts_utf8_data as t
+SELECT to_timestamp(t.ts, '%Y-%m-%d %H/%M/%S%#z', '%s', '%q', '%d-%m-%Y
%H:%M:%S%#z', '%+') from ts_utf8view_data as t
----
2020-09-08T12:00:00
2031-01-19T18:33:25
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]