This is an automated email from the ASF dual-hosted git repository.

changchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 1db6659891 [GLUTEN-7829][CH] Fix read csv file with datetime field not equals spark (#7832)
1db6659891 is described below

commit 1db66598913411c9936ab0ebde84bb14845d1199
Author: Shuai li <[email protected]>
AuthorDate: Thu Nov 7 08:52:43 2024 +0800

    [GLUTEN-7829][CH] Fix read csv file with datetime field not equals spark (#7832)
---
 ...cala => GlutenClickHouseExcelFormatSuite.scala} |  5 ++-
 .../Storages/Serializations/ExcelReadHelpers.cpp   | 51 ++++++++++++++--------
 .../Storages/Serializations/ExcelReadHelpers.h     |  2 +-
 3 files changed, 38 insertions(+), 20 deletions(-)

diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseFileFormatSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseExcelFormatSuite.scala
similarity index 99%
rename from 
backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseFileFormatSuite.scala
rename to 
backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseExcelFormatSuite.scala
index 2337316257..a5b866cb44 100644
--- 
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseFileFormatSuite.scala
+++ 
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseExcelFormatSuite.scala
@@ -46,7 +46,7 @@ case class AllDataTypesWithNonPrimitiveType(
     // data: (Seq[Int], (Int, String))
 )
 
-class GlutenClickHouseFileFormatSuite
+class GlutenClickHouseExcelFormatSuite
   extends GlutenClickHouseTPCHAbstractSuite
   with AdaptiveSparkPlanHelper {
   import testImplicits._
@@ -273,7 +273,8 @@ class GlutenClickHouseFileFormatSuite
         StructField.apply("boolean_field", BooleanType, nullable = true),
         StructField.apply("decimal_field", DecimalType.apply(10, 2), nullable 
= true),
         StructField.apply("date_field", DateType, nullable = true),
-        StructField.apply("timestamp_field", TimestampType, nullable = true)
+        StructField.apply("timestamp_field", TimestampType, nullable = true),
+        StructField.apply("boolean_field2", BooleanType, nullable = true)
       ))
 
     val options = new util.HashMap[String, String]()
diff --git a/cpp-ch/local-engine/Storages/Serializations/ExcelReadHelpers.cpp 
b/cpp-ch/local-engine/Storages/Serializations/ExcelReadHelpers.cpp
index 6a7b7b2e29..8c5bce2641 100644
--- a/cpp-ch/local-engine/Storages/Serializations/ExcelReadHelpers.cpp
+++ b/cpp-ch/local-engine/Storages/Serializations/ExcelReadHelpers.cpp
@@ -34,7 +34,7 @@ namespace local_engine
 bool readDateText(LocalDate & date, DB::ReadBuffer & buf, const 
DB::FormatSettings & settings)
 {
     bool is_us_style = settings.date_time_input_format == 
DB::FormatSettings::DateTimeInputFormat::BestEffortUS;
-    return readDateTextWithExcel(date, buf, is_us_style);
+    return readDateTextWithExcel(date, buf, is_us_style, settings);
 }
 
 bool readDateTime64Text(
@@ -84,6 +84,20 @@ bool readDatetime64TextWithExcel(
     ///           yyyy-MM-dd'T'HH:mm:ss.SSS'Z'
     /// Other will fallback to ch read.
     /// The whole value is in buffer.
+    ///
+    auto quick_return
+        = [&time_zone, &scale, &datetime64](
+              UInt16 year, UInt8 month, UInt8 day, UInt8 hour, UInt8 minute, 
UInt8 second, DB::DateTime64::NativeType fractional) -> bool
+    {
+        if (!day)
+            day = 1;
+
+        if (!checkDate(year, month, day))
+            return false;
+
+        time_t datetime = time_zone.makeDateTime(year, month, day, hour, 
minute, second);
+        return 
DB::DecimalUtils::tryGetDecimalFromComponents<DB::DateTime64>(datetime, 
fractional, scale, datetime64);
+    };
 
     UInt16 year = 0;
     UInt8 month = 0;
@@ -91,6 +105,7 @@ bool readDatetime64TextWithExcel(
     UInt8 hour = 0;
     UInt8 minute = 0;
     UInt8 second = 0;
+    DB::DateTime64::NativeType fractional = 0;
 
     char year_digits[std::numeric_limits<UInt64>::digits10];
     size_t num_year_digits = readDigits(year_digits, sizeof(year_digits), buf);
@@ -106,11 +121,13 @@ bool readDatetime64TextWithExcel(
     char month_digits[std::numeric_limits<UInt64>::digits10];
     size_t num_month_digits = readDigits(month_digits, sizeof(month_digits), 
buf);
 
-    if (num_month_digits != 2)
+    if (num_month_digits == 1)
+        readDecimalNumber<1>(month, month_digits);
+    else if (num_month_digits == 2)
+        readDecimalNumber<2>(month, month_digits);
+    else
         return false;
 
-    readDecimalNumber<2>(month, month_digits);
-
     if (*buf.position() != delimiter_after_year) // delimiter must same char
         return false;
 
@@ -118,13 +135,18 @@ bool readDatetime64TextWithExcel(
     char day_digits[std::numeric_limits<UInt64>::digits10];
     size_t num_day_digits = readDigits(day_digits, sizeof(day_digits), buf);
 
-    if (num_day_digits != 2)
+    if (num_day_digits == 1)
+        readDecimalNumber<1>(day, day_digits);
+    else if (num_day_digits == 2)
+        readDecimalNumber<2>(day, day_digits);
+    else
         return false;
 
-    readDecimalNumber<2>(day, day_digits);
-
     char delimiter_after_day = *buf.position();
 
+    if (delimiter_after_day == settings.delimiter)
+        return quick_return(year, month, day, hour, minute, second, 
fractional);
+
     if (delimiter_after_day != ' ' && delimiter_after_day != '\'')
         return false;
 
@@ -159,7 +181,6 @@ bool readDatetime64TextWithExcel(
 
     /// .SSS'Z'
     /// if not has quote, not allow ',' after 'ss'
-    DB::DateTime64::NativeType fractional = 0;
     bool allow_comma = (settings.delimiter == ',' && quote) || (!quote && 
settings.delimiter != ',');
     if (!buf.eof() && (*buf.position() == '.' || (allow_comma && 
*buf.position() == ',')))
     {
@@ -186,17 +207,10 @@ bool readDatetime64TextWithExcel(
             buf.position() = buf.position() + 3;
     }
 
-    if (!day)
-        day = 1;
-
-    if (!checkDate(year, month, day))
-        return false;
-
-    time_t datetime = time_zone.makeDateTime(year, month, day, hour, minute, 
second);
-    return 
DB::DecimalUtils::tryGetDecimalFromComponents<DB::DateTime64>(datetime, 
fractional, scale, datetime64);
+    return quick_return(year, month, day, hour, minute, second, fractional);
 }
 
-inline bool readDateTextWithExcel(LocalDate & date, DB::ReadBuffer & buf, bool 
is_us_style)
+inline bool readDateTextWithExcel(LocalDate & date, DB::ReadBuffer & buf, bool 
is_us_style, const DB::FormatSettings & settings)
 {
     if (buf.eof())
         return false;
@@ -268,6 +282,9 @@ inline bool readDateTextWithExcel(LocalDate & date, 
DB::ReadBuffer & buf, bool i
             readDecimalNumber<2>(month, first_digits);
 
         char delimiter_after_year = *buf.position();
+        if (delimiter_after_year == settings.csv.delimiter)
+            return false;
+
         ++buf.position();
 
 
diff --git a/cpp-ch/local-engine/Storages/Serializations/ExcelReadHelpers.h 
b/cpp-ch/local-engine/Storages/Serializations/ExcelReadHelpers.h
index f96b31f704..a5f272334e 100644
--- a/cpp-ch/local-engine/Storages/Serializations/ExcelReadHelpers.h
+++ b/cpp-ch/local-engine/Storages/Serializations/ExcelReadHelpers.h
@@ -98,7 +98,7 @@ bool readDateTime64Text(
     const DateLUTImpl & utc_time_zone,
     bool quote);
 
-bool readDateTextWithExcel(LocalDate & date, DB::ReadBuffer & buf, bool 
is_us_style);
+bool readDateTextWithExcel(LocalDate & date, DB::ReadBuffer & buf, bool 
is_us_style, const DB::FormatSettings & settings);
 bool readDateText(LocalDate & date, DB::ReadBuffer & buf, const 
DB::FormatSettings & settings);
 
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to