This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git

commit ad9d9a3f81316346e80c4ca888f99216622d7de1
Author: huangzhaowei <[email protected]>
AuthorDate: Tue Feb 14 14:47:00 2023 +0800

    [Improvement](ES)Supprt datav2 and datetimev2 for es query (#16633)
    
    * Supprt datav2 and datetimev2 for es query
---
 be/src/exec/es/es_scroll_parser.cpp | 112 ++++++++++++++++--------------------
 be/src/exec/es/es_scroll_parser.h   |  12 +---
 2 files changed, 52 insertions(+), 72 deletions(-)

diff --git a/be/src/exec/es/es_scroll_parser.cpp 
b/be/src/exec/es/es_scroll_parser.cpp
index 7170057ac2..5e9ee200da 100644
--- a/be/src/exec/es/es_scroll_parser.cpp
+++ b/be/src/exec/es/es_scroll_parser.cpp
@@ -758,13 +758,15 @@ Status ScrollParser::fill_columns(const TupleDescriptor* 
tuple_desc,
         }
 
         case TYPE_DATE:
-        case TYPE_DATETIME: {
+        case TYPE_DATETIME:
+        case TYPE_DATEV2:
+        case TYPE_DATETIMEV2: {
             // this would happend just only when `enable_docvalue_scan = 
false`, and field has timestamp format date from _source
             if (col.IsNumber()) {
                 // ES process date/datetime field would use millisecond 
timestamp for index or docvalue
                 // processing date type field, if a number is encountered, 
Doris On ES will force it to be processed according to ms
                 // Doris On ES needs to be consistent with ES, so just divided 
by 1000 because the unit for from_unixtime is seconds
-                RETURN_IF_ERROR(fill_date_col_with_timestamp(col_ptr, col, 
type));
+                RETURN_IF_ERROR(fill_date_col(col_ptr, col, type, false));
             } else if (col.IsArray() && pure_doc_value) {
                 // this would happened just only when `enable_docvalue_scan = 
true`
                 // ES add default format for all field after ES 6.4, if we not 
provided format for `date` field ES would impose
@@ -772,16 +774,16 @@ Status ScrollParser::fill_columns(const TupleDescriptor* 
tuple_desc,
                 // At present, we just process this string format date. After 
some PR were merged into Doris, we would impose `epoch_mills` for
                 // date field's docvalue
                 if (col[0].IsString()) {
-                    RETURN_IF_ERROR(fill_date_col_with_strval(col_ptr, col[0], 
type));
+                    RETURN_IF_ERROR(fill_date_col(col_ptr, col[0], type, 
true));
                     break;
                 }
                 // ES would return millisecond timestamp for date field, 
divided by 1000 because the unit for from_unixtime is seconds
-                RETURN_IF_ERROR(fill_date_col_with_timestamp(col_ptr, col, 
type));
+                RETURN_IF_ERROR(fill_date_col(col_ptr, col, type, false));
             } else {
                 // this would happened just only when `enable_docvalue_scan = 
false`, and field has string format date from _source
                 RETURN_ERROR_IF_COL_IS_ARRAY(col, type);
                 RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
-                RETURN_IF_ERROR(fill_date_col_with_strval(col_ptr, col, type));
+                RETURN_IF_ERROR(fill_date_col(col_ptr, col, type, true));
             }
             break;
         }
@@ -796,72 +798,56 @@ Status ScrollParser::fill_columns(const TupleDescriptor* 
tuple_desc,
     return Status::OK();
 }
 
-Status ScrollParser::fill_date_slot_with_strval(void* slot, const 
rapidjson::Value& col,
-                                                PrimitiveType type) {
-    DateTimeValue* ts_slot = reinterpret_cast<DateTimeValue*>(slot);
+Status ScrollParser::fill_date_col(vectorized::IColumn* col_ptr, const 
rapidjson::Value& col,
+                                   PrimitiveType type, bool is_date_str) {
     const std::string& val = col.GetString();
     size_t val_size = col.GetStringLength();
-    if (!ts_slot->from_date_str(val.c_str(), val_size)) {
-        RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type);
-    }
-    if (type == TYPE_DATE) {
-        ts_slot->cast_to_date();
-    } else {
-        ts_slot->to_datetime();
-    }
-    return Status::OK();
-}
 
-Status ScrollParser::fill_date_slot_with_timestamp(void* slot, const 
rapidjson::Value& col,
-                                                   PrimitiveType type) {
-    if (!reinterpret_cast<DateTimeValue*>(slot)->from_unixtime(col.GetInt64() 
/ 1000, "+08:00")) {
-        RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type);
-    }
-    if (type == TYPE_DATE) {
-        reinterpret_cast<DateTimeValue*>(slot)->cast_to_date();
-    } else {
-        reinterpret_cast<DateTimeValue*>(slot)->set_type(TIME_DATETIME);
-    }
-    return Status::OK();
-}
+    if (type == TYPE_DATE || type == TYPE_DATETIME) {
+        vectorized::VecDateTimeValue dt_val;
+        if ((is_date_str && !dt_val.from_date_str(val.c_str(), val_size)) ||
+            (!is_date_str && !dt_val.from_unixtime(col.GetInt64() / 1000, 
"+08:00"))) {
+            RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type);
+        }
+        if (type == TYPE_DATE) {
+            dt_val.cast_to_date();
+        } else {
+            dt_val.to_datetime();
+        }
 
-Status ScrollParser::fill_date_col_with_strval(vectorized::IColumn* col_ptr,
-                                               const rapidjson::Value& col, 
PrimitiveType type) {
-    vectorized::VecDateTimeValue dt_val;
-    const std::string& val = col.GetString();
-    size_t val_size = col.GetStringLength();
-    if (!dt_val.from_date_str(val.c_str(), val_size)) {
-        RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type);
-    }
-    if (type == TYPE_DATE) {
-        dt_val.cast_to_date();
-    } else {
-        dt_val.to_datetime();
-    }
+        auto date_packed_int = 
binary_cast<doris::vectorized::VecDateTimeValue, int64_t>(
+                *reinterpret_cast<vectorized::VecDateTimeValue*>(&dt_val));
+        col_ptr->insert_data(const_cast<const 
char*>(reinterpret_cast<char*>(&date_packed_int)), 0);
+        return Status::OK();
+    } else if (type == TYPE_DATEV2) {
+        vectorized::DateV2Value<doris::vectorized::DateV2ValueType> dt_val;
+        if ((is_date_str && !dt_val.from_date_str(val.c_str(), val_size)) ||
+            (!is_date_str && !dt_val.from_unixtime(col.GetInt64() / 1000, 
"+08:00"))) {
+            RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type);
+        }
+        auto date_packed_int = binary_cast<
+                
doris::vectorized::DateV2Value<doris::vectorized::DateV2ValueType>, uint32_t>(
+                
*reinterpret_cast<vectorized::DateV2Value<doris::vectorized::DateV2ValueType>*>(
+                        &dt_val));
+        col_ptr->insert_data(const_cast<const 
char*>(reinterpret_cast<char*>(&date_packed_int)), 0);
+        return Status::OK();
 
-    auto date_packed_int = binary_cast<doris::vectorized::VecDateTimeValue, 
int64_t>(
-            *reinterpret_cast<vectorized::VecDateTimeValue*>(&dt_val));
-    col_ptr->insert_data(const_cast<const 
char*>(reinterpret_cast<char*>(&date_packed_int)), 0);
-    return Status::OK();
-}
+    } else if (type == TYPE_DATETIMEV2) {
+        vectorized::DateV2Value<doris::vectorized::DateTimeV2ValueType> dt_val;
+        if ((is_date_str && !dt_val.from_date_str(val.c_str(), val_size)) ||
+            (!is_date_str && !dt_val.from_unixtime(col.GetInt64() / 1000, 
"+08:00"))) {
+            RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type);
+        }
+        auto date_packed_int = binary_cast<
+                
vectorized::DateV2Value<doris::vectorized::DateTimeV2ValueType>, uint64_t>(
+                
*reinterpret_cast<vectorized::DateV2Value<doris::vectorized::DateTimeV2ValueType>*>(
+                        &dt_val));
+        col_ptr->insert_data(const_cast<const 
char*>(reinterpret_cast<char*>(&date_packed_int)), 0);
+        return Status::OK();
 
-Status ScrollParser::fill_date_col_with_timestamp(vectorized::IColumn* col_ptr,
-                                                  const rapidjson::Value& col, 
PrimitiveType type) {
-    vectorized::VecDateTimeValue dt_val;
-    if (!dt_val.from_unixtime(col.GetInt64() / 1000, "+08:00")) {
-        RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type);
-    }
-    if (type == TYPE_DATE) {
-        
reinterpret_cast<vectorized::VecDateTimeValue*>(&dt_val)->cast_to_date();
     } else {
-        
reinterpret_cast<vectorized::VecDateTimeValue*>(&dt_val)->set_type(TIME_DATETIME);
+        return Status::InternalError("Unsupported datetime type.");
     }
-
-    auto date_packed_int = binary_cast<doris::vectorized::VecDateTimeValue, 
int64_t>(
-            *reinterpret_cast<vectorized::VecDateTimeValue*>(&dt_val));
-    col_ptr->insert_data(const_cast<const 
char*>(reinterpret_cast<char*>(&date_packed_int)), 0);
-
-    return Status::OK();
 }
 
 } // namespace doris
diff --git a/be/src/exec/es/es_scroll_parser.h 
b/be/src/exec/es/es_scroll_parser.h
index 561150ac04..9e90e12887 100644
--- a/be/src/exec/es/es_scroll_parser.h
+++ b/be/src/exec/es/es_scroll_parser.h
@@ -46,15 +46,9 @@ public:
 private:
     // helper method for processing date/datetime cols with rapidjson::Value
     // type is used for distinguish date and datetime
-    // fill date slot with string format date
-    Status fill_date_slot_with_strval(void* slot, const rapidjson::Value& col, 
PrimitiveType type);
-    Status fill_date_col_with_strval(vectorized::IColumn* col_ptr, const 
rapidjson::Value& col,
-                                     PrimitiveType type);
-    // fill date slot with timestamp
-    Status fill_date_slot_with_timestamp(void* slot, const rapidjson::Value& 
col,
-                                         PrimitiveType type);
-    Status fill_date_col_with_timestamp(vectorized::IColumn* col_ptr, const 
rapidjson::Value& col,
-                                        PrimitiveType type);
+    // is_date_str indicate parse datetime from string, otherwise from 
epoch_millis
+    Status fill_date_col(vectorized::IColumn* col_ptr, const rapidjson::Value& 
col,
+                         PrimitiveType type, bool is_date_str);
 
 private:
     std::string _scroll_id;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to