This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-1.2-lts in repository https://gitbox.apache.org/repos/asf/doris.git
commit ad9d9a3f81316346e80c4ca888f99216622d7de1 Author: huangzhaowei <[email protected]> AuthorDate: Tue Feb 14 14:47:00 2023 +0800 [Improvement](ES)Supprt datav2 and datetimev2 for es query (#16633) * Supprt datav2 and datetimev2 for es query --- be/src/exec/es/es_scroll_parser.cpp | 112 ++++++++++++++++-------------------- be/src/exec/es/es_scroll_parser.h | 12 +--- 2 files changed, 52 insertions(+), 72 deletions(-) diff --git a/be/src/exec/es/es_scroll_parser.cpp b/be/src/exec/es/es_scroll_parser.cpp index 7170057ac2..5e9ee200da 100644 --- a/be/src/exec/es/es_scroll_parser.cpp +++ b/be/src/exec/es/es_scroll_parser.cpp @@ -758,13 +758,15 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc, } case TYPE_DATE: - case TYPE_DATETIME: { + case TYPE_DATETIME: + case TYPE_DATEV2: + case TYPE_DATETIMEV2: { // this would happend just only when `enable_docvalue_scan = false`, and field has timestamp format date from _source if (col.IsNumber()) { // ES process date/datetime field would use millisecond timestamp for index or docvalue // processing date type field, if a number is encountered, Doris On ES will force it to be processed according to ms // Doris On ES needs to be consistent with ES, so just divided by 1000 because the unit for from_unixtime is seconds - RETURN_IF_ERROR(fill_date_col_with_timestamp(col_ptr, col, type)); + RETURN_IF_ERROR(fill_date_col(col_ptr, col, type, false)); } else if (col.IsArray() && pure_doc_value) { // this would happened just only when `enable_docvalue_scan = true` // ES add default format for all field after ES 6.4, if we not provided format for `date` field ES would impose @@ -772,16 +774,16 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc, // At present, we just process this string format date. After some PR were merged into Doris, we would impose `epoch_mills` for // date field's docvalue if (col[0].IsString()) { - RETURN_IF_ERROR(fill_date_col_with_strval(col_ptr, col[0], type)); + RETURN_IF_ERROR(fill_date_col(col_ptr, col[0], type, true)); break; } // ES would return millisecond timestamp for date field, divided by 1000 because the unit for from_unixtime is seconds - RETURN_IF_ERROR(fill_date_col_with_timestamp(col_ptr, col, type)); + RETURN_IF_ERROR(fill_date_col(col_ptr, col, type, false)); } else { // this would happened just only when `enable_docvalue_scan = false`, and field has string format date from _source RETURN_ERROR_IF_COL_IS_ARRAY(col, type); RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); - RETURN_IF_ERROR(fill_date_col_with_strval(col_ptr, col, type)); + RETURN_IF_ERROR(fill_date_col(col_ptr, col, type, true)); } break; } @@ -796,72 +798,56 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc, return Status::OK(); } -Status ScrollParser::fill_date_slot_with_strval(void* slot, const rapidjson::Value& col, - PrimitiveType type) { - DateTimeValue* ts_slot = reinterpret_cast<DateTimeValue*>(slot); +Status ScrollParser::fill_date_col(vectorized::IColumn* col_ptr, const rapidjson::Value& col, + PrimitiveType type, bool is_date_str) { const std::string& val = col.GetString(); size_t val_size = col.GetStringLength(); - if (!ts_slot->from_date_str(val.c_str(), val_size)) { - RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type); - } - if (type == TYPE_DATE) { - ts_slot->cast_to_date(); - } else { - ts_slot->to_datetime(); - } - return Status::OK(); -} -Status ScrollParser::fill_date_slot_with_timestamp(void* slot, const rapidjson::Value& col, - PrimitiveType type) { - if (!reinterpret_cast<DateTimeValue*>(slot)->from_unixtime(col.GetInt64() / 1000, "+08:00")) { - RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type); - } - if (type == TYPE_DATE) { - reinterpret_cast<DateTimeValue*>(slot)->cast_to_date(); - } else { - reinterpret_cast<DateTimeValue*>(slot)->set_type(TIME_DATETIME); - } - return Status::OK(); -} + if (type == TYPE_DATE || type == TYPE_DATETIME) { + vectorized::VecDateTimeValue dt_val; + if ((is_date_str && !dt_val.from_date_str(val.c_str(), val_size)) || + (!is_date_str && !dt_val.from_unixtime(col.GetInt64() / 1000, "+08:00"))) { + RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type); + } + if (type == TYPE_DATE) { + dt_val.cast_to_date(); + } else { + dt_val.to_datetime(); + } -Status ScrollParser::fill_date_col_with_strval(vectorized::IColumn* col_ptr, - const rapidjson::Value& col, PrimitiveType type) { - vectorized::VecDateTimeValue dt_val; - const std::string& val = col.GetString(); - size_t val_size = col.GetStringLength(); - if (!dt_val.from_date_str(val.c_str(), val_size)) { - RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type); - } - if (type == TYPE_DATE) { - dt_val.cast_to_date(); - } else { - dt_val.to_datetime(); - } + auto date_packed_int = binary_cast<doris::vectorized::VecDateTimeValue, int64_t>( + *reinterpret_cast<vectorized::VecDateTimeValue*>(&dt_val)); + col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&date_packed_int)), 0); + return Status::OK(); + } else if (type == TYPE_DATEV2) { + vectorized::DateV2Value<doris::vectorized::DateV2ValueType> dt_val; + if ((is_date_str && !dt_val.from_date_str(val.c_str(), val_size)) || + (!is_date_str && !dt_val.from_unixtime(col.GetInt64() / 1000, "+08:00"))) { + RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type); + } + auto date_packed_int = binary_cast< + doris::vectorized::DateV2Value<doris::vectorized::DateV2ValueType>, uint32_t>( + *reinterpret_cast<vectorized::DateV2Value<doris::vectorized::DateV2ValueType>*>( + &dt_val)); + col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&date_packed_int)), 0); + return Status::OK(); - auto date_packed_int = binary_cast<doris::vectorized::VecDateTimeValue, int64_t>( - *reinterpret_cast<vectorized::VecDateTimeValue*>(&dt_val)); - col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&date_packed_int)), 0); - return Status::OK(); -} + } else if (type == TYPE_DATETIMEV2) { + vectorized::DateV2Value<doris::vectorized::DateTimeV2ValueType> dt_val; + if ((is_date_str && !dt_val.from_date_str(val.c_str(), val_size)) || + (!is_date_str && !dt_val.from_unixtime(col.GetInt64() / 1000, "+08:00"))) { + RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type); + } + auto date_packed_int = binary_cast< + vectorized::DateV2Value<doris::vectorized::DateTimeV2ValueType>, uint64_t>( + *reinterpret_cast<vectorized::DateV2Value<doris::vectorized::DateTimeV2ValueType>*>( + &dt_val)); + col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&date_packed_int)), 0); + return Status::OK(); -Status ScrollParser::fill_date_col_with_timestamp(vectorized::IColumn* col_ptr, - const rapidjson::Value& col, PrimitiveType type) { - vectorized::VecDateTimeValue dt_val; - if (!dt_val.from_unixtime(col.GetInt64() / 1000, "+08:00")) { - RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type); - } - if (type == TYPE_DATE) { - reinterpret_cast<vectorized::VecDateTimeValue*>(&dt_val)->cast_to_date(); } else { - reinterpret_cast<vectorized::VecDateTimeValue*>(&dt_val)->set_type(TIME_DATETIME); + return Status::InternalError("Unsupported datetime type."); } - - auto date_packed_int = binary_cast<doris::vectorized::VecDateTimeValue, int64_t>( - *reinterpret_cast<vectorized::VecDateTimeValue*>(&dt_val)); - col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&date_packed_int)), 0); - - return Status::OK(); } } // namespace doris diff --git a/be/src/exec/es/es_scroll_parser.h b/be/src/exec/es/es_scroll_parser.h index 561150ac04..9e90e12887 100644 --- a/be/src/exec/es/es_scroll_parser.h +++ b/be/src/exec/es/es_scroll_parser.h @@ -46,15 +46,9 @@ public: private: // helper method for processing date/datetime cols with rapidjson::Value // type is used for distinguish date and datetime - // fill date slot with string format date - Status fill_date_slot_with_strval(void* slot, const rapidjson::Value& col, PrimitiveType type); - Status fill_date_col_with_strval(vectorized::IColumn* col_ptr, const rapidjson::Value& col, - PrimitiveType type); - // fill date slot with timestamp - Status fill_date_slot_with_timestamp(void* slot, const rapidjson::Value& col, - PrimitiveType type); - Status fill_date_col_with_timestamp(vectorized::IColumn* col_ptr, const rapidjson::Value& col, - PrimitiveType type); + // is_date_str indicate parse datetime from string, otherwise from epoch_millis + Status fill_date_col(vectorized::IColumn* col_ptr, const rapidjson::Value& col, + PrimitiveType type, bool is_date_str); private: std::string _scroll_id; --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
