This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 8efb98e8518 [feature](hive)support hive catalog read json table. (#43469) (#44848)
8efb98e8518 is described below
commit 8efb98e8518aa34692314ba6a382852060c99116
Author: daidai <[email protected]>
AuthorDate: Thu Dec 5 00:18:23 2024 +0800
[feature](hive)support hive catalog read json table. (#43469) (#44848)
bp #43469
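
Note: a minimal illustration of what this enables (the Hive-side DDL mirrors
run71.hql added below; the Doris-side query is a sketch and assumes a hive
catalog named `hive` has already been created):

    -- Hive side: a JSON-formatted table using the hcatalog JsonSerDe
    CREATE TABLE json_load_data_table (
      `id` int,
      `col1` int,
      `col2` struct<col2a:int, col2b:string>,
      `col3` map<int,string>
    ) ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe';

    -- Doris side: the table can now be read through the hive catalog
    SELECT * FROM hive.`default`.json_load_data_table;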
---
.../vec/data_types/serde/data_type_array_serde.h | 2 +
be/src/vec/data_types/serde/data_type_map_serde.h | 4 +
.../data_types/serde/data_type_nullable_serde.h | 2 +
be/src/vec/data_types/serde/data_type_serde.h | 12 +-
.../vec/data_types/serde/data_type_struct_serde.h | 2 +
be/src/vec/exec/format/json/new_json_reader.cpp | 555 ++++++++++++++++-----
be/src/vec/exec/format/json/new_json_reader.h | 32 +-
be/src/vec/exec/scan/vfile_scanner.cpp | 4 +-
.../scripts/create_preinstalled_scripts/run69.hql | 35 ++
.../scripts/create_preinstalled_scripts/run70.hql | 73 +++
.../scripts/create_preinstalled_scripts/run71.hql | 13 +
.../json/json_all_complex_types/dt=dt1/000000_0 | 3 +
.../json/json_all_complex_types/dt=dt2/000000_0 | 1 +
.../json/json_all_complex_types/dt=dt3/000000_0 | 2 +
.../preinstalled_data/json/json_load_data_table/1 | 13 +
.../json/json_nested_complex_table/1 | 2 +
.../json/json_nested_complex_table/2 | 1 +
.../json/json_nested_complex_table/modify_2 | 2 +
.../datasource/hive/HiveMetaStoreClientHelper.java | 3 +
.../doris/datasource/hive/source/HiveScanNode.java | 41 +-
.../hive/hive_json_basic_test.out | 115 +++++
.../hive/hive_json_basic_test.groovy | 71 +++
22 files changed, 830 insertions(+), 158 deletions(-)
diff --git a/be/src/vec/data_types/serde/data_type_array_serde.h b/be/src/vec/data_types/serde/data_type_array_serde.h
index 13c40e60777..2798596c823 100644
--- a/be/src/vec/data_types/serde/data_type_array_serde.h
+++ b/be/src/vec/data_types/serde/data_type_array_serde.h
@@ -101,6 +101,8 @@ public:
         nested_serde->set_return_object_as_string(value);
     }
 
+    virtual DataTypeSerDeSPtrs get_nested_serdes() const override { return {nested_serde}; }
+
 private:
     template <bool is_binary_format>
     Status _write_column_to_mysql(const IColumn& column, MysqlRowBuffer<is_binary_format>& result,
diff --git a/be/src/vec/data_types/serde/data_type_map_serde.h b/be/src/vec/data_types/serde/data_type_map_serde.h
index 5e10a7ec3f2..d9572682470 100644
--- a/be/src/vec/data_types/serde/data_type_map_serde.h
+++ b/be/src/vec/data_types/serde/data_type_map_serde.h
@@ -95,6 +95,10 @@ public:
         value_serde->set_return_object_as_string(value);
     }
 
+    virtual DataTypeSerDeSPtrs get_nested_serdes() const override {
+        return {key_serde, value_serde};
+    }
+
 private:
     template <bool is_binary_format>
     Status _write_column_to_mysql(const IColumn& column, MysqlRowBuffer<is_binary_format>& result,
diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.h b/be/src/vec/data_types/serde/data_type_nullable_serde.h
index e9af344fb65..c7dac856621 100644
--- a/be/src/vec/data_types/serde/data_type_nullable_serde.h
+++ b/be/src/vec/data_types/serde/data_type_nullable_serde.h
@@ -99,6 +99,8 @@ public:
                                    int row_num) const override;
     Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const override;
 
+    virtual DataTypeSerDeSPtrs get_nested_serdes() const override { return {nested_serde}; }
+
 private:
     template <bool is_binary_format>
     Status _write_column_to_mysql(const IColumn& column, MysqlRowBuffer<is_binary_format>& result,
diff --git a/be/src/vec/data_types/serde/data_type_serde.h b/be/src/vec/data_types/serde/data_type_serde.h
index 46236faa926..6caa51d2663 100644
--- a/be/src/vec/data_types/serde/data_type_serde.h
+++ b/be/src/vec/data_types/serde/data_type_serde.h
@@ -98,6 +98,10 @@ class IColumn;
 class Arena;
 class IDataType;
 
+class DataTypeSerDe;
+using DataTypeSerDeSPtr = std::shared_ptr<DataTypeSerDe>;
+using DataTypeSerDeSPtrs = std::vector<DataTypeSerDeSPtr>;
+
 // Deserialize means read from different file format or memory format,
 // for example read from arrow, read from parquet.
 // Serialize means write the column cell or the total column into another
@@ -332,6 +336,11 @@ public:
                                            Arena& mem_pool, int row_num) const;
     virtual Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const;
 
+    virtual DataTypeSerDeSPtrs get_nested_serdes() const {
+        throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
+                               "Method get_nested_serdes is not supported for this serde");
+    }
+
 protected:
     bool _return_object_as_string = false;
     // This parameter indicates what level the serde belongs to and is mainly used for complex types
@@ -374,9 +383,6 @@ inline void checkArrowStatus(const arrow::Status& status, const std::string& col
 }
 }
 
-using DataTypeSerDeSPtr = std::shared_ptr<DataTypeSerDe>;
-using DataTypeSerDeSPtrs = std::vector<DataTypeSerDeSPtr>;
-
 DataTypeSerDeSPtrs create_data_type_serdes(
         const std::vector<std::shared_ptr<const IDataType>>& types);
 DataTypeSerDeSPtrs create_data_type_serdes(const std::vector<SlotDescriptor*>& slots);
diff --git a/be/src/vec/data_types/serde/data_type_struct_serde.h b/be/src/vec/data_types/serde/data_type_struct_serde.h
index 84e988e150b..5cd6f89e42f 100644
--- a/be/src/vec/data_types/serde/data_type_struct_serde.h
+++ b/be/src/vec/data_types/serde/data_type_struct_serde.h
@@ -171,6 +171,8 @@ public:
         }
     }
 
+    virtual DataTypeSerDeSPtrs get_nested_serdes() const override { return elem_serdes_ptrs; }
+
 private:
     std::optional<size_t> try_get_position_by_name(const String& name) const;
diff --git a/be/src/vec/exec/format/json/new_json_reader.cpp b/be/src/vec/exec/format/json/new_json_reader.cpp
index e3c2c1f332e..307edc265be 100644
--- a/be/src/vec/exec/format/json/new_json_reader.cpp
+++ b/be/src/vec/exec/format/json/new_json_reader.cpp
@@ -54,8 +54,11 @@
#include "util/slice.h"
#include "util/uid_util.h"
#include "vec/columns/column.h"
+#include "vec/columns/column_array.h"
+#include "vec/columns/column_map.h"
#include "vec/columns/column_nullable.h"
#include "vec/columns/column_string.h"
+#include "vec/columns/column_struct.h"
#include "vec/common/assert_cast.h"
#include "vec/common/typeid_cast.h"
#include "vec/core/block.h"
@@ -164,10 +167,18 @@ void NewJsonReader::_init_file_description() {
}
Status NewJsonReader::init_reader(
-        const std::unordered_map<std::string, VExprContextSPtr>& col_default_value_ctx) {
+        const std::unordered_map<std::string, VExprContextSPtr>& col_default_value_ctx,
+        bool is_load) {
+    _is_load = is_load;
+
     // generate _col_default_value_map
     RETURN_IF_ERROR(_get_column_default_value(_file_slot_descs, col_default_value_ctx));
+    // Use serde to insert data into the columns.
+    for (auto* slot_desc : _file_slot_descs) {
+        _serdes.emplace_back(slot_desc->get_data_type_ptr()->get_serde());
+    }
+
// create decompressor.
// _decompressor may be nullptr if this is not a compressed file
     RETURN_IF_ERROR(Decompressor::create_decompressor(_file_compress_type, &_decompressor));
@@ -387,6 +398,9 @@ Status NewJsonReader::_get_range_params() {
if (_params.file_attributes.__isset.fuzzy_parse) {
_fuzzy_parse = _params.file_attributes.fuzzy_parse;
}
+ if (_range.table_format_params.table_format_type == "hive") {
+ _is_hive_table = true;
+ }
return Status::OK();
}
@@ -474,8 +488,8 @@ Status NewJsonReader::_vhandle_simple_json(RuntimeState* /*state*/, Block& block
bool valid = false;
if (_next_row >= _total_rows) { // parse json and generic document
Status st = _parse_json(is_empty_row, eof);
-        if (st.is<DATA_QUALITY_ERROR>()) {
-            continue; // continue to read next
+        if (_is_load && st.is<DATA_QUALITY_ERROR>()) {
+            continue; // continue to read next (for load, the error has already been appended to the error file)
}
RETURN_IF_ERROR(st);
if (*is_empty_row) {
@@ -752,7 +766,21 @@ Status NewJsonReader::_set_column_value(rapidjson::Value& objectValue, Block& bl
     int ctx_idx = 0;
     bool has_valid_value = false;
-    for (auto* slot_desc : slot_descs) {
+
+    if (_is_hive_table) {
+        // Unlike _fuzzy_parse, the name_map must be rebuilt for every line that is read in.
+
+        for (int i = 0; i < objectValue.MemberCount(); ++i) {
+            auto it = objectValue.MemberBegin() + i;
+            std::string field_name(it->name.GetString(), it->name.GetStringLength());
+            std::transform(field_name.begin(), field_name.end(), field_name.begin(), ::tolower);
+
+            // Use the last value with the same name.
+            _name_map.emplace(field_name, i);
+        }
+    }
+ }
+ for (size_t slot_idx = 0; slot_idx < slot_descs.size(); ++slot_idx) {
+ auto* slot_desc = slot_descs[slot_idx];
if (!slot_desc->is_materialized()) {
continue;
}
@@ -761,7 +789,7 @@ Status NewJsonReader::_set_column_value(rapidjson::Value& objectValue, Block& bl
         auto* column_ptr = block.get_by_position(dest_index).column->assume_mutable().get();
         rapidjson::Value::ConstMemberIterator it = objectValue.MemberEnd();
-        if (_fuzzy_parse) {
+        if (_fuzzy_parse || _is_hive_table) {
             auto idx_it = _name_map.find(slot_desc->col_name());
             if (idx_it != _name_map.end() && idx_it->second < objectValue.MemberCount()) {
                 it = objectValue.MemberBegin() + idx_it->second;
@@ -773,20 +801,21 @@ Status NewJsonReader::_set_column_value(rapidjson::Value& objectValue, Block& bl
         if (it != objectValue.MemberEnd()) {
             const rapidjson::Value& value = it->value;
-            RETURN_IF_ERROR(_write_data_to_column(&value, slot_desc, column_ptr, valid));
+            RETURN_IF_ERROR(_write_data_to_column(&value, slot_desc->type(), column_ptr,
+                                                  slot_desc->col_name(), _serdes[slot_idx], valid));
if (!(*valid)) {
return Status::OK();
}
has_valid_value = true;
} else {
// not found, filling with default value
-            RETURN_IF_ERROR(_fill_missing_column(slot_desc, column_ptr, valid));
+            RETURN_IF_ERROR(_fill_missing_column(slot_desc, _serdes[slot_idx], column_ptr, valid));
if (!(*valid)) {
return Status::OK();
}
}
}
-    if (!has_valid_value) {
+    if (!has_valid_value && _is_load) {
         // there is no valid value in json line but has filled with default value before
         // so remove this line in block
         string col_names;
@@ -810,79 +839,188 @@ Status NewJsonReader::_set_column_value(rapidjson::Value& objectValue, Block& bl
}
 Status NewJsonReader::_write_data_to_column(rapidjson::Value::ConstValueIterator value,
-                                            SlotDescriptor* slot_desc, IColumn* column_ptr,
+                                            const TypeDescriptor& type_desc,
+                                            vectorized::IColumn* column_ptr,
+                                            const std::string& column_name, DataTypeSerDeSPtr serde,
                                             bool* valid) {
-    const char* str_value = nullptr;
-    char tmp_buf[128] = {0};
-    int32_t wbytes = 0;
-    std::string json_str;
-
     ColumnNullable* nullable_column = nullptr;
-    if (slot_desc->is_nullable()) {
+    vectorized::IColumn* data_column_ptr = column_ptr;
+    DataTypeSerDeSPtr data_serde = serde;
+
+    bool value_is_null = (value == nullptr) || (value->GetType() == rapidjson::Type::kNullType);
+
+    if (column_ptr->is_nullable()) {
         nullable_column = reinterpret_cast<ColumnNullable*>(column_ptr);
-        // kNullType will put 1 into the Null map, so there is no need to push 0 for kNullType.
-        if (value->GetType() != rapidjson::Type::kNullType) {
+        data_column_ptr = nullable_column->get_nested_column().get_ptr();
+        data_serde = serde->get_nested_serdes()[0];
+
+        if (value_is_null) {
+            nullable_column->insert_default();
+            *valid = true;
+            return Status::OK();
+        } else {
             nullable_column->get_null_map_data().push_back(0);
+        }
+
+    } else if (value_is_null) [[unlikely]] {
+        if (_is_load) {
+            RETURN_IF_ERROR(_append_error_msg(
+                    *value, "Json value is null, but the column `{}` is not nullable.", column_name,
+                    valid));
+            return Status::OK();
+
         } else {
-            nullable_column->insert_default();
+            return Status::DataQualityError(
+                    "Json value is null, but the column `{}` is not nullable.", column_name);
         }
-        column_ptr = &nullable_column->get_nested_column();
     }
-    switch (value->GetType()) {
-    case rapidjson::Type::kStringType:
-        str_value = value->GetString();
-        wbytes = value->GetStringLength();
-        break;
-    case rapidjson::Type::kNumberType:
-        if (value->IsUint()) {
-            wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%u", value->GetUint());
-        } else if (value->IsInt()) {
-            wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%d", value->GetInt());
-        } else if (value->IsUint64()) {
-            wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%" PRIu64, value->GetUint64());
-        } else if (value->IsInt64()) {
-            wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%" PRId64, value->GetInt64());
-        } else if (value->IsFloat() || value->IsDouble()) {
-            auto* end = fmt::format_to(tmp_buf, "{}", value->GetDouble());
-            wbytes = end - tmp_buf;
+    if (_is_load || !type_desc.is_complex_type()) {
+        if (value->IsString()) {
+            Slice slice {value->GetString(), value->GetStringLength()};
+            RETURN_IF_ERROR(data_serde->deserialize_one_cell_from_json(*data_column_ptr, slice,
+                                                                       _serde_options));
+
         } else {
-            return Status::InternalError<false>("It should not here.");
+            // We could `switch (value->GetType())` and handle kNumberType here.
+            // Note that the value may satisfy `value->IsInt()` while the column is a FloatColumn.
+            // Alternatively, for any type, use `NewJsonReader::_print_json_value(*value)`.
+
+            const char* str_value = nullptr;
+            char tmp_buf[128] = {0};
+            size_t wbytes = 0;
+            std::string json_str;
+
+            switch (value->GetType()) {
+            case rapidjson::Type::kStringType:
+                str_value = value->GetString();
+                wbytes = value->GetStringLength();
+                break;
+            case rapidjson::Type::kNumberType:
+                if (value->IsUint()) {
+                    wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%u", value->GetUint());
+                } else if (value->IsInt()) {
+                    wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%d", value->GetInt());
+                } else if (value->IsUint64()) {
+                    wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%" PRIu64, value->GetUint64());
+                } else if (value->IsInt64()) {
+                    wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%" PRId64, value->GetInt64());
+                } else if (value->IsFloat() || value->IsDouble()) {
+                    auto* end = fmt::format_to(tmp_buf, "{}", value->GetDouble());
+                    wbytes = end - tmp_buf;
+                } else {
+                    return Status::InternalError<false>("It should not reach here.");
+                }
+                str_value = tmp_buf;
+                break;
+            case rapidjson::Type::kFalseType:
+                wbytes = 1;
+                str_value = (char*)"0";
+                break;
+            case rapidjson::Type::kTrueType:
+                wbytes = 1;
+                str_value = (char*)"1";
+                break;
+            default:
+                // for other type, we convert it to string to save
+                json_str = NewJsonReader::_print_json_value(*value);
+                wbytes = json_str.size();
+                str_value = json_str.c_str();
+                break;
+            }
+            Slice slice {str_value, wbytes};
+            RETURN_IF_ERROR(data_serde->deserialize_one_cell_from_json(*data_column_ptr, slice,
+                                                                       _serde_options));
         }
}
-        str_value = tmp_buf;
-        break;
-    case rapidjson::Type::kFalseType:
-        wbytes = 1;
-        str_value = (char*)"0";
-        break;
-    case rapidjson::Type::kTrueType:
-        wbytes = 1;
-        str_value = (char*)"1";
-        break;
-    case rapidjson::Type::kNullType:
-        if (!slot_desc->is_nullable()) {
-            RETURN_IF_ERROR(_append_error_msg(
-                    *value, "Json value is null, but the column `{}` is not nullable.",
-                    slot_desc->col_name(), valid));
-            return Status::OK();
+    } else if (type_desc.type == TYPE_STRUCT) {
+        if (!value->IsObject()) [[unlikely]] {
+            return Status::DataQualityError(
+                    "Json value isn't object, but the column `{}` is struct.", column_name);
         }
-        // return immediately to prevent from repeatedly insert_data
-        *valid = true;
-        return Status::OK();
-    default:
-        // for other type like array or object. we convert it to string to save
-        json_str = NewJsonReader::_print_json_value(*value);
-        wbytes = json_str.size();
-        str_value = json_str.c_str();
-        break;
-    }
+        auto sub_col_size = type_desc.children.size();
+        const auto& struct_value = value->GetObject();
+
+        auto sub_serdes = data_serde->get_nested_serdes();
+        auto struct_column_ptr = assert_cast<ColumnStruct*>(data_column_ptr);
+
+        std::map<std::string, size_t> sub_col_name_to_idx;
+        for (size_t sub_col_idx = 0; sub_col_idx < sub_col_size; sub_col_idx++) {
+            sub_col_name_to_idx.emplace(type_desc.field_names[sub_col_idx], sub_col_idx);
+        }
+
+        std::vector<rapidjson::Value::ConstValueIterator> sub_values(sub_col_size, nullptr);
+        for (const auto& sub : struct_value) {
+            if (!sub.name.IsString()) [[unlikely]] {
+                return Status::DataQualityError(
+                        "Json file struct column `{}` subfield name isn't a String", column_name);
+            }
+
+            auto sub_key_char = sub.name.GetString();
+            auto sub_key_length = sub.name.GetStringLength();
+
+            std::string sub_key(sub_key_char, sub_key_length);
+            std::transform(sub_key.begin(), sub_key.end(), sub_key.begin(), ::tolower);
+
+            if (sub_col_name_to_idx.find(sub_key) == sub_col_name_to_idx.end()) [[unlikely]] {
+                continue;
+            }
+            size_t sub_column_idx = sub_col_name_to_idx[sub_key];
+            sub_values[sub_column_idx] = &sub.value;
+        }
+
+        for (size_t sub_col_idx = 0; sub_col_idx < sub_col_size; sub_col_idx++) {
+            auto sub_value = sub_values[sub_col_idx];
+
+            const auto& sub_col_type = type_desc.children[sub_col_idx];
+
+            RETURN_IF_ERROR(_write_data_to_column(
+                    sub_value, sub_col_type, struct_column_ptr->get_column(sub_col_idx).get_ptr(),
+                    column_name + "." + type_desc.field_names[sub_col_idx], sub_serdes[sub_col_idx],
+                    valid));
+        }
+    } else if (type_desc.type == TYPE_MAP) {
+        if (!value->IsObject()) [[unlikely]] {
+            return Status::DataQualityError("Json value isn't object, but the column `{}` is map.",
+                                            column_name);
+        }
+ const auto& object_value = value->GetObject();
+ auto sub_serdes = data_serde->get_nested_serdes();
+ auto map_column_ptr = assert_cast<ColumnMap*>(data_column_ptr);
-    // TODO: if the vexpr can support another 'slot_desc type' than 'TYPE_VARCHAR',
-    // we need use a function to support these types to insert data in columns.
-    DCHECK(slot_desc->type().type == TYPE_VARCHAR || slot_desc->type().type == TYPE_STRING)
-            << slot_desc->type().type << ", query id: " << print_id(_state->query_id());
-    assert_cast<ColumnString*>(column_ptr)->insert_data(str_value, wbytes);
+        for (const auto& member_value : object_value) {
+            RETURN_IF_ERROR(_write_data_to_column(
+                    &member_value.name, type_desc.children[0],
+                    map_column_ptr->get_keys_ptr()->assume_mutable()->get_ptr(),
+                    column_name + ".key", sub_serdes[0], valid));
+
+            RETURN_IF_ERROR(_write_data_to_column(
+                    &member_value.value, type_desc.children[1],
+                    map_column_ptr->get_values_ptr()->assume_mutable()->get_ptr(),
+                    column_name + ".value", sub_serdes[1], valid));
+        }
+
+        auto& offsets = map_column_ptr->get_offsets();
+        offsets.emplace_back(offsets.back() + object_value.MemberCount());
+    } else if (type_desc.type == TYPE_ARRAY) {
+        if (!value->IsArray()) [[unlikely]] {
+            return Status::DataQualityError("Json value isn't array, but the column `{}` is array.",
+                                            column_name);
+        }
+        const auto& array_value = value->GetArray();
+        auto sub_serdes = data_serde->get_nested_serdes();
+        auto array_column_ptr = assert_cast<ColumnArray*>(data_column_ptr);
+
+        for (const auto& sub_value : array_value) {
+            RETURN_IF_ERROR(_write_data_to_column(&sub_value, type_desc.children[0],
+                                                  array_column_ptr->get_data().get_ptr(),
+                                                  column_name + ".element", sub_serdes[0], valid));
+        }
+        auto& offsets = array_column_ptr->get_offsets();
+        offsets.emplace_back(offsets.back() + array_value.Size());
+    } else {
+        return Status::InternalError("Not support load to complex column.");
+    }
*valid = true;
return Status::OK();
@@ -914,20 +1052,21 @@ Status NewJsonReader::_write_columns_by_jsonpath(rapidjson::Value& objectValue,
                 // if json_values' size > 1, it means we just match an array, not a wrapped one, so no need to unwrap.
json_values = &((*json_values)[0]);
}
-            RETURN_IF_ERROR(_write_data_to_column(json_values, slot_descs[i], column_ptr, valid));
+            RETURN_IF_ERROR(_write_data_to_column(json_values, slot_descs[i]->type(), column_ptr,
+                                                  slot_descs[i]->col_name(), _serdes[i], valid));
if (!(*valid)) {
return Status::OK();
}
has_valid_value = true;
} else {
// not found, filling with default value
-            RETURN_IF_ERROR(_fill_missing_column(slot_desc, column_ptr, valid));
+            RETURN_IF_ERROR(_fill_missing_column(slot_desc, _serdes[i], column_ptr, valid));
if (!(*valid)) {
return Status::OK();
}
}
}
- if (!has_valid_value) {
+ if (!has_valid_value && _is_load) {
         // there is no valid value in json line but has filled with default value before
// so remove this line in block
for (int i = 0; i < block.columns(); ++i) {
@@ -1074,7 +1213,7 @@ Status NewJsonReader::_simdjson_handle_simple_json(RuntimeState* /*state*/, Bloc
// step2: get json value by json doc
Status st = _get_json_value(&size, eof, &error, is_empty_row);
- if (st.is<DATA_QUALITY_ERROR>()) {
+ if (_is_load && st.is<DATA_QUALITY_ERROR>()) {
return Status::OK();
}
RETURN_IF_ERROR(st);
@@ -1349,25 +1488,39 @@ Status NewJsonReader::_simdjson_set_column_value(simdjson::ondemand::object* val
for (auto field : *value) {
std::string_view key = field.unescaped_key();
StringRef name_ref(key.data(), key.size());
+ std::string key_string;
+ if (_is_hive_table) {
+ key_string = name_ref.to_string();
+            std::transform(key_string.begin(), key_string.end(), key_string.begin(), ::tolower);
+ name_ref = StringRef(key_string);
+ }
const size_t column_index = _column_index(name_ref, key_index++);
if (UNLIKELY(ssize_t(column_index) < 0)) {
// This key is not exist in slot desc, just ignore
continue;
}
if (_seen_columns[column_index]) {
-            continue;
+            if (_is_hive_table) {
+                // Since the value can only be traversed once, we insert the original value first,
+                // then pop it back, and then insert the new value.
+                block.get_by_position(column_index).column->assume_mutable()->pop_back(1);
+            } else {
+                continue;
+            }
}
simdjson::ondemand::value val = field.value();
         auto* column_ptr = block.get_by_position(column_index).column->assume_mutable().get();
-        RETURN_IF_ERROR(
-                _simdjson_write_data_to_column(val, slot_descs[column_index], column_ptr, valid));
+        RETURN_IF_ERROR(_simdjson_write_data_to_column(
+                val, slot_descs[column_index]->type(), column_ptr,
+                slot_descs[column_index]->col_name(), _serdes[column_index], valid));
if (!(*valid)) {
return Status::OK();
}
_seen_columns[column_index] = true;
has_valid_value = true;
}
- if (!has_valid_value) {
+
+ if (!has_valid_value && _is_load) {
string col_names;
for (auto* slot_desc : slot_descs) {
col_names.append(slot_desc->col_name() + ", ");
@@ -1400,7 +1553,7 @@ Status NewJsonReader::_simdjson_set_column_value(simdjson::ondemand::object* val
         auto* column_ptr = block.get_by_position(i).column->assume_mutable().get();
if (column_ptr->size() < cur_row_count + 1) {
DCHECK(column_ptr->size() == cur_row_count);
-            RETURN_IF_ERROR(_fill_missing_column(slot_desc, column_ptr, valid));
+            RETURN_IF_ERROR(_fill_missing_column(slot_desc, _serdes[i], column_ptr, valid));
if (!(*valid)) {
return Status::OK();
}
@@ -1409,12 +1562,6 @@ Status NewJsonReader::_simdjson_set_column_value(simdjson::ondemand::object* val
DCHECK(column_ptr->size() == cur_row_count + 1);
}
-#ifndef NDEBUG
- // Check all columns rows matched
- for (size_t i = 0; i < block.columns(); ++i) {
- DCHECK_EQ(block.get_by_position(i).column->size(), cur_row_count + 1);
- }
-#endif
// There is at least one valid value here
DCHECK(nullcount < block.columns());
*valid = true;
@@ -1422,54 +1569,180 @@ Status NewJsonReader::_simdjson_set_column_value(simdjson::ondemand::object* val
}
 Status NewJsonReader::_simdjson_write_data_to_column(simdjson::ondemand::value& value,
-                                                     SlotDescriptor* slot_desc, IColumn* column,
-                                                     bool* valid) {
-    // write
+                                                     const TypeDescriptor& type_desc,
+                                                     vectorized::IColumn* column_ptr,
+                                                     const std::string& column_name,
+                                                     DataTypeSerDeSPtr serde, bool* valid) {
ColumnNullable* nullable_column = nullptr;
-    IColumn* column_ptr = nullptr;
-    if (slot_desc->is_nullable()) {
-        nullable_column = assert_cast<ColumnNullable*>(column);
-        column_ptr = &nullable_column->get_nested_column();
-    }
-    // TODO: if the vexpr can support another 'slot_desc type' than 'TYPE_VARCHAR',
-    // we need use a function to support these types to insert data in columns.
-    auto* column_string = assert_cast<ColumnString*>(column_ptr);
-    switch (value.type()) {
-    case simdjson::ondemand::json_type::null: {
-        if (column->is_nullable()) {
-            // insert_default already push 1 to null_map
-            nullable_column->insert_default();
+    vectorized::IColumn* data_column_ptr = column_ptr;
+    DataTypeSerDeSPtr data_serde = serde;
+
+    if (column_ptr->is_nullable()) {
+        nullable_column = reinterpret_cast<ColumnNullable*>(column_ptr);
+
+        data_column_ptr = nullable_column->get_nested_column().get_ptr();
+        data_serde = serde->get_nested_serdes()[0];
+
+        // kNullType will put 1 into the Null map, so there is no need to push 0 for kNullType.
+        if (value.type() != simdjson::ondemand::json_type::null) {
+            nullable_column->get_null_map_data().push_back(0);
         } else {
+            nullable_column->insert_default();
+            *valid = true;
+            return Status::OK();
+        }
+    } else if (value.type() == simdjson::ondemand::json_type::null) [[unlikely]] {
+        if (_is_load) {
             RETURN_IF_ERROR(_append_error_msg(
                     nullptr, "Json value is null, but the column `{}` is not nullable.",
-                    slot_desc->col_name(), valid));
+                    column_name, valid));
             return Status::OK();
-        }
-        break;
-    }
-    case simdjson::ondemand::json_type::boolean: {
-        nullable_column->get_null_map_data().push_back(0);
-        if (value.get_bool()) {
-            column_string->insert_data("1", 1);
         } else {
-            column_string->insert_data("0", 1);
+            return Status::DataQualityError(
+                    "Json value is null, but the column `{}` is not nullable.", column_name);
         }
-        break;
-    }
-    default: {
+
+    if (_is_load || !type_desc.is_complex_type()) {
         if (value.type() == simdjson::ondemand::json_type::string) {
-            auto* unescape_buffer =
-                    reinterpret_cast<uint8_t*>(_simdjson_ondemand_unscape_padding_buffer.data());
-            std::string_view unescaped_value =
-                    _ondemand_json_parser->unescape(value.get_raw_json_string(), unescape_buffer);
-            nullable_column->get_null_map_data().push_back(0);
-            column_string->insert_data(unescaped_value.data(), unescaped_value.length());
-            break;
+            std::string_view value_string = value.get_string();
+            Slice slice {value_string.data(), value_string.size()};
+            RETURN_IF_ERROR(data_serde->deserialize_one_cell_from_json(*data_column_ptr, slice,
+                                                                       _serde_options));
+
+        } else {
+            // We could `switch` on the value type and handle numbers specially here.
+            // Note that the value may be an integer while the column is a FloatColumn.
+            std::string_view json_str = simdjson::to_json_string(value);
+            Slice slice {json_str.data(), json_str.size()};
+            RETURN_IF_ERROR(data_serde->deserialize_one_cell_from_json(*data_column_ptr, slice,
+                                                                       _serde_options));
+        }
+    } else if (type_desc.type == TYPE_STRUCT) {
+        if (value.type() != simdjson::ondemand::json_type::object) [[unlikely]] {
+            return Status::DataQualityError(
+                    "Json value isn't object, but the column `{}` is struct.", column_name);
+        }
+
+        auto sub_col_size = type_desc.children.size();
+        simdjson::ondemand::object struct_value = value.get_object();
+        auto sub_serdes = data_serde->get_nested_serdes();
+        auto struct_column_ptr = assert_cast<ColumnStruct*>(data_column_ptr);
+
+        std::map<std::string, size_t> sub_col_name_to_idx;
+        for (size_t sub_col_idx = 0; sub_col_idx < sub_col_size; sub_col_idx++) {
+            sub_col_name_to_idx.emplace(type_desc.field_names[sub_col_idx], sub_col_idx);
+        }
+        vector<bool> has_value(sub_col_size, false);
+        for (simdjson::ondemand::field sub : struct_value) {
+            std::string_view sub_key_view = sub.unescaped_key();
+            std::string sub_key(sub_key_view.data(), sub_key_view.length());
+            std::transform(sub_key.begin(), sub_key.end(), sub_key.begin(), ::tolower);
+
+            if (sub_col_name_to_idx.find(sub_key) == sub_col_name_to_idx.end()) [[unlikely]] {
+                continue;
+            }
+            size_t sub_column_idx = sub_col_name_to_idx[sub_key];
+            auto sub_column_ptr = struct_column_ptr->get_column(sub_column_idx).get_ptr();
+
+            if (has_value[sub_column_idx]) [[unlikely]] {
+                // Since struct_value can only be traversed once, we can only insert
+                // the original value first, then delete it, and then reinsert the new value.
+                sub_column_ptr->pop_back(1);
+            }
+            has_value[sub_column_idx] = true;
+
+            const auto& sub_col_type = type_desc.children[sub_column_idx];
+            RETURN_IF_ERROR(_simdjson_write_data_to_column(
+                    sub.value(), sub_col_type, sub_column_ptr, column_name + "." + sub_key,
+                    sub_serdes[sub_column_idx], valid));
}
-        auto value_str = simdjson::to_json_string(value).value();
-        nullable_column->get_null_map_data().push_back(0);
-        column_string->insert_data(value_str.data(), value_str.length());
-    }
+
+        // Fill missing subcolumns.
+        for (size_t sub_col_idx = 0; sub_col_idx < sub_col_size; sub_col_idx++) {
+            if (has_value[sub_col_idx] == true) {
+                continue;
+            }
+
+            auto sub_column_ptr = struct_column_ptr->get_column(sub_col_idx).get_ptr();
+            if (sub_column_ptr->is_nullable()) {
+                sub_column_ptr->insert_default();
+                continue;
+            } else [[unlikely]] {
+                return Status::DataQualityError(
+                        "Json file structColumn miss field {} and this column isn't nullable.",
+                        column_name + "." + type_desc.field_names[sub_col_idx]);
+            }
+        }
+    } else if (type_desc.type == TYPE_MAP) {
+        if (value.type() != simdjson::ondemand::json_type::object) [[unlikely]] {
+            return Status::DataQualityError("Json value isn't object, but the column `{}` is map.",
+                                            column_name);
+        }
+        simdjson::ondemand::object object_value = value.get_object();
+
+        auto sub_serdes = data_serde->get_nested_serdes();
+        auto map_column_ptr = assert_cast<ColumnMap*>(data_column_ptr);
+
+        size_t field_count = 0;
+        for (simdjson::ondemand::field member_value : object_value) {
+            auto f = [](std::string_view key_view, const TypeDescriptor& type_desc,
+                        vectorized::IColumn* column_ptr, DataTypeSerDeSPtr serde,
+                        vectorized::DataTypeSerDe::FormatOptions serde_options, bool* valid) {
+                auto data_column_ptr = column_ptr;
+                auto data_serde = serde;
+                if (column_ptr->is_nullable()) {
+                    auto nullable_column = static_cast<ColumnNullable*>(column_ptr);
+
+                    nullable_column->get_null_map_data().push_back(0);
+                    data_column_ptr = nullable_column->get_nested_column().get_ptr();
+                    data_serde = serde->get_nested_serdes()[0];
+                }
+                Slice slice(key_view.data(), key_view.length());
+
+                RETURN_IF_ERROR(data_serde->deserialize_one_cell_from_json(*data_column_ptr, slice,
+                                                                           serde_options));
+                return Status::OK();
+            };
+
+            RETURN_IF_ERROR(f(member_value.unescaped_key(), type_desc.children[0],
+                              map_column_ptr->get_keys_ptr()->assume_mutable()->get_ptr(),
+                              sub_serdes[0], _serde_options, valid));
+
+            simdjson::ondemand::value field_value = member_value.value();
+            RETURN_IF_ERROR(_simdjson_write_data_to_column(
+                    field_value, type_desc.children[1],
+                    map_column_ptr->get_values_ptr()->assume_mutable()->get_ptr(),
+                    column_name + ".value", sub_serdes[1], valid));
+            field_count++;
+        }
+
+        auto& offsets = map_column_ptr->get_offsets();
+        offsets.emplace_back(offsets.back() + field_count);
+
+    } else if (type_desc.type == TYPE_ARRAY) {
+        if (value.type() != simdjson::ondemand::json_type::array) [[unlikely]] {
+            return Status::DataQualityError("Json value isn't array, but the column `{}` is array.",
+                                            column_name);
+        }
+
+        simdjson::ondemand::array array_value = value.get_array();
+
+        auto sub_serdes = data_serde->get_nested_serdes();
+        auto array_column_ptr = assert_cast<ColumnArray*>(data_column_ptr);
+
+        int field_count = 0;
+        for (simdjson::ondemand::value sub_value : array_value) {
+            RETURN_IF_ERROR(_simdjson_write_data_to_column(
+                    sub_value, type_desc.children[0], array_column_ptr->get_data().get_ptr(),
+                    column_name + ".element", sub_serdes[0], valid));
+            field_count++;
+        }
+        auto& offsets = array_column_ptr->get_offsets();
+        offsets.emplace_back(offsets.back() + field_count);
+
+    } else {
+        return Status::InternalError("Not support load to complex column.");
}
*valid = true;
return Status::OK();
@@ -1677,13 +1950,14 @@ Status NewJsonReader::_simdjson_write_columns_by_jsonpath(
has_valid_value = true;
} else if (i >= _parsed_jsonpaths.size() || st.is<NOT_FOUND>()) {
// not match in jsondata, filling with default value
-            RETURN_IF_ERROR(_fill_missing_column(slot_desc, column_ptr, valid));
+            RETURN_IF_ERROR(_fill_missing_column(slot_desc, _serdes[i], column_ptr, valid));
if (!(*valid)) {
return Status::OK();
}
} else {
-            RETURN_IF_ERROR(
-                    _simdjson_write_data_to_column(json_value, slot_desc, column_ptr, valid));
+            RETURN_IF_ERROR(_simdjson_write_data_to_column(json_value, slot_desc->type(),
+                                                           column_ptr, slot_desc->col_name(),
+                                                           _serdes[i], valid));
if (!(*valid)) {
return Status::OK();
}
@@ -1741,25 +2015,30 @@ Status NewJsonReader::_get_column_default_value(
return Status::OK();
}
-Status NewJsonReader::_fill_missing_column(SlotDescriptor* slot_desc, IColumn* column_ptr,
-                                           bool* valid) {
-    if (slot_desc->is_nullable()) {
-        auto* nullable_column = reinterpret_cast<ColumnNullable*>(column_ptr);
-        column_ptr = &nullable_column->get_nested_column();
-        auto col_value = _col_default_value_map.find(slot_desc->col_name());
-        if (col_value == _col_default_value_map.end()) {
+Status NewJsonReader::_fill_missing_column(SlotDescriptor* slot_desc, DataTypeSerDeSPtr serde,
+                                           IColumn* column_ptr, bool* valid) {
+    auto col_value = _col_default_value_map.find(slot_desc->col_name());
+    if (col_value == _col_default_value_map.end()) {
+        if (slot_desc->is_nullable()) {
+            auto* nullable_column = static_cast<ColumnNullable*>(column_ptr);
             nullable_column->insert_default();
         } else {
-            const std::string& v_str = col_value->second;
-            nullable_column->get_null_map_data().push_back(0);
-            assert_cast<ColumnString*>(column_ptr)->insert_data(v_str.c_str(), v_str.size());
+            if (_is_load) {
+                RETURN_IF_ERROR(_append_error_msg(
+                        nullptr, "The column `{}` is not nullable, but it's not found in jsondata.",
+                        slot_desc->col_name(), valid));
+            } else {
+                return Status::DataQualityError(
+                        "The column `{}` is not nullable, but it's not found in jsondata.",
+                        slot_desc->col_name());
+            }
         }
     } else {
-        RETURN_IF_ERROR(_append_error_msg(
-                nullptr, "The column `{}` is not nullable, but it's not found in jsondata.",
-                slot_desc->col_name(), valid));
+        const std::string& v_str = col_value->second;
+        Slice column_default_value {v_str};
+        RETURN_IF_ERROR(serde->deserialize_one_cell_from_json(*column_ptr, column_default_value,
+                                                              _serde_options));
     }
-
*valid = true;
return Status::OK();
}
diff --git a/be/src/vec/exec/format/json/new_json_reader.h b/be/src/vec/exec/format/json/new_json_reader.h
index 0df3747b8c2..6828b6b2abf 100644
--- a/be/src/vec/exec/format/json/new_json_reader.h
+++ b/be/src/vec/exec/format/json/new_json_reader.h
@@ -88,7 +88,8 @@ public:
~NewJsonReader() override = default;
     Status init_reader(const std::unordered_map<std::string, vectorized::VExprContextSPtr>&
-                               col_default_value_ctx);
+                               col_default_value_ctx,
+                       bool is_load);
Status get_next_block(Block* block, size_t* read_rows, bool* eof) override;
     Status get_columns(std::unordered_map<std::string, TypeDescriptor>* name_to_type,
std::unordered_set<std::string>* missing_cols) override;
@@ -129,7 +130,8 @@ private:
const std::vector<SlotDescriptor*>& slot_descs,
bool* valid);
     Status _write_data_to_column(rapidjson::Value::ConstValueIterator value,
-                                 SlotDescriptor* slot_desc, vectorized::IColumn* column_ptr,
+                                 const TypeDescriptor& type_desc, vectorized::IColumn* column_ptr,
+                                 const std::string& column_name, DataTypeSerDeSPtr serde,
                                  bool* valid);
Status _write_columns_by_jsonpath(rapidjson::Value& objectValue,
@@ -178,8 +180,10 @@ private:
                                          const std::vector<SlotDescriptor*>& slot_descs, bool* valid);
     Status _simdjson_write_data_to_column(simdjson::ondemand::value& value,
-                                          SlotDescriptor* slot_desc,
-                                          vectorized::IColumn* column_ptr, bool* valid);
+                                          const TypeDescriptor& type_desc,
+                                          vectorized::IColumn* column_ptr,
+                                          const std::string& column_name, DataTypeSerDeSPtr serde,
+                                          bool* valid);
     Status _simdjson_write_columns_by_jsonpath(simdjson::ondemand::object* value,
                                                const std::vector<SlotDescriptor*>& slot_descs,
@@ -197,8 +201,8 @@ private:
             const std::unordered_map<std::string, vectorized::VExprContextSPtr>& col_default_value_ctx);
-    Status _fill_missing_column(SlotDescriptor* slot_desc, vectorized::IColumn* column_ptr,
-                                bool* valid);
+    Status _fill_missing_column(SlotDescriptor* slot_desc, DataTypeSerDeSPtr serde,
+                                vectorized::IColumn* column_ptr, bool* valid);
RuntimeState* _state = nullptr;
RuntimeProfile* _profile = nullptr;
@@ -283,6 +287,22 @@ private:
std::unique_ptr<simdjson::ondemand::parser> _ondemand_json_parser;
// column to default value string map
std::unordered_map<std::string, std::string> _col_default_value_map;
+
+    bool _is_load = true;
+    // Whether this reader serves a (stream) load. During a load, data is only inserted into
+    // ColumnString, and when an illegal value is encountered, `_append_error_msg` should be
+    // called instead of directly returning `Status::DataQualityError`.
+
+    bool _is_hive_table = false;
+    // In Hive: create table xxx ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe';
+    // Hive does not allow creating columns whose names differ only in case, including field
+    // names inside structs, and automatically converts uppercase names in the create SQL to
+    // lowercase. However, when Hive loads data into a table, the column names in the data may
+    // be uppercase, and there may be multiple columns with the same name but different
+    // capitalization. Following Hive's behavior, we convert all column names in the data to
+    // lowercase and use the last one as the insertion value.
+
+    DataTypeSerDeSPtrs _serdes;
+    vectorized::DataTypeSerDe::FormatOptions _serde_options;
};
} // namespace vectorized
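
Note: a sketch of the case-folding rule described in the header comments above,
using one line of the json_load_data_table test data added below (the expected
values follow our reading of "lowercase everything, last duplicate wins" and are
not verified query output):

    -- JSON line: {"id":2,"col1":10,"col1":20,"col2":{"col2b":"string2","col2a":0,"Col2A":20},"col3":{"2":"string2"}}
    -- `col1` appears twice; the last value (20) is used.
    -- Inside `col2`, `col2a` and `Col2A` both fold to `col2a`; the last value (20) wins.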
diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp b/be/src/vec/exec/scan/vfile_scanner.cpp
index 997eef02090..ba8048f73a9 100644
--- a/be/src/vec/exec/scan/vfile_scanner.cpp
+++ b/be/src/vec/exec/scan/vfile_scanner.cpp
@@ -931,8 +931,8 @@ Status VFileScanner::_get_next_reader() {
             _cur_reader = NewJsonReader::create_unique(_state, _profile, &_counter, *_params, range,
                                                        _file_slot_descs, &_scanner_eof, _io_ctx.get());
-            init_status =
-                    ((NewJsonReader*)(_cur_reader.get()))->init_reader(_col_default_value_ctx);
+            init_status = ((NewJsonReader*)(_cur_reader.get()))
+                                  ->init_reader(_col_default_value_ctx, _is_load);
break;
}
case TFileFormatType::FORMAT_AVRO: {
diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run69.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run69.hql
new file mode 100644
index 00000000000..adf0f7d56b2
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run69.hql
@@ -0,0 +1,35 @@
+use `default`;
+
+
+CREATE TABLE json_nested_complex_table (
+ user_ID STRING,
+ user_PROFILE STRUCT<
+ name: STRING,
+ AGE: INT,
+ preferences: MAP<
+ STRING,
+ STRUCT<
+ preference_ID: INT,
+ preference_VALUES: ARRAY<STRING>
+ >
+ >
+ >,
+ activity_LOG ARRAY<
+ STRUCT<
+ activity_DATE: STRING,
+ activities: MAP<
+ STRING,
+ STRUCT<
+ `DETAILS`: STRING,
+ metrics: MAP<STRING, float>
+ >
+ >
+ >
+ >
+) ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'
+
+LOCATION
+ '/user/doris/preinstalled_data/json/json_nested_complex_table';
+
+
+msck repair table json_nested_complex_table;
diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run70.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run70.hql
new file mode 100644
index 00000000000..73df8cba557
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run70.hql
@@ -0,0 +1,73 @@
+use `default`;
+
+
+CREATE TABLE json_all_complex_types (
+ `id` int,
+ `boolean_col` boolean,
+ `tinyint_col` tinyint,
+ `smallint_col` smallint,
+ `int_col` int,
+ `bigint_col` bigint,
+ `float_col` float,
+ `double_col` double,
+ `decimal_col1` decimal(9,0),
+ `decimal_col2` decimal(8,4),
+ `decimal_col3` decimal(18,6),
+ `decimal_col4` decimal(38,12),
+ `string_col` string,
+ `binary_col` binary,
+ `date_col` date,
+ `timestamp_col1` timestamp,
+ `timestamp_col2` timestamp,
+ `timestamp_col3` timestamp,
+ `char_col1` char(50),
+ `char_col2` char(100),
+ `char_col3` char(255),
+ `varchar_col1` varchar(50),
+ `varchar_col2` varchar(100),
+ `varchar_col3` varchar(255),
+ `t_map_string` map<string,string>,
+ `t_map_varchar` map<varchar(65535),varchar(65535)>,
+ `t_map_char` map<char(10),char(10)>,
+ `t_map_int` map<int,int>,
+ `t_map_bigint` map<bigint,bigint>,
+ `t_map_float` map<float,float>,
+ `t_map_double` map<double,double>,
+ `t_map_boolean` map<boolean,boolean>,
+ `t_map_decimal_precision_2` map<decimal(2,1),decimal(2,1)>,
+ `t_map_decimal_precision_4` map<decimal(4,2),decimal(4,2)>,
+ `t_map_decimal_precision_8` map<decimal(8,4),decimal(8,4)>,
+ `t_map_decimal_precision_17` map<decimal(17,8),decimal(17,8)>,
+ `t_map_decimal_precision_18` map<decimal(18,8),decimal(18,8)>,
+ `t_map_decimal_precision_38` map<decimal(38,16),decimal(38,16)>,
+ `t_array_string` array<string>,
+ `t_array_int` array<int>,
+ `t_array_bigint` array<bigint>,
+ `t_array_float` array<float>,
+ `t_array_double` array<double>,
+ `t_array_boolean` array<boolean>,
+ `t_array_varchar` array<varchar(65535)>,
+ `t_array_char` array<char(10)>,
+ `t_array_decimal_precision_2` array<decimal(2,1)>,
+ `t_array_decimal_precision_4` array<decimal(4,2)>,
+ `t_array_decimal_precision_8` array<decimal(8,4)>,
+ `t_array_decimal_precision_17` array<decimal(17,8)>,
+ `t_array_decimal_precision_18` array<decimal(18,8)>,
+ `t_array_decimal_precision_38` array<decimal(38,16)>,
+ `t_struct_bigint` struct<s_bigint:bigint>,
+ `t_complex` map<string,array<struct<s_int:int>>>,
+ `t_struct_nested` struct<struct_field:array<string>>,
+ `t_struct_null` struct<struct_field_null:string,struct_field_null2:string>,
+  `t_struct_non_nulls_after_nulls` struct<struct_non_nulls_after_nulls1:int,struct_non_nulls_after_nulls2:string>,
+  `t_nested_struct_non_nulls_after_nulls` struct<struct_field1:int,struct_field2:string,strict_field3:struct<nested_struct_field1:int,nested_struct_field2:string>>,
+ `t_map_null_value` map<string,string>,
+ `t_array_string_starting_with_nulls` array<string>,
+ `t_array_string_with_nulls_in_between` array<string>,
+ `t_array_string_ending_with_nulls` array<string>,
+ `t_array_string_all_nulls` array<string>
+ ) PARTITIONED BY (`dt` string)
+ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'
+LOCATION
+ '/user/doris/preinstalled_data/json/json_all_complex_types';
+
+msck repair table json_all_complex_types;
diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run71.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run71.hql
new file mode 100644
index 00000000000..ec99e72d2f5
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run71.hql
@@ -0,0 +1,13 @@
+use `default`;
+
+
+CREATE TABLE json_load_data_table (
+ `id` int,
+ `col1` int,
+ `col2` struct< col2a:int, col2b:string>,
+ `col3` map<int,string>
+) ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'
+LOCATION
+ '/user/doris/preinstalled_data/json/json_load_data_table';
+
+msck repair table json_load_data_table;
diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_all_complex_types/dt=dt1/000000_0 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_all_complex_types/dt=dt1/000000_0
new file mode 100644
index 00000000000..5fe37cbc6f0
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_all_complex_types/dt=dt1/000000_0
@@ -0,0 +1,3 @@
+{"id":1,"boolean_col":true,"tinyint_col":127,"smallint_col":32767,"int_col":2147483647,"bigint_col":9223372036854775807,"float_col":123.45,"double_col":123456.789,"decimal_col1":123456789,"decimal_col2":1234.5678,"decimal_col3":123456.789012,"decimal_col4":123456789.012345678901,"string_col":"string_value","binary_col":"binary_value","date_col":"2024-03-20","timestamp_col1":"2024-03-20
12:00:00","timestamp_col2":"2024-03-20
12:00:00.123456789","timestamp_col3":"2024-03-20 12:00:00.123456 [...]
+{"id":2,"boolean_col":false,"tinyint_col":58,"smallint_col":12345,"int_col":2147483000,"bigint_col":null,"float_col":789.56,"double_col":654321.123,"decimal_col1":987654321,"decimal_col2":5678.1234,"decimal_col3":987654.321098,"decimal_col4":987654321.098765432109,"string_col":"changed_string","binary_col":"new_binary_value","date_col":"2025-05-25","timestamp_col1":"2025-05-25
15:30:00","timestamp_col2":"2025-05-25
15:30:00.654321987","timestamp_col3":"2025-05-25 15:30:00.654321987","cha [...]
+{"id":3,"boolean_col":false,"tinyint_col":-128,"smallint_col":-32768,"int_col":-2147483648,"bigint_col":-9223372036854775808,"float_col":-3.4028235E38,"double_col":-1.7976931348623157E308,"decimal_col1":-999999999,"decimal_col2":-9999.9999,"decimal_col3":-999999999.999999,"decimal_col4":null,"string_col":"min_string_value","binary_col":"xxxx","date_col":"2001-01-01","timestamp_col1":"2001-01-01
00:00:00","timestamp_col2":"2001-01-01 00:00:00","timestamp_col3":"2001-01-01
00:00:00","char_ [...]
diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_all_complex_types/dt=dt2/000000_0 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_all_complex_types/dt=dt2/000000_0
new file mode 100644
index 00000000000..0a823bee693
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_all_complex_types/dt=dt2/000000_0
@@ -0,0 +1 @@
+{"id":4,"boolean_col":null,"tinyint_col":null,"smallint_col":null,"int_col":null,"bigint_col":null,"float_col":123.45,"double_col":null,"decimal_col1":null,"decimal_col2":null,"decimal_col3":null,"decimal_col4":null,"string_col":null,"binary_col":null,"date_col":null,"timestamp_col1":null,"timestamp_col2":null,"timestamp_col3":null,"char_col1":null,"char_col2":null,"char_col3":null,"varchar_col1":null,"varchar_col2":null,"varchar_col3":null,"t_map_string":null,"t_map_varchar":null,"t_map
[...]
diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_all_complex_types/dt=dt3/000000_0 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_all_complex_types/dt=dt3/000000_0
new file mode 100644
index 00000000000..a5e46399fdd
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_all_complex_types/dt=dt3/000000_0
@@ -0,0 +1,2 @@
+{"id":5,"boolean_col":null,"tinyint_col":null,"smallint_col":null,"int_col":null,"bigint_col":null,"float_col":null,"double_col":null,"decimal_col1":null,"decimal_col2":null,"decimal_col3":null,"decimal_col4":null,"string_col":null,"binary_col":null,"date_col":null,"timestamp_col1":null,"timestamp_col2":null,"timestamp_col3":null,"char_col1":null,"char_col2":null,"char_col3":null,"varchar_col1":null,"varchar_col2":null,"varchar_col3":null,"t_map_string":null,"t_map_varchar":null,"t_map_c
[...]
+{"id":6,"boolean_col":null,"tinyint_col":null,"smallint_col":null,"int_col":null,"bigint_col":null,"float_col":null,"double_col":null,"decimal_col1":null,"decimal_col2":null,"decimal_col3":null,"decimal_col4":null,"string_col":null,"binary_col":null,"date_col":null,"timestamp_col1":null,"timestamp_col2":null,"timestamp_col3":null,"char_col1":null,"char_col2":null,"char_col3":null,"varchar_col1":null,"varchar_col2":null,"varchar_col3":null,"t_map_string":null,"t_map_varchar":null,"t_map_c
[...]
diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_load_data_table/1 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_load_data_table/1
new file mode 100644
index 00000000000..70d1265f98d
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_load_data_table/1
@@ -0,0 +1,13 @@
+{"id":1,"col1":10,"col2":{"col2a":10,"col2b":"string1"},"col3":{"1":"string10"}}
+{"id":2,"col1":10,"col1":20,"col2":{"col2b":"string2","col2a":0,"Col2A":20},"col3":{"2":"string2"}}
+{"id":3,"col1":10,"col1":20,"COL1":30,"COL2":{"col2a":30,"col2b":"string3"}}
+{"id":4,"COL1":40,"col2":{"col2a":10,"col2b":"string4","new_col":"new_val","col2a":40},"col3":{"4":"string4"}}
+{"id":5}
+{"id":6,"col1":60,"col2":{"COL2a":60,"col2b":600},"col3":{"6":600}}
+{"id":7,"col1":70,"col3":{"7":"string7"},"col2":{"col2b":"string7","col2a":70}}
+
+
+
+
+{}
+{"a":5}
diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_nested_complex_table/1 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_nested_complex_table/1
new file mode 100644
index 00000000000..11342c441bc
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_nested_complex_table/1
@@ -0,0 +1,2 @@
+{"user_id":"user1","user_profile":{"name":"Alice","age":28,"preferences":{"sports":{"preference_id":101,"preference_values":["soccer","tennis"]},"music":{"preference_id":102,"preference_values":["rock","classical"]}}},"activity_log":[{"activity_date":"2024-08-01","activities":{"workout":{"details":"Morning
run","metrics":{"duration":30.5,"calories":200.0}},"reading":{"details":"Read
book on
Hive","metrics":{"pages":50.0,"time":2.0}}}},{"activity_date":"2024-08-02","activities":{"travel":
[...]
+{"user_id":"user2","user_profile":{"name":"Bob","age":32,"preferences":{"books":{"preference_id":201,"preference_values":["fiction","non-fiction"]},"travel":{"preference_id":202,"preference_values":["beaches","mountains"]}}},"activity_log":[{"activity_date":"2024-08-01","activities":{"hiking":{"details":"Mountain
trail","metrics":{"distance":10.0,"elevation":500.0}},"photography":{"details":"Wildlife
photoshoot","metrics":{"photos_taken":100.0,"time":4.0}}}},{"activity_date":"2024-08-02"
[...]
diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_nested_complex_table/2 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_nested_complex_table/2
new file mode 100644
index 00000000000..e1b0befc7bc
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_nested_complex_table/2
@@ -0,0 +1 @@
+{"user_id":"user3","user_profile":{"name":"Carol","age":24,"preferences":{"food":{"preference_id":301,"preference_values":["vegan","desserts"]},"movies":{"preference_id":302,"preference_values":["action","comedy"]}}},"activity_log":[{"activity_date":"2024-08-01","activities":{"cooking":{"details":"Made
vegan
meal","metrics":{"time_spent":1.5,"calories":500.0}},"movie":{"details":"Watched
action
movie","metrics":{"duration":2.0,"rating":8.5}}}},{"activity_date":"2024-08-02","activities":{
[...]
diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_nested_complex_table/modify_2 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_nested_complex_table/modify_2
new file mode 100644
index 00000000000..08f1586f3aa
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json/json_nested_complex_table/modify_2
@@ -0,0 +1,2 @@
+{"user_ID":"user4","user_PROFILE":{"name":"Carol","age":24,"preferences":{"food":{"preference_ID":301,"preference_VALUES":["vegan","desserts"]},"movies":{"preference_ID":302,"preference_VALUES":["action","comedy"]}}},"activity_LOG":[{"activity_DATE":"2024-08-01","activities":{"cooking":{"DETAILS":"Made
vegan
meal","metrics":{"time_spent":1.5,"calories":500.0}},"movie":{"DETAILS":"Watched
action
movie","metrics":{"duration":2.0,"rating":8.5}}}},{"activity_DATE":"2024-08-02","activities":{
[...]
+
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java
index 97032467cec..0f839d238b2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java
@@ -94,6 +94,9 @@ public class HiveMetaStoreClientHelper {
private static final Pattern digitPattern = Pattern.compile("(\\d+)");
+    public static final String HIVE_JSON_SERDE = "org.apache.hive.hcatalog.data.JsonSerDe";
+    public static final String LEGACY_HIVE_JSON_SERDE = "org.apache.hadoop.hive.serde2.JsonSerDe";
+
+
public enum HiveFileFormat {
TEXT_FILE(0, "text"),
PARQUET(1, "parquet"),
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
index e710bdb935d..3a2a4d3eb5c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
@@ -364,14 +364,21 @@ public class HiveScanNode extends FileQueryScanNode {
@Override
public TFileFormatType getFileFormatType() throws UserException {
TFileFormatType type = null;
-        String inputFormatName = hmsTable.getRemoteTable().getSd().getInputFormat();
+        Table table = hmsTable.getRemoteTable();
+        String inputFormatName = table.getSd().getInputFormat();
         String hiveFormat = HiveMetaStoreClientHelper.HiveFileFormat.getFormat(inputFormatName);
         if (hiveFormat.equals(HiveMetaStoreClientHelper.HiveFileFormat.PARQUET.getDesc())) {
             type = TFileFormatType.FORMAT_PARQUET;
         } else if (hiveFormat.equals(HiveMetaStoreClientHelper.HiveFileFormat.ORC.getDesc())) {
             type = TFileFormatType.FORMAT_ORC;
         } else if (hiveFormat.equals(HiveMetaStoreClientHelper.HiveFileFormat.TEXT_FILE.getDesc())) {
-            type = TFileFormatType.FORMAT_CSV_PLAIN;
+            String serDeLib = table.getSd().getSerdeInfo().getSerializationLib();
+            if (serDeLib.equals(HiveMetaStoreClientHelper.HIVE_JSON_SERDE)
+                    || serDeLib.equals(HiveMetaStoreClientHelper.LEGACY_HIVE_JSON_SERDE)) {
+                type = TFileFormatType.FORMAT_JSON;
+            } else {
+                type = TFileFormatType.FORMAT_CSV_PLAIN;
+            }
}
return type;
}
@@ -383,11 +390,12 @@ public class HiveScanNode extends FileQueryScanNode {
@Override
protected TFileAttributes getFileAttributes() throws UserException {
- TFileTextScanRangeParams textParams = new TFileTextScanRangeParams();
+ TFileAttributes fileAttributes = new TFileAttributes();
Table table = hmsTable.getRemoteTable();
// TODO: separate hive text table and OpenCsv table
String serDeLib = table.getSd().getSerdeInfo().getSerializationLib();
         if (serDeLib.equals("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) {
+            TFileTextScanRangeParams textParams = new TFileTextScanRangeParams();
// set properties of LazySimpleSerDe
// 1. set column separator
textParams.setColumnSeparator(HiveProperties.getFieldDelimiter(table));
@@ -401,7 +409,10 @@ public class HiveScanNode extends FileQueryScanNode {
             HiveProperties.getEscapeDelimiter(table).ifPresent(d -> textParams.setEscape(d.getBytes()[0]));
// 6. set null format
textParams.setNullFormat(HiveProperties.getNullFormat(table));
+ fileAttributes.setTextParams(textParams);
+ fileAttributes.setHeaderType("");
         } else if (serDeLib.equals("org.apache.hadoop.hive.serde2.OpenCSVSerde")) {
+            TFileTextScanRangeParams textParams = new TFileTextScanRangeParams();
             // set properties of OpenCSVSerde
// 1. set column separator
textParams.setColumnSeparator(HiveProperties.getSeparatorChar(table));
@@ -411,17 +422,29 @@ public class HiveScanNode extends FileQueryScanNode {
textParams.setEnclose(HiveProperties.getQuoteChar(table).getBytes()[0]);
// 4. set escape char
textParams.setEscape(HiveProperties.getEscapeChar(table).getBytes()[0]);
+ fileAttributes.setTextParams(textParams);
+ fileAttributes.setHeaderType("");
+ if (textParams.isSetEnclose()) {
+ fileAttributes.setTrimDoubleQuotes(true);
+ }
+ } else if (serDeLib.equals("org.apache.hive.hcatalog.data.JsonSerDe"))
{
+ TFileTextScanRangeParams textParams = new
TFileTextScanRangeParams();
+ textParams.setColumnSeparator("\t");
+ textParams.setLineDelimiter("\n");
+ fileAttributes.setTextParams(textParams);
+
+ fileAttributes.setJsonpaths("");
+ fileAttributes.setJsonRoot("");
+ fileAttributes.setNumAsString(true);
+ fileAttributes.setFuzzyParse(false);
+ fileAttributes.setReadJsonByLine(true);
+ fileAttributes.setStripOuterArray(false);
+ fileAttributes.setHeaderType("");
} else {
throw new UserException(
"unsupported hive table serde: " + serDeLib);
}
- TFileAttributes fileAttributes = new TFileAttributes();
- fileAttributes.setTextParams(textParams);
- fileAttributes.setHeaderType("");
- if (textParams.isSet(TFileTextScanRangeParams._Fields.ENCLOSE)) {
- fileAttributes.setTrimDoubleQuotes(true);
- }
return fileAttributes;
}
diff --git a/regression-test/data/external_table_p0/hive/hive_json_basic_test.out b/regression-test/data/external_table_p0/hive/hive_json_basic_test.out
new file mode 100644
index 00000000000..9023f5d72b1
--- /dev/null
+++ b/regression-test/data/external_table_p0/hive/hive_json_basic_test.out
@@ -0,0 +1,115 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !q1 --
+1	true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	[...]
+2	false	58	12345	2147483000	\N	789.56	654321.123	987654321	5678.1234	987654.321098	987654321.098765432109	changed_string	new_binary_value	2025-05-25	2025-05-25T15:30	2025-05-25T15:30:00.654322	2025-05-25T15:30:00.654322	char_new_value1	char_new_value2	char_new_value3	[...]
+3	false	-128	-32768	-2147483648	-9223372036854775808	-3.4028235e+38	-1.7976931348623157E308	-999999999	-9999.9999	-999999999.999999	\N	min_string_value	xxxx	2001-01-01	2001-01-01T00:00	2001-01-01T00:00	2001-01-01T00:00	char_min_value1	char_min_value2	char_min_value3	[...]
+4	\N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	dt2
+5	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	dt3
+6	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	dt3
+
+-- !q2 --
+3	false	-128	-32768	-2147483648	-9223372036854775808	-3.4028235e+38	-1.7976931348623157E308	-999999999	-9999.9999	-999999999.999999	\N	min_string_value	xxxx	2001-01-01	2001-01-01T00:00	2001-01-01T00:00	2001-01-01T00:00	char_min_value1	char_min_value2	char_min_value3	[...]
+
+-- !q3 --
+1	true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	[...]
+
+-- !q4 --
+123.45
+789.56
+-3.4028235e+38
+123.45
+
+-- !q5 --
+2	false	58	12345	2147483000	\N	789.56	654321.123	987654321	5678.1234	987654.321098	987654321.098765432109	changed_string	new_binary_value	2025-05-25	2025-05-25T15:30	2025-05-25T15:30:00.654322	2025-05-25T15:30:00.654322	char_new_value1	char_new_value2	char_new_value3	[...]
+
+-- !q6 --
+user1	{"name":"Alice", "age":28, "preferences":{"sports":{"preference_id":101, "preference_values":["soccer", "tennis"]}, "music":{"preference_id":102, "preference_values":["rock", "classical"]}}}	[{"activity_date":"2024-08-01", "activities":{"workout":{"details":"Morning run", "metrics":{"duration":30.5, "calories":200}}, "reading":{"details":"Read book on Hive", "metrics":{"pages":50, "time":2}}}}, {"activity_date":"2024-08-02", "activities":{"travel":{"details":"Flight to NY", "metric [...]
+user2	{"name":"Bob", "age":32, "preferences":{"books":{"preference_id":201, "preference_values":["fiction", "non-fiction"]}, "travel":{"preference_id":202, "preference_values":["beaches", "mountains"]}}}	[{"activity_date":"2024-08-01", "activities":{"hiking":{"details":"Mountain trail", "metrics":{"distance":10, "elevation":500}}, "photography":{"details":"Wildlife photoshoot", "metrics":{"photos_taken":100, "time":4}}}}, {"activity_date":"2024-08-02", "activities":{"workshop":{"details" [...]
+user3	{"name":"Carol", "age":24, "preferences":{"food":{"preference_id":301, "preference_values":["vegan", "desserts"]}, "movies":{"preference_id":302, "preference_values":["action", "comedy"]}}}	[{"activity_date":"2024-08-01", "activities":{"cooking":{"details":"Made vegan meal", "metrics":{"time_spent":1.5, "calories":500}}, "movie":{"details":"Watched action movie", "metrics":{"duration":2, "rating":8.5}}}}, {"activity_date":"2024-08-02", "activities":{"gym":{"details":"Strength train [...]
+user4	{"name":"Carol", "age":24, "preferences":{"food":{"preference_id":301, "preference_values":["vegan", "desserts"]}, "movies":{"preference_id":302, "preference_values":["action", "comedy"]}}}	[{"activity_date":"2024-08-01", "activities":{"cooking":{"details":"Made vegan meal", "metrics":{"time_spent":1.5, "calories":500}}, "movie":{"details":"Watched action movie", "metrics":{"duration":2, "rating":8.5}}}}, {"activity_date":"2024-08-02", "activities":{"gym":{"details":"Strength train [...]
+
+-- !q7 --
+user1	[{"activity_date":"2024-08-01", "activities":{"workout":{"details":"Morning run", "metrics":{"duration":30.5, "calories":200}}, "reading":{"details":"Read book on Hive", "metrics":{"pages":50, "time":2}}}}, {"activity_date":"2024-08-02", "activities":{"travel":{"details":"Flight to NY", "metrics":{"distance":500, "time":3}}, "meeting":{"details":"Project meeting", "metrics":{"duration":1.5, "participants":5}}}}]
+user2	[{"activity_date":"2024-08-01", "activities":{"hiking":{"details":"Mountain trail", "metrics":{"distance":10, "elevation":500}}, "photography":{"details":"Wildlife photoshoot", "metrics":{"photos_taken":100, "time":4}}}}, {"activity_date":"2024-08-02", "activities":{"workshop":{"details":"Photography workshop", "metrics":{"duration":3, "participants":15}}, "shopping":{"details":"Bought camera gear", "metrics":{"items":5, "cost":1500}}}}]
+user3	[{"activity_date":"2024-08-01", "activities":{"cooking":{"details":"Made vegan meal", "metrics":{"time_spent":1.5, "calories":500}}, "movie":{"details":"Watched action movie", "metrics":{"duration":2, "rating":8.5}}}}, {"activity_date":"2024-08-02", "activities":{"gym":{"details":"Strength training", "metrics":{"duration":1, "calories":300}}, "shopping":{"details":"Bought groceries", "metrics":{"items":10, "cost":100}}}}]
+user4	[{"activity_date":"2024-08-01", "activities":{"cooking":{"details":"Made vegan meal", "metrics":{"time_spent":1.5, "calories":500}}, "movie":{"details":"Watched action movie", "metrics":{"duration":2, "rating":8.5}}}}, {"activity_date":"2024-08-02", "activities":{"gym":{"details":"Strength training", "metrics":{"duration":1, "calories":300}}, "shopping":{"details":"Bought groceries", "metrics":{"items":10, "cost":100}}}}]
+
+-- !q8 --
+\N \N \N \N
+\N \N \N \N
+1 10 {"col2a":10, "col2b":"string1"} {1:"string10"}
+2 20 {"col2a":20, "col2b":"string2"} {2:"string2"}
+3 30 {"col2a":30, "col2b":"string3"} \N
+4 40 {"col2a":40, "col2b":"string4"} {4:"string4"}
+5 \N \N \N
+6 60 {"col2a":60, "col2b":"600"} {6:"600"}
+7 70 {"col2a":70, "col2b":"string7"} {7:"string7"}
+
+-- !q9 --
+\N \N
+\N \N
+\N 5
+10 1
+20 2
+30 3
+40 4
+60 6
+70 7
+
+-- !q1 --
+1	true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	[...]
+2	false	58	12345	2147483000	\N	789.56	654321.123	987654321	5678.1234	987654.321098	987654321.098765432109	changed_string	new_binary_value	2025-05-25	2025-05-25T15:30	2025-05-25T15:30:00.654322	2025-05-25T15:30:00.654322	char_new_value1	char_new_value2	char_new_value3	[...]
+3	false	-128	-32768	-2147483648	-9223372036854775808	-3.4028235e+38	-1.7976931348623157E308	-999999999	-9999.9999	-999999999.999999	\N	min_string_value	xxxx	2001-01-01	2001-01-01T00:00	2001-01-01T00:00	2001-01-01T00:00	char_min_value1	char_min_value2	char_min_value3	[...]
+4	\N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	dt2
+5	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	dt3
+6	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	dt3
+
+-- !q2 --
+3	false	-128	-32768	-2147483648	-9223372036854775808	-3.4028235e+38	-1.7976931348623157E308	-999999999	-9999.9999	-999999999.999999	\N	min_string_value	xxxx	2001-01-01	2001-01-01T00:00	2001-01-01T00:00	2001-01-01T00:00	char_min_value1	char_min_value2	char_min_value3	[...]
+
+-- !q3 --
+1	true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	[...]
+
+-- !q4 --
+123.45
+789.56
+-3.4028235e+38
+123.45
+
+-- !q5 --
+2	false	58	12345	2147483000	\N	789.56	654321.123	987654321	5678.1234	987654.321098	987654321.098765432109	changed_string	new_binary_value	2025-05-25	2025-05-25T15:30	2025-05-25T15:30:00.654322	2025-05-25T15:30:00.654322	char_new_value1	char_new_value2	char_new_value3	[...]
+
+-- !q6 --
+user1	{"name":"Alice", "age":28, "preferences":{"sports":{"preference_id":101, "preference_values":["soccer", "tennis"]}, "music":{"preference_id":102, "preference_values":["rock", "classical"]}}}	[{"activity_date":"2024-08-01", "activities":{"workout":{"details":"Morning run", "metrics":{"duration":30.5, "calories":200}}, "reading":{"details":"Read book on Hive", "metrics":{"pages":50, "time":2}}}}, {"activity_date":"2024-08-02", "activities":{"travel":{"details":"Flight to NY", "metric [...]
+user2	{"name":"Bob", "age":32, "preferences":{"books":{"preference_id":201, "preference_values":["fiction", "non-fiction"]}, "travel":{"preference_id":202, "preference_values":["beaches", "mountains"]}}}	[{"activity_date":"2024-08-01", "activities":{"hiking":{"details":"Mountain trail", "metrics":{"distance":10, "elevation":500}}, "photography":{"details":"Wildlife photoshoot", "metrics":{"photos_taken":100, "time":4}}}}, {"activity_date":"2024-08-02", "activities":{"workshop":{"details" [...]
+user3	{"name":"Carol", "age":24, "preferences":{"food":{"preference_id":301, "preference_values":["vegan", "desserts"]}, "movies":{"preference_id":302, "preference_values":["action", "comedy"]}}}	[{"activity_date":"2024-08-01", "activities":{"cooking":{"details":"Made vegan meal", "metrics":{"time_spent":1.5, "calories":500}}, "movie":{"details":"Watched action movie", "metrics":{"duration":2, "rating":8.5}}}}, {"activity_date":"2024-08-02", "activities":{"gym":{"details":"Strength train [...]
+user4	{"name":"Carol", "age":24, "preferences":{"food":{"preference_id":301, "preference_values":["vegan", "desserts"]}, "movies":{"preference_id":302, "preference_values":["action", "comedy"]}}}	[{"activity_date":"2024-08-01", "activities":{"cooking":{"details":"Made vegan meal", "metrics":{"time_spent":1.5, "calories":500}}, "movie":{"details":"Watched action movie", "metrics":{"duration":2, "rating":8.5}}}}, {"activity_date":"2024-08-02", "activities":{"gym":{"details":"Strength train [...]
+
+-- !q7 --
+user1	[{"activity_date":"2024-08-01", "activities":{"workout":{"details":"Morning run", "metrics":{"duration":30.5, "calories":200}}, "reading":{"details":"Read book on Hive", "metrics":{"pages":50, "time":2}}}}, {"activity_date":"2024-08-02", "activities":{"travel":{"details":"Flight to NY", "metrics":{"distance":500, "time":3}}, "meeting":{"details":"Project meeting", "metrics":{"duration":1.5, "participants":5}}}}]
+user2	[{"activity_date":"2024-08-01", "activities":{"hiking":{"details":"Mountain trail", "metrics":{"distance":10, "elevation":500}}, "photography":{"details":"Wildlife photoshoot", "metrics":{"photos_taken":100, "time":4}}}}, {"activity_date":"2024-08-02", "activities":{"workshop":{"details":"Photography workshop", "metrics":{"duration":3, "participants":15}}, "shopping":{"details":"Bought camera gear", "metrics":{"items":5, "cost":1500}}}}]
+user3	[{"activity_date":"2024-08-01", "activities":{"cooking":{"details":"Made vegan meal", "metrics":{"time_spent":1.5, "calories":500}}, "movie":{"details":"Watched action movie", "metrics":{"duration":2, "rating":8.5}}}}, {"activity_date":"2024-08-02", "activities":{"gym":{"details":"Strength training", "metrics":{"duration":1, "calories":300}}, "shopping":{"details":"Bought groceries", "metrics":{"items":10, "cost":100}}}}]
+user4	[{"activity_date":"2024-08-01", "activities":{"cooking":{"details":"Made vegan meal", "metrics":{"time_spent":1.5, "calories":500}}, "movie":{"details":"Watched action movie", "metrics":{"duration":2, "rating":8.5}}}}, {"activity_date":"2024-08-02", "activities":{"gym":{"details":"Strength training", "metrics":{"duration":1, "calories":300}}, "shopping":{"details":"Bought groceries", "metrics":{"items":10, "cost":100}}}}]
+
+-- !q8 --
+\N \N \N \N
+\N \N \N \N
+1 10 {"col2a":10, "col2b":"string1"} {1:"string10"}
+2 20 {"col2a":20, "col2b":"string2"} {2:"string2"}
+3 30 {"col2a":30, "col2b":"string3"} \N
+4 40 {"col2a":40, "col2b":"string4"} {4:"string4"}
+5 \N \N \N
+6 60 {"col2a":60, "col2b":"600"} {6:"600"}
+7 70 {"col2a":70, "col2b":"string7"} {7:"string7"}
+
+-- !q9 --
+\N \N
+\N \N
+\N 5
+10 1
+20 2
+30 3
+40 4
+60 6
+70 7
+
diff --git a/regression-test/suites/external_table_p0/hive/hive_json_basic_test.groovy b/regression-test/suites/external_table_p0/hive/hive_json_basic_test.groovy
new file mode 100644
index 00000000000..9d05e1a4c74
--- /dev/null
+++ b/regression-test/suites/external_table_p0/hive/hive_json_basic_test.groovy
@@ -0,0 +1,71 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("hive_json_basic_test",
"p0,external,hive,external_docker,external_docker_hive") {
+
+
+ String enabled = context.config.otherConfigs.get("enableHiveTest")
+ if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+ logger.info("diable Hive test.")
+ return;
+ }
+
+ for (String hivePrefix : ["hive2", "hive3"]) {
+ try {
+ String externalEnvIp =
context.config.otherConfigs.get("externalEnvIp")
+ String hms_port = context.config.otherConfigs.get(hivePrefix +
"HmsPort")
+ String catalog_name = "${hivePrefix}_hive_json_basic_test"
+ String broker_name = "hdfs"
+
+ sql """drop catalog if exists ${catalog_name}"""
+ sql """create catalog if not exists ${catalog_name} properties (
+ 'type'='hms',
+ 'hive.metastore.uris'='thrift://${externalEnvIp}:${hms_port}'
+ );"""
+ sql """use `${catalog_name}`.`default`"""
+
+ String tb1 = """json_all_complex_types"""
+ String tb2 = """json_nested_complex_table"""
+ String tb3 = """json_load_data_table"""
+
+ def tables = sql """ show tables """
+ logger.info("tables = ${tables}")
+
+ qt_q1 """ select * from ${tb1} order by id """
+ qt_q2 """ select * from ${tb1} where tinyint_col < 0 order by id
"""
+ qt_q3 """ select * from ${tb1} where bigint_col > 0 order by id """
+ qt_q4 """ select float_col from ${tb1} where float_col is not null
order by id """
+ qt_q5 """ select * from ${tb1} where id = 2 order by id """
+
+
+
+ qt_q6 """ select * from ${tb2} order by user_id"""
+ qt_q7 """ select user_id,activity_log from ${tb2} order by
user_id"""
+
+
+ order_qt_q8 """ select * from ${tb3} order by id """
+
+ order_qt_q9 """ select col1,id from ${tb3} order by id """
+
+
+
+
+ sql """drop catalog if exists ${catalog_name}"""
+ } finally {
+ }
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]