DarvenDuan commented on code in PR #20078:
URL: https://github.com/apache/doris/pull/20078#discussion_r1221107137
##########
be/src/vec/exec/format/json/new_json_reader.cpp:
##########
@@ -1370,53 +1374,40 @@ Status
NewJsonReader::_simdjson_handle_nested_complex_json(
return Status::OK();
}
-size_t NewJsonReader::_column_index(const StringRef& name, size_t key_index) {
- /// Optimization by caching the order of fields (which is almost always
the same)
- /// and a quick check to match the next expected field, instead of
searching the hash table.
- if (_prev_positions.size() > key_index && _prev_positions[key_index] &&
- name == _prev_positions[key_index]->get_first()) {
- return _prev_positions[key_index]->get_second();
- } else {
- auto* it = _slot_desc_index.find(name);
- if (it) {
- if (key_index < _prev_positions.size()) {
- _prev_positions[key_index] = it;
- }
- return it->get_second();
- } else {
- return size_t(-1);
- }
- }
-}
-
Status NewJsonReader::_simdjson_set_column_value(simdjson::ondemand::object*
value, Block& block,
const
std::vector<SlotDescriptor*>& slot_descs,
bool* valid) {
// set
_seen_columns.assign(block.columns(), false);
size_t cur_row_count = block.rows();
bool has_valid_value = false;
- // iterate through object, simdjson::ondemond will parsing on the fly
- size_t key_index = 0;
- for (auto field : *value) {
- std::string_view key = field.unescaped_key();
- StringRef name_ref(key.data(), key.size());
- const size_t column_index = _column_index(name_ref, key_index++);
- if (UNLIKELY(ssize_t(column_index) < 0)) {
- // This key is not exist in slot desc, just ignore
+ for (size_t i = 0; i < slot_descs.size(); ++i) {
+ auto slot_desc = slot_descs[i];
+ if (!slot_desc->is_materialized()) {
continue;
}
- simdjson::ondemand::value val = field.value();
- auto* column_ptr =
block.get_by_position(column_index).column->assume_mutable().get();
- RETURN_IF_ERROR(
- _simdjson_write_data_to_column(val, slot_descs[column_index],
column_ptr, valid));
- if (!(*valid)) {
- return Status::OK();
+ auto* column_ptr =
block.get_by_position(i).column->assume_mutable().get();
+ auto field = value->find_field_unordered(slot_desc->col_name());
Review Comment:
Thanks, I will fix it
##########
be/src/vec/exec/format/json/new_json_reader.cpp:
##########
@@ -1730,4 +1726,52 @@ Status
NewJsonReader::_simdjson_write_columns_by_jsonpath(
return Status::OK();
}
+Status NewJsonReader::_get_column_default_value(
Review Comment:
I think I have already filled all the slots earlier; do I need to fill the missing slots again?
##########
be/src/vec/exec/format/json/new_json_reader.cpp:
##########
@@ -1730,4 +1726,52 @@ Status
NewJsonReader::_simdjson_write_columns_by_jsonpath(
return Status::OK();
}
+Status NewJsonReader::_get_column_default_value(
+ const std::vector<SlotDescriptor*>& slot_descs,
+ const std::unordered_map<std::string, vectorized::VExprContext*>&
col_default_value_ctx) {
+ for (auto slot_desc : slot_descs) {
+ auto it = col_default_value_ctx.find(slot_desc->col_name());
+ if (it != col_default_value_ctx.end() && it->second != nullptr) {
+ auto* ctx = it->second;
+ // empty block to save default value of slot_desc->col_name()
+ Block block;
+ // If block is empty, some functions will produce no result. So we
insert a column with
+ // single value here.
+ block.insert({ColumnUInt8::create(1),
std::make_shared<DataTypeUInt8>(), ""});
+ int result = -1;
+ RETURN_IF_ERROR(ctx->execute(&block, &result));
+ DCHECK(result != -1);
+ auto column = block.get_by_position(result).column;
+ DCHECK(column->size() == 1);
+ _col_default_value_map.emplace(slot_desc->col_name(),
+ column->get_data_at(0).to_string());
+ }
+ }
+ return Status::OK();
+}
+
+Status NewJsonReader::_fill_missing_column(SlotDescriptor* slot_desc,
+ vectorized::IColumn* column_ptr,
bool* valid) {
+ if (slot_desc->is_nullable()) {
+ vectorized::ColumnNullable* nullable_column =
+ reinterpret_cast<vectorized::ColumnNullable*>(column_ptr);
+ column_ptr = &nullable_column->get_nested_column();
+ auto col_value = _col_default_value_map.find(slot_desc->col_name());
+ if (col_value == _col_default_value_map.end()) {
+ nullable_column->insert_default();
+ } else {
+ const std::string& v_str = col_value->second;
+ nullable_column->get_null_map_data().push_back(0);
+ assert_cast<ColumnString*>(column_ptr)->insert_data(v_str.c_str(),
v_str.size());
Review Comment:
Thank you for the reminder. I have tested the case where the default value is
CURRENT_TIMESTAMP, and I will add some test cases.
##########
be/src/vec/exec/format/json/new_json_reader.cpp:
##########
@@ -1370,53 +1374,40 @@ Status
NewJsonReader::_simdjson_handle_nested_complex_json(
return Status::OK();
}
-size_t NewJsonReader::_column_index(const StringRef& name, size_t key_index) {
Review Comment:
OK, I will add it back.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]