This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new eaab105a394 [fix](variant) fix batch insert into with
structure-conflicts strings (#53923)
eaab105a394 is described below
commit eaab105a3945455f74dd1bb310fbc578531fd312
Author: amory <[email protected]>
AuthorDate: Tue Jul 29 10:06:21 2025 +0800
[fix](variant) fix batch insert into with structure-conflicts strings
(#53923)
Fix batch INSERT INTO when the inserted strings have conflicting structures.
Before this change, such an insert succeeded, but subsequent queries failed
with an error like this:
```
mysql> insert into var_nested_load_conflict values (3, '{"nested": [{"a":
2.5, "b": "123.1"}]}'), (4, '{"nested": {"a": 2.5, "b": "123.1"}}');
Query OK, 2 rows affected (0.16 sec)
{'label':'label_9279242ae3fd40e2_aabe077db2d37bb9', 'status':'VISIBLE',
'txnId':'16028'}
mysql> desc var_nested_load_conflict;
+------------+---------------+------+-------+---------+-------+
| Field | Type | Null | Key | Default | Extra |
+------------+---------------+------+-------+---------+-------+
| k | bigint | Yes | true | NULL | |
| v | variant | Yes | false | NULL | NONE |
| v.nested.a | json | Yes | false | NULL | NONE |
| v.nested.b | json | Yes | false | NULL | NONE |
| v.nested.c | array<double> | Yes | false | NULL | NONE |
+------------+---------------+------+-------+---------+-------+
5 rows in set (0.10 sec)
mysql> select * from var_nested_load_conflict;
ERROR 1105 (HY000): errCode = 2, detailMessage =
(10.16.10.6)[INTERNAL_ERROR]Meet none array column when flatten nested array,
path nested.b, type Nullable(JSONB)
```
So we no longer allow this kind of insertion; it now fails at insert time:
```
mysql> insert into var_nested_load_conflict values (3, '{"nested": [{"a":
2.5, "b": "123.1"}]}'), (4, '{"nested": {"a": 2.5, "b": "123.1"}}');
ERROR 1105 (HY000): errCode = 2, detailMessage =
(10.16.10.6)[DATA_QUALITY_ERROR][E46] Ambiguous paths: nested.b vs nested.b
with different nested part false vs true
```
---
be/src/vec/common/schema_util.cpp | 3 --
be/src/vec/json/parse2column.cpp | 10 ++++++
be/test/common/schema_util_test.cpp | 42 ++++++++++++++++++++++++
regression-test/suites/variant_p0/nested2.groovy | 7 ++++
4 files changed, 59 insertions(+), 3 deletions(-)
diff --git a/be/src/vec/common/schema_util.cpp
b/be/src/vec/common/schema_util.cpp
index 044f6527e72..693451d1cdc 100644
--- a/be/src/vec/common/schema_util.cpp
+++ b/be/src/vec/common/schema_util.cpp
@@ -267,9 +267,6 @@ Status check_variant_has_no_ambiguous_paths(const
PathsInData& tuple_paths) {
path_groups[tuple_paths[i].get_path()].push_back(i);
// print part of tuple_paths[i]
VLOG_DEBUG << "tuple_paths[i]: " << tuple_paths[i].get_path();
- for (const auto& part : tuple_paths[i].get_parts()) {
- VLOG_DEBUG << "part: " << part.key << ", is_nested: " <<
part.is_nested;
- }
}
// Only compare paths within the same group
diff --git a/be/src/vec/json/parse2column.cpp b/be/src/vec/json/parse2column.cpp
index 858b0aeb78f..7b84e29504a 100644
--- a/be/src/vec/json/parse2column.cpp
+++ b/be/src/vec/json/parse2column.cpp
@@ -155,6 +155,16 @@ void parse_json_to_variant(IColumn& column, const char*
src, size_t length,
auto& [paths, values] = *result;
assert(paths.size() == values.size());
size_t old_num_rows = column_variant.size();
+ if (config.enable_flatten_nested) {
+ // here we should check the paths in variant and paths in result,
+ // if two paths which same prefix have different structure, we should
throw an exception
+ std::vector<PathInData> check_paths;
+ for (const auto& entry : column_variant.get_subcolumns()) {
+ check_paths.push_back(entry->path);
+ }
+ check_paths.insert(check_paths.end(), paths.begin(), paths.end());
+
THROW_IF_ERROR(vectorized::schema_util::check_variant_has_no_ambiguous_paths(check_paths));
+ }
for (size_t i = 0; i < paths.size(); ++i) {
FieldInfo field_info;
get_field_info(values[i], &field_info);
diff --git a/be/test/common/schema_util_test.cpp
b/be/test/common/schema_util_test.cpp
index 743db751dc1..2fa3fd11f87 100644
--- a/be/test/common/schema_util_test.cpp
+++ b/be/test/common/schema_util_test.cpp
@@ -19,6 +19,14 @@
#include <gtest/gtest.h>
+#include "vec/columns/column_string.h"
+#include "vec/columns/column_variant.h"
+#include "vec/core/block.h"
+#include "vec/core/column_with_type_and_name.h"
+#include "vec/data_types/data_type_string.h"
+#include "vec/data_types/data_type_variant.h"
+#include "vec/json/json_parser.h"
+
namespace doris {
class SchemaUtilTest : public testing::Test {};
@@ -387,4 +395,38 @@ TEST_F(SchemaUtilTest, check_path_conflicts_with_existing)
{
}
}
+TEST_F(SchemaUtilTest, parse_variant_columns_ambiguous_paths) {
+ using namespace doris::vectorized;
+ // Prepare the string column with two rows
+ auto string_col = ColumnString::create();
+ string_col->insert(doris::vectorized::Field::create_field<TYPE_STRING>(
+ String("{\"nested\": [{\"a\": 2.5, \"b\": \"123.1\"}]}")));
+ string_col->insert(doris::vectorized::Field::create_field<TYPE_STRING>(
+ String("{\"nested\": {\"a\": 2.5, \"b\": \"123.1\"}}")));
+ auto string_type = std::make_shared<DataTypeString>();
+
+ // Prepare the variant column with the string column as root
+ vectorized::ColumnVariant::Subcolumns dynamic_subcolumns;
+ dynamic_subcolumns.create_root(
+ vectorized::ColumnVariant::Subcolumn(string_col->assume_mutable(),
string_type, true));
+
+ auto variant_col = ColumnVariant::create(std::move(dynamic_subcolumns),
true);
+ auto variant_type = std::make_shared<DataTypeVariant>();
+
+ // Construct the block
+ Block block;
+ block.insert(
+ vectorized::ColumnWithTypeAndName(variant_col->assume_mutable(),
variant_type, "v"));
+
+ // The variant column is at index 0
+ std::vector<int> variant_pos = {0};
+ ParseConfig config;
+ config.enable_flatten_nested = true;
+
+ // Should throw due to ambiguous paths
+ Status st = schema_util::parse_variant_columns(block, variant_pos, config);
+ EXPECT_FALSE(st.ok());
+ EXPECT_TRUE(st.to_string().find("Ambiguous paths") != std::string::npos);
+}
+
} // namespace doris
diff --git a/regression-test/suites/variant_p0/nested2.groovy
b/regression-test/suites/variant_p0/nested2.groovy
index 75d84a664c2..099b1c903f1 100644
--- a/regression-test/suites/variant_p0/nested2.groovy
+++ b/regression-test/suites/variant_p0/nested2.groovy
@@ -68,6 +68,13 @@ suite("variant_nested_type_conflict", "p0"){
"""
exception "Nesting of array in Nested array within variant
subcolumns is currently not supported."
}
+ // insert batch different structure in same path
+ test {
+ sql """
+ insert into ${table_name} values (3, '{"nested": [{"a": 2.5,
"b": "123.1"}]}'), (4, '{"nested": {"a": 2.5, "b": "123.1"}}');
+ """
+ exception "Ambiguous paths"
+ }
/// insert a array of object for a, b, c
// insert type conflict in multiple rows
sql """
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]