This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new edfa1b4f7ee [fix](array) fix add_array_null_value for inverted
index_builder (#42638)
edfa1b4f7ee is described below
commit edfa1b4f7eea32ab7d515ca264af7ccce971bd74
Author: amory <[email protected]>
AuthorDate: Wed Oct 30 08:23:16 2024 +0800
[fix](array) fix add_array_null_value for inverted index_builder (#42638)
in index_builder.cpp we should also handle function add_array_null which
happend in alter statement
---
be/src/olap/task/index_builder.cpp | 7 +
.../array_contains/arr_null_test_data.csv | 200 +++++++++++++++++++++
.../array_contains/test_add_index_for_arr.groovy | 155 ++++++++++++++++
3 files changed, 362 insertions(+)
diff --git a/be/src/olap/task/index_builder.cpp
b/be/src/olap/task/index_builder.cpp
index 38a52d1d211..c1c94a439c8 100644
--- a/be/src/olap/task/index_builder.cpp
+++ b/be/src/olap/task/index_builder.cpp
@@ -555,6 +555,13 @@ Status IndexBuilder::_add_nullable(const std::string&
column_name,
return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
"CLuceneError occured: {}", e.what());
}
+ // we should refresh nullmap for array
+ for (int row_id = 0; row_id < num_rows; row_id++) {
+ if (null_map && null_map[row_id] == 1) {
+ RETURN_IF_ERROR(
+
_inverted_index_builders[index_writer_sign]->add_array_nulls(row_id));
+ }
+ }
return Status::OK();
}
diff --git
a/regression-test/data/inverted_index_p0/array_contains/arr_null_test_data.csv
b/regression-test/data/inverted_index_p0/array_contains/arr_null_test_data.csv
new file mode 100644
index 00000000000..2258751c931
--- /dev/null
+++
b/regression-test/data/inverted_index_p0/array_contains/arr_null_test_data.csv
@@ -0,0 +1,200 @@
+0|NULL|NULL
+1|[]|NULL
+2|NULL|[]
+3|[null, 'None', null, null, 'None', 'text5', null]|[]
+4|[null, 'None', 'None', null, 'text4', null, 'None']|NULL
+5|NULL|NULL
+6|NULL|[]
+7|NULL|NULL
+8|[null, 'None', 'None', null, 'text4', 'None', null, 'text7', null]|[]
+9|NULL|[]
+10|['None', 'text1', 'None', 'text3', 'text4', null, 'text6', null]|['text0',
'None', null, 'text3', 'None']
+11|[null, 'None', null, 'None', null, 'text5', null, null, 'text8']|[null,
null, null, 'text3', null, 'text5', null]
+12|[]|['text0', 'text1', null, 'text3', 'text4', 'None', 'text6', null]
+13|[]|[]
+14|NULL|[null, null, null, 'None', 'None']
+15|NULL|NULL
+16|NULL|NULL
+17|NULL|['text0', null, null, null, 'text4', 'text5', null, null]
+18|[null, 'None', 'None', 'None', 'None', null, 'text6', 'text7', null,
null]|NULL
+19|[]|[]
+20|NULL|[]
+21|NULL|NULL
+22|NULL|NULL
+23|NULL|NULL
+24|NULL|[]
+25|NULL|[]
+26|NULL|['text0', 'text1', 'text2', 'text3', null, null, null]
+27|[]|NULL
+28|NULL|NULL
+29|NULL|[]
+30|[]|[null, null, null, 'text3', 'None', 'None', null, null, null, null]
+31|NULL|NULL
+32|NULL|NULL
+33|[]|[]
+34|['None', null, null, 'text3', 'None', null, null, null, 'text8']|[null,
'None', 'None', 'None', 'None', 'None', null]
+35|[]|NULL
+36|NULL|[null, 'None', null, 'text3', 'text4', null, 'text6', null, 'None',
'text9']
+37|['text0', null, 'text2', null, 'text4', 'text5']|NULL
+38|[]|[null, null, null, 'None', 'text4']
+39|[]|['text0', 'text1', null, 'None', 'None']
+40|NULL|['None', null, 'text2', 'text3', 'text4', 'None', 'None', null, null,
null]
+41|[null, 'text1', 'text2', 'None', 'None', 'None', null, null, null]|[null,
'None', null, null, 'text4', 'text5', null, 'text7', null, 'None']
+42|NULL|NULL
+43|NULL|NULL
+44|NULL|[]
+45|['None', null, null, 'None', null, 'None', 'None', 'None']|[]
+46|[null, null, 'text2', null, null, null, 'text6', null, 'text8']|NULL
+47|NULL|NULL
+48|NULL|[]
+49|NULL|NULL
+50|['text0', null, 'None', null, null, null, null, 'None']|[]
+51|NULL|NULL
+52|NULL|['text0', 'None', 'None', 'None', 'text4']
+53|[]|NULL
+54|['text0', 'None', 'text2', null, 'None', 'None', null, null]|NULL
+55|['None', null, null, null, null, 'None', null]|[]
+56|NULL|[]
+57|['None', 'text1', null, null, null, null, 'None', null]|[null, null,
'None', 'text3', 'None', null, 'None', 'None']
+58|NULL|[]
+59|NULL|NULL
+60|[]|NULL
+61|[null, 'text1', null, 'None', 'text4', null]|[]
+62|NULL|NULL
+63|[null, null, 'text2', null, 'None', null, null, null, null]|[null, 'text1',
'None', 'text3', null, null, 'text6']
+64|['text0', null, null, null, null, 'text5']|NULL
+65|NULL|['text0', 'None', 'None', 'None', null, 'None', null, 'None', 'text8',
'None']
+66|NULL|NULL
+67|[]|NULL
+68|['None', null, null, 'text3', 'None', null, 'text6', 'None']|NULL
+69|NULL|['text0', null, null, 'None', 'text4', 'text5', null]
+70|['text0', null, null, null, 'text4']|[]
+71|NULL|[null, 'text1', null, null, 'text4', 'text5']
+72|NULL|[]
+73|NULL|[]
+74|[null, 'None', null, 'None', null, null]|[null, 'text1', 'None', null,
'text4', null]
+75|NULL|NULL
+76|[]|[]
+77|[]|[]
+78|[null, null, 'None', 'None', null, null]|['None', 'text1', null, 'None',
null, null, null, 'None', null]
+79|NULL|[]
+80|[]|[]
+81|[]|NULL
+82|NULL|NULL
+83|['None', 'None', null, 'None', 'None', 'text5', null, null, null,
'text9']|['None', null, 'None', null, 'text4', 'None', 'None', 'text7', 'None']
+84|NULL|['None', 'None', 'text2', null, 'None', null, null, 'None']
+85|NULL|[null, null, 'text2', 'text3', null, 'text5']
+86|[]|['text0', null, null, 'text3', null, 'text5']
+87|NULL|['None', null, null, null, 'None']
+88|NULL|[]
+89|[null, 'None', 'None', null, 'text4', null, 'text6']|[]
+90|NULL|['None', null, 'None', null, 'None', 'text5', 'text6', 'text7', null]
+91|NULL|[null, 'None', null, 'text3', null, null, 'text6', 'text7', 'None']
+92|NULL|['None', 'None', 'None', null, null, 'text5', null, null]
+93|[]|[]
+94|[null, null, 'None', 'text3', 'text4', null]|[]
+95|NULL|[null, 'None', 'None', null, 'None', null, null, 'None']
+96|NULL|NULL
+97|[]|NULL
+98|[]|NULL
+99|['text0', 'text1', null, 'text3', 'text4', null, null, null, null,
'None']|['None', 'text1', 'text2', 'text3', null, null, 'None', null]
+100|NULL|NULL
+101|NULL|[null, null, null, null, 'text4', null, 'None', 'None']
+102|[null, 'None', 'None', 'text3', 'None', null, null, null, null]|['None',
null, null, 'text3', null]
+103|NULL|NULL
+104|NULL|[null, 'None', 'text2', null, 'None', null]
+105|[null, null, 'text2', 'text3', null]|NULL
+106|NULL|[]
+107|NULL|['text0', null, null, null, 'None']
+108|NULL|NULL
+109|NULL|[]
+110|[null, 'text1', 'None', null, 'None', null, 'text6', 'text7']|[]
+111|[null, null, 'None', 'text3', null, 'None']|[null, 'text1', null, null,
'None', null]
+112|NULL|[null, 'None', null, null, 'text4', 'text5', 'text6', 'None']
+113|NULL|[]
+114|NULL|['None', null, null, null, null, 'text5']
+115|[null, null, 'None', 'text3', null]|['text0', 'text1', null, null, null]
+116|NULL|NULL
+117|NULL|[]
+118|['text0', null, null, null, 'None', 'text5', 'text6', 'None', 'text8']|NULL
+119|[null, 'text1', 'text2', 'None', null, null, 'text6', 'None']|NULL
+120|[null, 'None', null, 'None', null, null, null, null, null]|['text0',
'text1', 'None', 'None', null, 'None', null, null]
+121|NULL|[null, null, 'text2', null, 'None']
+122|NULL|[]
+123|['None', 'text1', 'None', 'None', null, 'text5']|[null, 'None', null,
null, 'None', 'None', null]
+124|[]|[null, 'None', null, null, null, null]
+125|NULL|NULL
+126|NULL|[]
+127|NULL|NULL
+128|NULL|[]
+129|['text0', 'None', 'None', 'text3', null, 'text5']|['None', 'text1',
'text2', 'None', null, null, 'None', null, 'None']
+130|NULL|NULL
+131|['text0', 'text1', 'text2', 'None', null, null, 'None']|NULL
+132|NULL|NULL
+133|NULL|NULL
+134|[]|[]
+135|NULL|NULL
+136|NULL|[null, null, null, null, null, 'None', 'text6', null]
+137|[]|NULL
+138|NULL|[]
+139|NULL|['text0', null, 'text2', null, 'text4', null, null, 'None', 'text8',
null]
+140|[]|[]
+141|[]|[]
+142|NULL|NULL
+143|NULL|NULL
+144|[]|['None', 'text1', null, null, 'text4', null, 'text6', null, null,
'None']
+145|[]|NULL
+146|[]|['text0', 'None', null, null, null, 'None', null]
+147|NULL|NULL
+148|['None', 'None', null, 'None', 'text4', 'None']|['None', null, null, null,
'None']
+149|NULL|['None', null, null, 'None', 'None', null, null, 'text7']
+150|['text0', 'text1', 'None', 'text3', 'text4', 'None']|[]
+151|[]|[]
+152|NULL|NULL
+153|NULL|NULL
+154|[null, 'text1', null, null, null, 'None', 'None', 'text7']|[null, null,
null, null, 'text4', 'text5', 'text6']
+155|NULL|['None', 'text1', 'text2', 'text3', 'None', null, 'None', 'text7',
'text8', 'None']
+156|[null, 'None', null, 'text3', null, 'text5', 'text6', null]|NULL
+157|NULL|['None', 'None', 'text2', null, null]
+158|[]|[]
+159|NULL|[]
+160|NULL|NULL
+161|[]|[]
+162|[null, 'None', null, 'None', 'text4']|NULL
+163|NULL|NULL
+164|['None', 'None', 'None', null, null, null, 'text6', null, null]|NULL
+165|[]|NULL
+166|NULL|[]
+167|NULL|NULL
+168|[]|[]
+169|[]|['text0', 'text1', null, null, 'text4', null]
+170|NULL|[null, null, 'None', 'None', 'None', 'text5', 'None', null, 'None',
'None']
+171|NULL|NULL
+172|[]|NULL
+173|NULL|[]
+174|['None', 'text1', null, 'None', 'text4', null, null, 'None', 'text8']|[]
+175|NULL|NULL
+176|[]|NULL
+177|NULL|[]
+178|[]|[]
+179|NULL|NULL
+180|[null, null, null, null, 'text4', null, null, null, 'text8']|NULL
+181|[]|[null, 'None', null, null, 'text4', null, 'text6', 'None']
+182|NULL|['None', 'text1', null, 'None', 'None', null, 'None', 'text7']
+183|NULL|[null, null, 'None', 'text3', null, 'None', null, 'None', null, null]
+184|[null, 'None', null, 'None', null, 'text5', 'None', 'text7']|NULL
+185|[]|NULL
+186|NULL|[null, null, null, 'text3', 'text4']
+187|[null, null, 'None', 'text3', 'text4']|NULL
+188|[]|[]
+189|[null, null, 'text2', null, 'None', null, null, 'None', 'text8']|NULL
+190|['None', null, 'None', null, 'None', 'text5', 'None', null, 'text8',
null]|[]
+191|NULL|NULL
+192|NULL|NULL
+193|['None', null, 'text2', 'None', null]|NULL
+194|NULL|[]
+195|[]|NULL
+196|NULL|NULL
+197|[null, 'text1', 'text2', 'None', 'text4', null, null]|[null, 'None',
'None', 'None', null, null, null, 'text7', null, null]
+198|NULL|NULL
+199|[]|[null, 'None', 'None', null, null, null, null, null, null, 'text9']
diff --git
a/regression-test/suites/inverted_index_p0/array_contains/test_add_index_for_arr.groovy
b/regression-test/suites/inverted_index_p0/array_contains/test_add_index_for_arr.groovy
new file mode 100644
index 00000000000..6f3e772dd08
--- /dev/null
+++
b/regression-test/suites/inverted_index_p0/array_contains/test_add_index_for_arr.groovy
@@ -0,0 +1,155 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+suite("test_add_index_for_arr") {
+ // prepare test table
+ def timeout = 60000
+ def delta_time = 1000
+ def alter_res = "null"
+ def useTime = 0
+ // here some variable to control inverted index query
+ sql """ set enable_profile=true"""
+ sql """ set enable_pipeline_x_engine=true;"""
+ sql """ set enable_inverted_index_query=false"""
+ sql """ set enable_common_expr_pushdown=true """
+
+ def wait_for_latest_op_on_table_finish = { table_name, OpTimeout ->
+ for(int t = delta_time; t <= OpTimeout; t += delta_time){
+ alter_res = sql """SHOW ALTER TABLE COLUMN WHERE TableName =
"${table_name}" ORDER BY CreateTime DESC LIMIT 1;"""
+ alter_res = alter_res.toString()
+ if(alter_res.contains("FINISHED")) {
+ sleep(3000) // wait change table state to normal
+ logger.info(table_name + " latest alter job finished, detail:
" + alter_res)
+ break
+ }
+ useTime = t
+ sleep(delta_time)
+ }
+ assertTrue(useTime <= OpTimeout, "wait_for_latest_op_on_table_finish
timeout")
+ }
+
+ def wait_for_build_index_on_partition_finish = { table_name, OpTimeout ->
+ for(int t = delta_time; t <= OpTimeout; t += delta_time){
+ alter_res = sql """SHOW BUILD INDEX WHERE TableName =
"${table_name}";"""
+ def expected_finished_num = alter_res.size();
+ def finished_num = 0;
+ for (int i = 0; i < expected_finished_num; i++) {
+ logger.info(table_name + " build index job state: " +
alter_res[i][7] + i)
+ if (alter_res[i][7] == "FINISHED") {
+ ++finished_num;
+ }
+ }
+ if (finished_num == expected_finished_num) {
+ logger.info(table_name + " all build index jobs finished,
detail: " + alter_res)
+ break
+ }
+ useTime = t
+ sleep(delta_time)
+ }
+ assertTrue(useTime <= OpTimeout,
"wait_for_latest_build_index_on_partition_finish timeout")
+ }
+
+
+ sql "DROP TABLE IF EXISTS my_test_array"
+ // create table without any index
+ sql """
+ CREATE TABLE IF NOT EXISTS my_test_array (
+ `id` int(11) NULL,
+ `name` ARRAY<text> NULL,
+ `description` ARRAY<text> NULL,
+ )
+ DUPLICATE KEY(`id`)
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ properties("replication_num" = "1");
+ """
+
+ def var_result = sql "show variables"
+ logger.info("show variales result: " + var_result )
+
+ // stream_load with csv data
+ streamLoad {
+ table "my_test_array"
+
+ file "arr_null_test_data.csv" // import csv file
+ time 10000 // limit inflight 10s
+ set 'column_separator', '|'
+ set 'format', 'csv'
+
+ // if declared a check callback, the default check condition will
ignore.
+ // So you must check all condition
+ check { result, exception, startTime, endTime ->
+ if (exception != null) {
+ throw exception
+ }
+ log.info("Stream load result: ${result}".toString())
+ def json = parseJson(result)
+ assertEquals(200, json.NumberTotalRows)
+ assertEquals(200, json.NumberLoadedRows)
+ assertTrue(json.LoadBytes > 0)
+ }
+ }
+
+ // query without inverted index
+ // query rows with array_contains
+ def sql_query_name1 = sql "select id, name[1], description[1] from
my_test_array where array_contains(name,'text7')"
+ // query rows with !array_contains
+ def sql_query_name2 = sql "select id, name[1], description[1] from
my_test_array where !array_contains(name,'text7')"
+
+ // add index for name
+ sql "ALTER TABLE my_test_array ADD INDEX name_idx (name) USING INVERTED;"
+ wait_for_latest_op_on_table_finish("my_test_array", timeout)
+ // build index for name that name data can using inverted index
+ if (!isCloudMode()) {
+ sql "BUILD INDEX name_idx ON my_test_array"
+ wait_for_build_index_on_partition_finish("my_test_array", timeout)
+ }
+
+ // query with inverted index
+ sql "set enable_inverted_index_query=true"
+ // query rows with array_contains
+ def sql_query_name1_inverted = sql "select id, name[1], description[1]
from my_test_array where array_contains(name,'text7')"
+ // query rows with !array_contains
+ def sql_query_name2_inverted = sql "select id, name[1], description[1]
from my_test_array where !array_contains(name,'text7')"
+
+ // check result for query without inverted index and with inverted index
+ def size1 = sql_query_name1.size();
+ log.info("sql_query_name1 query without inverted index rows size:
${size1}")
+ for (int i = 0; i < sql_query_name1.size(); i++) {
+ assertEquals(sql_query_name1[i][0], sql_query_name1_inverted[i][0])
+ assertEquals(sql_query_name1[i][1], sql_query_name1_inverted[i][1])
+ assertEquals(sql_query_name1[i][2], sql_query_name1_inverted[i][2])
+ }
+ def size2 = sql_query_name2.size();
+ log.info("sql_query_name2 query without inverted index rows size:
${size2}")
+ for (int i = 0; i < sql_query_name2.size(); i++) {
+ assertEquals(sql_query_name2[i][0], sql_query_name2_inverted[i][0])
+ assertEquals(sql_query_name2[i][1], sql_query_name2_inverted[i][1])
+ assertEquals(sql_query_name2[i][2], sql_query_name2_inverted[i][2])
+ }
+
+ // drop index
+ // add index on column description
+ sql "drop index name_idx on my_test_array"
+ wait_for_latest_op_on_table_finish("my_test_array", timeout)
+
+ def sql_query_name1_without_inverted = sql "select id, name[1],
description[1] from my_test_array where array_contains(name,'text7')"
+ def sql_query_name2_without_inverted = sql "select id, name[1],
description[1] from my_test_array where !array_contains(name,'text7')"
+
+ assertEquals(sql_query_name1.size(),
sql_query_name1_without_inverted.size())
+ assertEquals(sql_query_name2.size(),
sql_query_name2_without_inverted.size())
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]