This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new a9ea138caf [fix](two level hash table) fix dead loop when converting
to two level hash table for zero value (#21899)
a9ea138caf is described below
commit a9ea138cafa1eac203a2515a70f872afbcc154a6
Author: TengJianPing <[email protected]>
AuthorDate: Tue Jul 18 19:50:30 2023 +0800
[fix](two level hash table) fix dead loop when converting to two level hash
table for zero value (#21899)
When the two level hash table is enabled and the existing one level hash table
contains a zero value, converting to the two level hash table falls into a dead
loop, because the PartitionedHashTable::_is_partitioned flag is not set at the
right point during the conversion.
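The hang is an ordering problem: convert_to_partitioned() re-inserts the
existing entries, and the re-insert path for the zero key goes back through
code that checks _is_partitioned. While the flag is still false, that insert
concludes the table needs converting and re-enters the conversion, so it never
terminates. The sketch below illustrates the guard-flag ordering with
hypothetical names and simplified storage; it is not the actual Doris
PartitionedHashTable code.

    // Hypothetical sketch of the guard-flag ordering; not the Doris implementation.
    #include <array>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct PartitionedTableSketch {
        static constexpr std::size_t kNumSubTables = 16;
        static constexpr std::size_t kConvertThreshold = 4;

        bool is_partitioned = false;
        std::vector<std::uint64_t> level0;                            // one level storage
        std::array<std::vector<std::uint64_t>, kNumSubTables> level1; // two level storage

        void insert(std::uint64_t key) {
            if (!is_partitioned && level0.size() >= kConvertThreshold) {
                convert_to_partitioned();
            }
            if (is_partitioned) {
                level1[key % kNumSubTables].push_back(key);
            } else {
                level0.push_back(key);
            }
        }

        void convert_to_partitioned() {
            // Flip the flag before moving elements. If it were flipped only after
            // the loop, an insert issued while moving (the special zero-key cell
            // in the real table takes such a path) would still see "not
            // partitioned", hit the threshold again, and re-enter this function.
            is_partitioned = true;
            for (std::uint64_t key : level0) {
                insert(key); // routed to a level-1 sub-table because the flag is set
            }
            level0.clear();
        }
    };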
---
.../vec/common/hash_table/partitioned_hash_table.h | 4 +-
.../query_p0/join/test_partitioned_hash_join.out | 7 +
.../query_p0/join/test_partitioned_hash_join_r.csv | 257 +++++++++++++++++++++
.../join/test_partitioned_hash_join.groovy | 79 +++++++
4 files changed, 346 insertions(+), 1 deletion(-)
diff --git a/be/src/vec/common/hash_table/partitioned_hash_table.h b/be/src/vec/common/hash_table/partitioned_hash_table.h
index 9990c4491e..756cbe7a73 100644
--- a/be/src/vec/common/hash_table/partitioned_hash_table.h
+++ b/be/src/vec/common/hash_table/partitioned_hash_table.h
@@ -563,6 +563,9 @@ private:
void convert_to_partitioned() {
SCOPED_RAW_TIMER(&_convert_timer_ns);
+ DCHECK(!_is_partitioned);
+ _is_partitioned = true;
+
auto bucket_count = level0_sub_table.get_buffer_size_in_cells();
for (size_t i = 0; i < NUM_LEVEL1_SUB_TABLES; ++i) {
level1_sub_tables[i] = std::move(Impl(bucket_count / NUM_LEVEL1_SUB_TABLES));
@@ -592,7 +595,6 @@ private:
}
}
- _is_partitioned = true;
level0_sub_table.clear_and_shrink();
}
diff --git a/regression-test/data/query_p0/join/test_partitioned_hash_join.out b/regression-test/data/query_p0/join/test_partitioned_hash_join.out
new file mode 100644
index 0000000000..674e06bfed
--- /dev/null
+++ b/regression-test/data/query_p0/join/test_partitioned_hash_join.out
@@ -0,0 +1,7 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !partitioned_hash_join1 --
+0
+1
+100
+255
+
diff --git a/regression-test/data/query_p0/join/test_partitioned_hash_join_r.csv b/regression-test/data/query_p0/join/test_partitioned_hash_join_r.csv
new file mode 100644
index 0000000000..db6f297c34
--- /dev/null
+++ b/regression-test/data/query_p0/join/test_partitioned_hash_join_r.csv
@@ -0,0 +1,257 @@
+0,0
+1,1
+2,2
+3,3
+4,4
+5,5
+6,6
+7,7
+8,8
+9,9
+10,10
+11,11
+12,12
+13,13
+14,14
+15,15
+16,16
+17,17
+18,18
+19,19
+20,20
+21,21
+22,22
+23,23
+24,24
+25,25
+26,26
+27,27
+28,28
+29,29
+30,30
+31,31
+32,32
+33,33
+34,34
+35,35
+36,36
+37,37
+38,38
+39,39
+40,40
+41,41
+42,42
+43,43
+44,44
+45,45
+46,46
+47,47
+48,48
+49,49
+50,50
+51,51
+52,52
+53,53
+54,54
+55,55
+56,56
+57,57
+58,58
+59,59
+60,60
+61,61
+62,62
+63,63
+64,64
+65,65
+66,66
+67,67
+68,68
+69,69
+70,70
+71,71
+72,72
+73,73
+74,74
+75,75
+76,76
+77,77
+78,78
+79,79
+80,80
+81,81
+82,82
+83,83
+84,84
+85,85
+86,86
+87,87
+88,88
+89,89
+90,90
+91,91
+92,92
+93,93
+94,94
+95,95
+96,96
+97,97
+98,98
+99,99
+100,100
+101,101
+102,102
+103,103
+104,104
+105,105
+106,106
+107,107
+108,108
+109,109
+110,110
+111,111
+112,112
+113,113
+114,114
+115,115
+116,116
+117,117
+118,118
+119,119
+120,120
+121,121
+122,122
+123,123
+124,124
+125,125
+126,126
+127,127
+128,128
+129,129
+130,130
+131,131
+132,132
+133,133
+134,134
+135,135
+136,136
+137,137
+138,138
+139,139
+140,140
+141,141
+142,142
+143,143
+144,144
+145,145
+146,146
+147,147
+148,148
+149,149
+150,150
+151,151
+152,152
+153,153
+154,154
+155,155
+156,156
+157,157
+158,158
+159,159
+160,160
+161,161
+162,162
+163,163
+164,164
+165,165
+166,166
+167,167
+168,168
+169,169
+170,170
+171,171
+172,172
+173,173
+174,174
+175,175
+176,176
+177,177
+178,178
+179,179
+180,180
+181,181
+182,182
+183,183
+184,184
+185,185
+186,186
+187,187
+188,188
+189,189
+190,190
+191,191
+192,192
+193,193
+194,194
+195,195
+196,196
+197,197
+198,198
+199,199
+200,200
+201,201
+202,202
+203,203
+204,204
+205,205
+206,206
+207,207
+208,208
+209,209
+210,210
+211,211
+212,212
+213,213
+214,214
+215,215
+216,216
+217,217
+218,218
+219,219
+220,220
+221,221
+222,222
+223,223
+224,224
+225,225
+226,226
+227,227
+228,228
+229,229
+230,230
+231,231
+232,232
+233,233
+234,234
+235,235
+236,236
+237,237
+238,238
+239,239
+240,240
+241,241
+242,242
+243,243
+244,244
+245,245
+246,246
+247,247
+248,248
+249,249
+250,250
+251,251
+252,252
+253,253
+254,254
+255,255
+256,256
diff --git a/regression-test/suites/query_p0/join/test_partitioned_hash_join.groovy b/regression-test/suites/query_p0/join/test_partitioned_hash_join.groovy
new file mode 100644
index 0000000000..bad3af4c69
--- /dev/null
+++ b/regression-test/suites/query_p0/join/test_partitioned_hash_join.groovy
@@ -0,0 +1,79 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_partitioned_hash_join", "query,p0") {
+ sql "drop table if exists test_partitioned_hash_join_l"
+ sql "drop table if exists test_partitioned_hash_join_r"
+ sql """ create table test_partitioned_hash_join_l (
+ kl1 int, vl1 int
+ ) distributed by hash(vl1) properties("replication_num"="1");
+ """
+ sql """ create table test_partitioned_hash_join_r (
+ kr1 int, vr1 int
+ ) distributed by hash(vr1) properties("replication_num"="1");
+ """
+ streamLoad {
+ // you can skip declaring the db, because a default db is already specified in ${DORIS_HOME}/conf/regression-conf.groovy
+ // db 'regression_test'
+ table "test_partitioned_hash_join_r"
+
+ // default label is UUID:
+ // set 'label' UUID.randomUUID().toString()
+
+ // the default column_separator is specified in the doris fe config, usually '\t'.
+ // this line changes it to ','
+ set 'column_separator', ','
+ set 'timeout', '72000'
+ // relates to ${DORIS_HOME}/regression-test/data/demo/streamload_input.csv.
+ // also, you can stream load a http stream, e.g. http://xxx/some.csv
+ file """test_partitioned_hash_join_r.csv"""
+ time 3000 // limit inflight 3s
+
+ // the stream load action will check the result, including the Success status and NumberTotalRows == NumberLoadedRows
+
+ // if a check callback is declared, the default check conditions are ignored,
+ // so you must check all conditions yourself
+ check { result, exception, startTime, endTime ->
+ if (exception != null) {
+ throw exception
+ }
+ log.info("Stream load result: ${result}".toString())
+ def json = parseJson(result)
+ assertEquals("success", json.Status.toLowerCase())
+ assertEquals(json.NumberTotalRows, json.NumberLoadedRows)
+ assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0)
+ }
+ }
+ sql "insert into test_partitioned_hash_join_l values (100, 1100), (0, 10),
(1, 110), (255, 2550)";
+
+ qt_partitioned_hash_join1 """
+ select
+ /*+SET_VAR(disable_join_reorder=true,experimental_enable_pipeline_engine=false, parallel_fragment_exec_instance_num=1, partitioned_hash_join_rows_threshold = 1)*/
+ kl1
+ from
+ test_partitioned_hash_join_l
+ where
+ kl1 in (
+ select
+ kr1
+ from
+ test_partitioned_hash_join_r
+ order by
+ kr1
+ ) order by kl1;
+ """
+}
\ No newline at end of file
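The regression test forces the conversion rather than relying on data volume:
the SET_VAR hint drops partitioned_hash_join_rows_threshold to 1 so the hash
join build side converts to the two level table almost immediately, and the CSV
loaded into test_partitioned_hash_join_r starts with the row 0,0 so the table
being converted contains the zero key that used to trigger the dead loop. A
manual reproduction along the same lines could look like the sketch below; the
table and variable names come from the test above, but the SET-based form is
illustrative rather than copied from the suite.

    -- assumes the two tables were created and loaded as in the Groovy suite above
    SET disable_join_reorder = true;
    SET experimental_enable_pipeline_engine = false;
    SET parallel_fragment_exec_instance_num = 1;
    SET partitioned_hash_join_rows_threshold = 1;  -- convert to two level almost at once

    SELECT kl1
    FROM test_partitioned_hash_join_l
    WHERE kl1 IN (SELECT kr1 FROM test_partitioned_hash_join_r)  -- build side contains key 0
    ORDER BY kl1;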
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]