This is an automated email from the ASF dual-hosted git repository. michaelsmith pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 378169be1f571f4d16db2d98b418903c8593889f Author: Riza Suminto <[email protected]> AuthorDate: Thu Dec 14 12:46:16 2023 -0800 Revert "Revert "IMPALA-9923: Load ORC serially to hack around ..."" This reverts commit b03e8ef95c856f499d17ea7815831e30e2e9f467. IMPALA-12630 reports that several tests were broken due to loading ORC in parallel with other non-text table formats. ORC tables return to loading serially after this commit. Change-Id: I5d3f2ee1c15f9aff6aa632a78d86ba32c640e53d Reviewed-on: http://gerrit.cloudera.org:8080/20795 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- bin/load-data.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/bin/load-data.py b/bin/load-data.py index 090524cf5..a4cfd5a97 100755 --- a/bin/load-data.py +++ b/bin/load-data.py @@ -396,6 +396,7 @@ def main(): impala_create_files = [] hive_load_text_files = [] + hive_load_orc_files = [] hive_load_nontext_files = [] hbase_create_files = [] hbase_postload_files = [] @@ -407,6 +408,8 @@ def main(): elif hive_load_match in filename: if 'text-none-none' in filename: hive_load_text_files.append(filename) + elif 'orc-def-block' in filename: + hive_load_orc_files.append(filename) else: hive_load_nontext_files.append(filename) elif hbase_create_match in filename: @@ -429,6 +432,7 @@ def main(): log_file_list("Impala Create Files:", impala_create_files) log_file_list("Hive Load Text Files:", hive_load_text_files) + log_file_list("Hive Load Orc Files:", hive_load_orc_files) log_file_list("Hive Load Non-Text Files:", hive_load_nontext_files) log_file_list("HBase Create Files:", hbase_create_files) log_file_list("HBase Post-Load Files:", hbase_postload_files) @@ -453,6 +457,13 @@ def main(): # need to be loaded first assert(len(hive_load_text_files) <= 1) hive_exec_query_files_parallel(thread_pool, hive_load_text_files) + # IMPALA-9923: Run ORC serially separately from other non-text formats. 
This hacks + # around flakiness seen when loading this in parallel. This should be removed as + # soon as possible. + assert(len(hive_load_orc_files) <= 1) + hive_exec_query_files_parallel(thread_pool, hive_load_orc_files) + + # Load all non-text formats (goes parallel) hive_exec_query_files_parallel(thread_pool, hive_load_nontext_files) assert(len(hbase_postload_files) <= 1)
