This is an automated email from the ASF dual-hosted git repository. michaelsmith pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit b03e8ef95c856f499d17ea7815831e30e2e9f467 Author: Riza Suminto <[email protected]> AuthorDate: Wed Nov 29 18:58:37 2023 -0800 Revert "IMPALA-9923: Load ORC serially to hack around flakiness" This reverts commit dc2fdabbd1f2c930348671e17f885c5c54b628e4. Newer Hive versions and other fixes have allowed ORC loading to happen in parallel. Change-Id: I67f4051dd07273f2b51843cb5c1ec2cf185c5924 Reviewed-on: http://gerrit.cloudera.org:8080/20755 Reviewed-by: Riza Suminto <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- bin/load-data.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/bin/load-data.py b/bin/load-data.py index a4cfd5a97..090524cf5 100755 --- a/bin/load-data.py +++ b/bin/load-data.py @@ -396,7 +396,6 @@ def main(): impala_create_files = [] hive_load_text_files = [] - hive_load_orc_files = [] hive_load_nontext_files = [] hbase_create_files = [] hbase_postload_files = [] @@ -408,8 +407,6 @@ def main(): elif hive_load_match in filename: if 'text-none-none' in filename: hive_load_text_files.append(filename) - elif 'orc-def-block' in filename: - hive_load_orc_files.append(filename) else: hive_load_nontext_files.append(filename) elif hbase_create_match in filename: @@ -432,7 +429,6 @@ def main(): log_file_list("Impala Create Files:", impala_create_files) log_file_list("Hive Load Text Files:", hive_load_text_files) - log_file_list("Hive Load Orc Files:", hive_load_orc_files) log_file_list("Hive Load Non-Text Files:", hive_load_nontext_files) log_file_list("HBase Create Files:", hbase_create_files) log_file_list("HBase Post-Load Files:", hbase_postload_files) @@ -457,13 +453,6 @@ def main(): # need to be loaded first assert(len(hive_load_text_files) <= 1) hive_exec_query_files_parallel(thread_pool, hive_load_text_files) - # IMPALA-9923: Run ORC serially separately from other non-text formats. This hacks - # around flakiness seen when loading this in parallel. This should be removed as - # soon as possible. 
- assert(len(hive_load_orc_files) <= 1) - hive_exec_query_files_parallel(thread_pool, hive_load_orc_files) - - # Load all non-text formats (goes parallel) hive_exec_query_files_parallel(thread_pool, hive_load_nontext_files) assert(len(hbase_postload_files) <= 1)
