This is an automated email from the ASF dual-hosted git repository. joemcdonnell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit eb66d00f9f43ddaa6a9547574a150e0b1436f4d4 Author: Joe McDonnell <[email protected]> AuthorDate: Sat Mar 4 09:02:36 2023 -0800 IMPALA-11974: Fix lazy list operators for Python 3 compatibility Python 3 changes list operators such as range, map, and filter to be lazy. Some code that expects the list operators to happen immediately will fail. e.g. Python 2: range(0,5) == [0,1,2,3,4] True Python 3: range(0,5) == [0,1,2,3,4] False The fix is to wrap locations with list(). i.e. Python 3: list(range(0,5)) == [0,1,2,3,4] True Since the base operators are now lazy, Python 3 also removes the old lazy versions (e.g. xrange, ifilter, izip, etc). This uses future's builtins package to convert the code to the Python 3 behavior (i.e. xrange -> future's builtins.range). Most of the changes were done via these futurize fixes: - libfuturize.fixes.fix_xrange_with_import - lib2to3.fixes.fix_map - lib2to3.fixes.fix_filter This eliminates the pylint warnings: - xrange-builtin - range-builtin-not-iterating - map-builtin-not-iterating - zip-builtin-not-iterating - filter-builtin-not-iterating - reduce-builtin - deprecated-itertools-function Testing: - Ran core job Change-Id: Ic7c082711f8eff451a1b5c085e97461c327edb5f Reviewed-on: http://gerrit.cloudera.org:8080/19589 Reviewed-by: Joe McDonnell <[email protected]> Tested-by: Joe McDonnell <[email protected]> --- bin/banned_py3k_warnings.txt | 7 ++++++ bin/generate_minidump_collection_testdata.py | 5 ++-- bin/get_code_size.py | 1 + bin/load-data.py | 2 +- bin/run-workload.py | 6 ++--- bin/single_node_perf_run.py | 1 + bin/start-impala-cluster.py | 5 ++-- testdata/bin/generate-schema-statements.py | 2 +- testdata/bin/generate-test-vectors.py | 1 + testdata/bin/load_nested.py | 5 ++-- testdata/bin/random_avro_schema.py | 1 + testdata/bin/rewrite-iceberg-metadata.py | 12 +++++----- testdata/common/cgroups.py | 1 + testdata/common/text_delims_table.py | 3 ++- testdata/common/widetable.py | 7 +++--- tests/authorization/test_ranger.py | 3 ++- 
tests/beeswax/impala_beeswax.py | 5 ++-- tests/benchmark/plugins/vtune_plugin.py | 3 ++- tests/benchmark/report_benchmark_results.py | 3 ++- tests/common/environ.py | 2 +- tests/common/impala_cluster.py | 1 + tests/common/impala_test_suite.py | 3 ++- tests/common/kudu_test_suite.py | 5 ++-- tests/common/test_dimensions.py | 3 ++- tests/common/test_result_verifier.py | 13 ++++++----- tests/comparison/cluster.py | 8 +++---- tests/comparison/data_generator.py | 8 ++++--- tests/comparison/data_generator_mapred_common.py | 5 ++-- tests/comparison/db_connection.py | 9 ++++---- tests/comparison/discrepancy_searcher.py | 16 ++++++------- tests/comparison/funcs.py | 4 ++-- tests/comparison/query.py | 3 ++- tests/comparison/query_generator.py | 18 +++++++-------- tests/comparison/query_profile.py | 12 ++++++---- tests/comparison/statement_generator.py | 3 ++- tests/conftest.py | 5 ++-- tests/custom_cluster/test_admission_controller.py | 19 +++++++-------- tests/custom_cluster/test_auto_scaling.py | 1 + tests/custom_cluster/test_blacklist.py | 3 ++- tests/custom_cluster/test_breakpad.py | 5 ++-- tests/custom_cluster/test_codegen_cache.py | 1 + tests/custom_cluster/test_concurrent_ddls.py | 7 +++--- .../custom_cluster/test_concurrent_kudu_create.py | 3 ++- tests/custom_cluster/test_custom_statestore.py | 3 ++- tests/custom_cluster/test_events_custom_configs.py | 3 ++- .../test_exchange_deferred_batches.py | 1 + tests/custom_cluster/test_executor_groups.py | 3 ++- tests/custom_cluster/test_hdfs_fd_caching.py | 1 + .../test_incremental_metadata_updates.py | 1 + tests/custom_cluster/test_local_catalog.py | 7 +++--- tests/custom_cluster/test_mem_reservations.py | 5 ++-- tests/custom_cluster/test_metadata_replicas.py | 5 ++-- tests/custom_cluster/test_metastore_service.py | 1 + .../custom_cluster/test_parquet_max_page_header.py | 5 ++-- tests/custom_cluster/test_preload_table_types.py | 1 + tests/custom_cluster/test_process_failures.py | 5 ++-- 
tests/custom_cluster/test_query_expiration.py | 9 ++++---- tests/custom_cluster/test_query_retries.py | 7 +++--- tests/custom_cluster/test_restart_services.py | 5 ++-- tests/custom_cluster/test_rpc_timeout.py | 3 ++- tests/custom_cluster/test_scratch_disk.py | 3 ++- tests/custom_cluster/test_set_and_unset.py | 4 ++-- .../custom_cluster/test_topic_update_frequency.py | 1 + tests/custom_cluster/test_udf_concurrency.py | 5 ++-- tests/custom_cluster/test_wide_table_operations.py | 1 + tests/failure/test_failpoints.py | 1 + tests/hs2/hs2_test_suite.py | 3 ++- tests/hs2/test_fetch_first.py | 13 ++++++----- tests/hs2/test_hs2.py | 5 ++-- tests/metadata/test_compute_stats.py | 3 ++- tests/metadata/test_ddl.py | 25 ++++++++++---------- tests/metadata/test_hms_integration.py | 5 ++-- tests/metadata/test_load.py | 7 +++--- tests/metadata/test_recover_partitions.py | 15 ++++++------ tests/metadata/test_recursive_listing.py | 1 + tests/metadata/test_stats_extrapolation.py | 5 ++-- tests/performance/scheduler.py | 7 +++--- tests/query_test/test_aggregation.py | 9 ++++---- tests/query_test/test_avro_schema_resolution.py | 1 + tests/query_test/test_cancellation.py | 7 +++--- tests/query_test/test_cast_with_format.py | 1 + tests/query_test/test_compressed_formats.py | 1 + tests/query_test/test_decimal_casting.py | 13 ++++++----- tests/query_test/test_decimal_fuzz.py | 7 +++--- tests/query_test/test_exprs.py | 13 ++++++----- tests/query_test/test_hdfs_caching.py | 3 ++- tests/query_test/test_iceberg.py | 1 + tests/query_test/test_insert_behaviour.py | 1 + tests/query_test/test_insert_parquet.py | 6 +++-- tests/query_test/test_insert_permutation.py | 5 ++-- tests/query_test/test_kudu.py | 1 + tests/query_test/test_mem_usage_scaling.py | 1 + tests/query_test/test_parquet_bloom_filter.py | 1 + tests/query_test/test_scanners.py | 1 + tests/query_test/test_scanners_fuzz.py | 3 ++- tests/query_test/test_sort.py | 18 ++++++++++++--- tests/query_test/test_tpch_queries.py | 3 ++- 
tests/shell/test_shell_commandline.py | 3 ++- tests/statestore/test_statestore.py | 7 +++--- tests/stress/concurrent_select.py | 9 ++++---- tests/stress/queries.py | 3 ++- tests/stress/query_retries_stress_runner.py | 7 +++--- tests/stress/test_acid_stress.py | 27 ++++++++++++---------- tests/stress/test_ddl_stress.py | 5 ++-- tests/stress/test_insert_stress.py | 14 ++++++----- tests/util/calculation_util.py | 1 + tests/util/concurrent_workload.py | 3 ++- tests/util/get_parquet_metadata.py | 4 +++- tests/util/ssh_util.py | 1 + tests/util/test_file_parser.py | 10 +++++--- 110 files changed, 358 insertions(+), 231 deletions(-) diff --git a/bin/banned_py3k_warnings.txt b/bin/banned_py3k_warnings.txt index c5e7bdc12..01d54fe73 100644 --- a/bin/banned_py3k_warnings.txt +++ b/bin/banned_py3k_warnings.txt @@ -1,2 +1,9 @@ no-absolute-import old-division +xrange-builtin +range-builtin-not-iterating +map-builtin-not-iterating +zip-builtin-not-iterating +filter-builtin-not-iterating +reduce-builtin +deprecated-itertools-function diff --git a/bin/generate_minidump_collection_testdata.py b/bin/generate_minidump_collection_testdata.py index 09341f539..9149e814d 100755 --- a/bin/generate_minidump_collection_testdata.py +++ b/bin/generate_minidump_collection_testdata.py @@ -28,6 +28,7 @@ # making the files easily compressible by having some repeated data. from __future__ import absolute_import, division, print_function +from builtins import range import errno import os import random @@ -99,7 +100,7 @@ def random_bytes(num): def write_minidump(common_data, timestamp, target_dir): '''Generate and write the minidump into the target_dir. atime and mtime of the minidump will be set to timestamp.''' - file_name = ''.join(random.choice('abcdefghijklmnopqrstuvwxyz') for _ in xrange(10)) + file_name = ''.join(random.choice('abcdefghijklmnopqrstuvwxyz') for _ in range(10)) with open(os.path.join(target_dir, file_name), 'wb') as f: # We want the minidump to be pretty similar to each other. 
The number 8192 was chosen # arbitratily and seemed like a reasonable guess. @@ -130,7 +131,7 @@ def generate_minidumps(): interval = 0 else: interval = (end_timestamp - start_timestamp) // (options.num_minidumps - 1) - for i in xrange(options.num_minidumps): + for i in range(options.num_minidumps): write_minidump(common_data, start_timestamp + interval * i, os.path.join(minidump_dir, role_name)) diff --git a/bin/get_code_size.py b/bin/get_code_size.py index 44afba04f..c69007057 100755 --- a/bin/get_code_size.py +++ b/bin/get_code_size.py @@ -20,6 +20,7 @@ # This tool walks the build directory (release by default) and will print the text, data, # and bss section sizes of the archives. from __future__ import absolute_import, division, print_function +from builtins import range import fnmatch import os import re diff --git a/bin/load-data.py b/bin/load-data.py index 3c35cfc59..535a4f8bf 100755 --- a/bin/load-data.py +++ b/bin/load-data.py @@ -427,7 +427,7 @@ def main(): def log_file_list(header, file_list): if (len(file_list) == 0): return LOG.debug(header) - map(LOG.debug, map(os.path.basename, file_list)) + list(map(LOG.debug, list(map(os.path.basename, file_list)))) LOG.debug("\n") log_file_list("Impala Create Files:", impala_create_files) diff --git a/bin/run-workload.py b/bin/run-workload.py index 1da5dd0f5..612c83436 100755 --- a/bin/run-workload.py +++ b/bin/run-workload.py @@ -172,8 +172,8 @@ def prettytable_print(results, failed=False): def print_result_summary(results): """Print failed and successfull queries for a given result list""" - failed_results = filter(lambda x: x.success == False, results) - successful_results = filter(lambda x: x.success == True, results) + failed_results = [x for x in results if not x.success] + successful_results = [x for x in results if x.success] prettytable_print(successful_results) if failed_results: prettytable_print(failed_results, failed=True) @@ -195,7 +195,7 @@ def get_workload_scale_factor(): def 
split_and_strip(input_string, delim=","): """Convert a string into a list using the given delimiter""" if not input_string: return list() - return map(str.strip, input_string.split(delim)) + return list(map(str.strip, input_string.split(delim))) def create_workload_config(): """Parse command line inputs. diff --git a/bin/single_node_perf_run.py b/bin/single_node_perf_run.py index f64d94e6c..1452c9fee 100755 --- a/bin/single_node_perf_run.py +++ b/bin/single_node_perf_run.py @@ -70,6 +70,7 @@ # --ninja use ninja, rather than Make, as the build tool from __future__ import absolute_import, division, print_function +from builtins import range from optparse import OptionParser from tempfile import mkdtemp diff --git a/bin/start-impala-cluster.py b/bin/start-impala-cluster.py index 6c5fa07d4..5fac2c2d6 100755 --- a/bin/start-impala-cluster.py +++ b/bin/start-impala-cluster.py @@ -21,6 +21,7 @@ # ImpalaD instances. Each ImpalaD runs on a different port allowing this to be run # on a single machine. 
from __future__ import absolute_import, division, print_function +from builtins import range import getpass import itertools import json @@ -548,7 +549,7 @@ class MiniClusterOperations(object): cluster_size, num_coordinators, use_exclusive_coordinators, remap_ports=True, start_idx=start_idx) assert cluster_size == len(impalad_arg_lists) - for i in xrange(start_idx, start_idx + cluster_size): + for i in range(start_idx, start_idx + cluster_size): service_name = impalad_service_name(i) LOG.info("Starting Impala Daemon logging to {log_dir}/{service_name}.INFO".format( log_dir=options.log_dir, service_name=service_name)) @@ -624,7 +625,7 @@ class DockerMiniClusterOperations(object): use_exclusive_coordinators, remap_ports=False, admissiond_host="admissiond") assert cluster_size == len(impalad_arg_lists) mem_limit = compute_impalad_mem_limit(cluster_size) - for i in xrange(cluster_size): + for i in range(cluster_size): chosen_ports = choose_impalad_ports(i) port_map = {DEFAULT_BEESWAX_PORT: chosen_ports['beeswax_port'], DEFAULT_HS2_PORT: chosen_ports['hs2_port'], diff --git a/testdata/bin/generate-schema-statements.py b/testdata/bin/generate-schema-statements.py index 23cb88556..69683a2f4 100755 --- a/testdata/bin/generate-schema-statements.py +++ b/testdata/bin/generate-schema-statements.py @@ -276,7 +276,7 @@ def parse_table_properties(file_format, table_properties): r'(?:(\w+):)?' 
+ # Required key=value, capturing the key and value r'(.+?)=(.*)') - for table_property in filter(None, table_properties.split("\n")): + for table_property in [_f for _f in table_properties.split("\n") if _f]: m = TABLE_PROPERTY_RE.match(table_property) if not m: raise Exception("Invalid table property line: {0}", format(table_property)) diff --git a/testdata/bin/generate-test-vectors.py b/testdata/bin/generate-test-vectors.py index 00e7228e8..9b5272c37 100755 --- a/testdata/bin/generate-test-vectors.py +++ b/testdata/bin/generate-test-vectors.py @@ -41,6 +41,7 @@ # downloaded from http://pypi.python.org/pypi/AllPairs/2.0.1 # from __future__ import absolute_import, division, print_function +from builtins import range import collections import csv import math diff --git a/testdata/bin/load_nested.py b/testdata/bin/load_nested.py index 7a4faceed..929a421b7 100755 --- a/testdata/bin/load_nested.py +++ b/testdata/bin/load_nested.py @@ -21,6 +21,7 @@ loaded. ''' from __future__ import absolute_import, division, print_function +from builtins import range import logging import os @@ -105,7 +106,7 @@ def load(): "external": external} # Split table creation into multiple queries or "chunks" so less memory is needed. - for chunk_idx in xrange(chunks): + for chunk_idx in range(chunks): sql_params["chunk_idx"] = chunk_idx # Create the nested data in text format. The \00#'s are nested field terminators, @@ -144,7 +145,7 @@ def load(): else: impala.execute("INSERT INTO TABLE tmp_orders_string " + tmp_orders_sql) - for chunk_idx in xrange(chunks): + for chunk_idx in range(chunks): sql_params["chunk_idx"] = chunk_idx tmp_customer_sql = r""" SELECT STRAIGHT_JOIN diff --git a/testdata/bin/random_avro_schema.py b/testdata/bin/random_avro_schema.py index e065b1dcc..ef084edbc 100755 --- a/testdata/bin/random_avro_schema.py +++ b/testdata/bin/random_avro_schema.py @@ -18,6 +18,7 @@ # under the License. 
from __future__ import absolute_import, division, print_function +from builtins import range from random import choice, randint, random, shuffle from os.path import join as join_path from optparse import OptionParser diff --git a/testdata/bin/rewrite-iceberg-metadata.py b/testdata/bin/rewrite-iceberg-metadata.py index 2f8e22e32..d0c4d40dc 100755 --- a/testdata/bin/rewrite-iceberg-metadata.py +++ b/testdata/bin/rewrite-iceberg-metadata.py @@ -18,6 +18,7 @@ # under the License. from __future__ import absolute_import, division, print_function +from builtins import map import glob import json import os @@ -52,8 +53,7 @@ def add_prefix_to_snapshot(snapshot): if 'manifest-list' in snapshot: snapshot['manifest-list'] = generate_new_path(prefix, snapshot['manifest-list']) if 'manifests' in snapshot: - snapshot['manifests'] = map(lambda m: generate_new_path(prefix, m), - snapshot['manifests']) + snapshot['manifests'] = [generate_new_path(prefix, m) for m in snapshot['manifests']] return snapshot @@ -99,11 +99,11 @@ for arg in args[1:]: # snapshots: optional if 'snapshots' in metadata: - metadata['snapshots'] = map(add_prefix_to_snapshot, metadata['snapshots']) + metadata['snapshots'] = list(map(add_prefix_to_snapshot, metadata['snapshots'])) # metadata-log: optional if 'metadata-log' in metadata: - metadata['metadata-log'] = map(add_prefix_to_mlog, metadata['metadata-log']) + metadata['metadata-log'] = list(map(add_prefix_to_mlog, metadata['metadata-log'])) with open(mfile + '.tmp', 'w') as f: json.dump(metadata, f, indent=2) @@ -113,7 +113,7 @@ for arg in args[1:]: with open(afile, 'rb') as f: with DataFileReader(f, DatumReader()) as reader: schema = reader.datum_reader.writers_schema - lines = map(add_prefix_to_snapshot_entry, reader) + lines = list(map(add_prefix_to_snapshot_entry, reader)) with open(afile + '.tmp', 'wb') as f: with DataFileWriter(f, DatumWriter(), schema) as writer: @@ -127,7 +127,7 @@ for arg in args[1:]: with open(snapfile, 'rb') as f: with 
DataFileReader(f, DatumReader()) as reader: schema = reader.datum_reader.writers_schema - lines = map(fix_manifest_length, reader) + lines = list(map(fix_manifest_length, reader)) with open(snapfile + '.tmp', 'wb') as f: with DataFileWriter(f, DatumWriter(), schema) as writer: diff --git a/testdata/common/cgroups.py b/testdata/common/cgroups.py index 36fe5b75f..5e5d5c048 100755 --- a/testdata/common/cgroups.py +++ b/testdata/common/cgroups.py @@ -20,6 +20,7 @@ # Utility code for creating cgroups for the Impala development environment. # May be used as a library or as a command-line utility for manual testing. from __future__ import absolute_import, division, print_function +from builtins import range import os import sys import errno diff --git a/testdata/common/text_delims_table.py b/testdata/common/text_delims_table.py index a5b06acf0..5a605e548 100755 --- a/testdata/common/text_delims_table.py +++ b/testdata/common/text_delims_table.py @@ -23,6 +23,7 @@ # print a SQL load statement to incorporate into dataload SQL script generation. from __future__ import absolute_import, division, print_function +from builtins import range from shutil import rmtree from optparse import OptionParser from contextlib import contextmanager @@ -35,7 +36,7 @@ parser.add_option("--only_newline", dest="only_newline", default=False, action=" parser.add_option("--file_len", dest="file_len", type="int") def generate_testescape_files(table_location, only_newline, file_len): - data = ''.join(["1234567890" for _ in xrange(1 + file_len // 10)]) + data = ''.join(["1234567890" for _ in range(1 + file_len // 10)]) suffix_list = ["\\", ",", "a"] if only_newline: diff --git a/testdata/common/widetable.py b/testdata/common/widetable.py index f04b5cc69..405bdedf3 100755 --- a/testdata/common/widetable.py +++ b/testdata/common/widetable.py @@ -23,6 +23,7 @@ # into dataload SQL script generation. 
from __future__ import absolute_import, division, print_function +from builtins import range from datetime import datetime, timedelta import itertools import optparse @@ -51,7 +52,7 @@ def get_columns(num_cols): iter = itertools.cycle(templates) # Produces [bool_col1, tinyint_col1, ..., bool_col2, tinyint_col2, ...] # The final list has 'num_cols' elements. - return [iter.next() % (i // len(templates) + 1) for i in xrange(num_cols)] + return [iter.next() % (i // len(templates) + 1) for i in range(num_cols)] # Data generators for different types. Each generator yields an infinite number of # value strings suitable for writing to a CSV file. @@ -100,11 +101,11 @@ def get_data(num_cols, num_rows, delimiter=',', quote_strings=False): ] # Create a generator instance for each column, cycling through the different types iter = itertools.cycle(generators) - column_generators = [iter.next()() for i in xrange(num_cols)] + column_generators = [iter.next()() for i in range(num_cols)] # Populate each row using column_generators rows = [] - for i in xrange(num_rows): + for i in range(num_rows): vals = [gen.next() for gen in column_generators] rows.append(delimiter.join(vals)) return rows diff --git a/tests/authorization/test_ranger.py b/tests/authorization/test_ranger.py index 03669f081..9f94cc1a9 100644 --- a/tests/authorization/test_ranger.py +++ b/tests/authorization/test_ranger.py @@ -18,6 +18,7 @@ # Client tests for SQL statement authorization from __future__ import absolute_import, division, print_function +from builtins import map, range import os import grp import json @@ -1017,7 +1018,7 @@ class TestRanger(CustomClusterTestSuite): cols = row.split("\t") return cols[0:len(cols) - 1] - assert map(columns, result.data) == expected + assert list(map(columns, result.data)) == expected def _refresh_authorization(self, client, statement): if statement is not None: diff --git a/tests/beeswax/impala_beeswax.py b/tests/beeswax/impala_beeswax.py index f7c2eb3f3..c58bb7109 100644 
--- a/tests/beeswax/impala_beeswax.py +++ b/tests/beeswax/impala_beeswax.py @@ -26,6 +26,7 @@ # result = client.execute(query_string) # where result is an object of the class ImpalaBeeswaxResult. from __future__ import absolute_import, division, print_function +from builtins import filter, map, range import logging import time import shlex @@ -336,7 +337,7 @@ class ImpalaBeeswaxClient(object): idx = \ self.__build_summary_table( summary, idx, False, indent_level, False, first_child_output) - for child_idx in xrange(1, node.num_children): + for child_idx in range(1, node.num_children): # All other children are indented (we only have 0, 1 or 2 children for every exec # node at the moment) idx = self.__build_summary_table( @@ -498,7 +499,7 @@ class ImpalaBeeswaxClient(object): return tokens[0].lower() # Because the WITH clause may precede INSERT or SELECT queries, # just checking the first token is insufficient. - if filter(self.INSERT_REGEX.match, tokens): + if list(filter(self.INSERT_REGEX.match, tokens)): return "insert" return tokens[0].lower() diff --git a/tests/benchmark/plugins/vtune_plugin.py b/tests/benchmark/plugins/vtune_plugin.py index ada5e1069..aad27298c 100644 --- a/tests/benchmark/plugins/vtune_plugin.py +++ b/tests/benchmark/plugins/vtune_plugin.py @@ -16,6 +16,7 @@ # under the License. 
from __future__ import absolute_import, division, print_function +from builtins import filter, map from os import environ from tests.util.cluster_controller import ClusterController from tests.benchmark.plugins import Plugin @@ -117,7 +118,7 @@ class VTunePlugin(Plugin): def _kill_vtune(self, host_dict): # This method kills threads that are still hanging around after timeout - kill_list = filter(self.__is_not_none_or_empty_str, host_dict.keys()) + kill_list = list(filter(self.__is_not_none_or_empty_str, host_dict.keys())) if kill_list: self.cluster_controller.deprecated_run_cmd(self.KILL_CMD, hosts=kill_list) diff --git a/tests/benchmark/report_benchmark_results.py b/tests/benchmark/report_benchmark_results.py index 680a871f9..b7b1c50a5 100755 --- a/tests/benchmark/report_benchmark_results.py +++ b/tests/benchmark/report_benchmark_results.py @@ -29,6 +29,7 @@ # if necessary (2.5). from __future__ import absolute_import, division, print_function +from builtins import range import difflib import json import logging @@ -914,7 +915,7 @@ class ExecSummaryComparison(object): table.align = 'l' table.float_format = '.2' table_contains_at_least_one_row = False - for row in filter(lambda row: is_significant(row), self.rows): + for row in [row for row in self.rows if is_significant(row)]: table_row = [row[OPERATOR], '{0:.2%}'.format(row[PERCENT_OF_QUERY]), '{0:.2%}'.format(row[RSTD]), diff --git a/tests/common/environ.py b/tests/common/environ.py index 80c2750ac..d5c7f8f9f 100644 --- a/tests/common/environ.py +++ b/tests/common/environ.py @@ -96,7 +96,7 @@ kernel_release = os.uname()[2] kernel_version_regex = re.compile(r'(\d+)\.(\d+)\.(\d+)\-(\d+).*') kernel_version_match = kernel_version_regex.match(kernel_release) if kernel_version_match is not None and len(kernel_version_match.groups()) == 4: - kernel_version = map(lambda x: int(x), list(kernel_version_match.groups())) + kernel_version = [int(x) for x in list(kernel_version_match.groups())] IS_BUGGY_EL6_KERNEL = 'el6' in
kernel_release and kernel_version < [2, 6, 32, 674] class ImpalaBuildFlavors: diff --git a/tests/common/impala_cluster.py b/tests/common/impala_cluster.py index 0b43189eb..8a02e952f 100644 --- a/tests/common/impala_cluster.py +++ b/tests/common/impala_cluster.py @@ -18,6 +18,7 @@ # Basic object model of an Impala cluster (set of Impala processes). from __future__ import absolute_import, division, print_function +from builtins import map, range import json import logging import os diff --git a/tests/common/impala_test_suite.py b/tests/common/impala_test_suite.py index 605d9894a..92099170e 100644 --- a/tests/common/impala_test_suite.py +++ b/tests/common/impala_test_suite.py @@ -18,6 +18,7 @@ # The base class that should be used for almost all Impala tests from __future__ import absolute_import, division, print_function +from builtins import range import glob import grp import json @@ -467,7 +468,7 @@ class ImpalaTestSuite(BaseTestSuite): # Parse the /varz endpoint to get the flag information. varz = self.get_debug_page(VARZ_URL) assert 'flags' in varz.keys() - filtered_varz = filter(lambda flag: flag['name'] == var, varz['flags']) + filtered_varz = [flag for flag in varz['flags'] if flag['name'] == var] assert len(filtered_varz) == 1 assert 'current' in filtered_varz[0].keys() return filtered_varz[0]['current'].strip() diff --git a/tests/common/kudu_test_suite.py b/tests/common/kudu_test_suite.py index 5292b31eb..0d8eca371 100644 --- a/tests/common/kudu_test_suite.py +++ b/tests/common/kudu_test_suite.py @@ -16,6 +16,7 @@ # under the License. 
from __future__ import absolute_import, division, print_function +from builtins import range import os import pytest import requests @@ -110,7 +111,7 @@ class KuduTestSuite(ImpalaTestSuite): @classmethod def random_table_name(cls): - return "".join(choice(string.lowercase) for _ in xrange(10)) + return "".join(choice(string.lowercase) for _ in range(10)) @classmethod def to_kudu_table_name(cls, db_name, tbl_name): @@ -144,7 +145,7 @@ class KuduTestSuite(ImpalaTestSuite): if not col_names: if len(col_types) > 26: raise Exception("Too many columns for default naming") - col_names = [chr(97 + i) for i in xrange(len(col_types))] + col_names = [chr(97 + i) for i in range(len(col_types))] schema_builder = SchemaBuilder() for i, t in enumerate(col_types): column_spec = schema_builder.add_column(col_names[i], type_=t) diff --git a/tests/common/test_dimensions.py b/tests/common/test_dimensions.py index 48d147541..3f9bf23fa 100644 --- a/tests/common/test_dimensions.py +++ b/tests/common/test_dimensions.py @@ -18,6 +18,7 @@ # Common test dimensions and associated utility functions. from __future__ import absolute_import, division, print_function +from builtins import range import copy import os from itertools import product @@ -83,7 +84,7 @@ class TableFormatInfo(object): raise ValueError('Table format string cannot be None') format_parts = table_format_string.strip().split('/') - if len(format_parts) not in range(2, 4): + if len(format_parts) not in list(range(2, 4)): raise ValueError('Invalid table format %s' % table_format_string) file_format, compression_codec = format_parts[:2] diff --git a/tests/common/test_result_verifier.py b/tests/common/test_result_verifier.py index 59f637d9a..e3b90aad9 100644 --- a/tests/common/test_result_verifier.py +++ b/tests/common/test_result_verifier.py @@ -18,6 +18,7 @@ # This modules contians utility functions used to help verify query test results. 
from __future__ import absolute_import, division, print_function +from builtins import map, range import logging import math import re @@ -457,7 +458,7 @@ def verify_raw_results(test_section, exec_result, file_format, result_section, expected_results_list = re.findall(r'\[(.*?)\]', expected_results, flags=re.DOTALL) if not is_raw_string: # Needs escaping - expected_results_list = map(lambda s: s.replace('\n', '\\n'), expected_results_list) + expected_results_list = [s.replace('\n', '\\n') for s in expected_results_list] else: expected_results_list = split_section_lines(expected_results) expected = QueryTestResult(expected_results_list, expected_types, @@ -503,7 +504,7 @@ def parse_result_rows(exec_result, escape_strings=True): cols = row.split('\t') assert len(cols) == len(col_types) new_cols = list() - for i in xrange(len(cols)): + for i in range(len(cols)): if col_types[i] in ['STRING', 'CHAR', 'VARCHAR', 'BINARY']: col = cols[i] if isinstance(col, str): @@ -588,7 +589,7 @@ def compute_aggregation(function, field, runtime_profile): if (field_regex_re.search(line)): match_list.extend(re.findall(field_regex, line)) - int_match_list = map(int, match_list) + int_match_list = list(map(int, match_list)) result = None if function == 'SUM': result = sum(int_match_list) @@ -620,7 +621,7 @@ def verify_runtime_profile(expected, actual, update_section=False): # Check the expected and actual rows pairwise. 
for line in actual.splitlines(): - for i in xrange(len(expected_lines)): + for i in range(len(expected_lines)): if matched[i]: continue if expected_regexes[i] is not None: match = expected_regexes[i].match(line) @@ -638,7 +639,7 @@ def verify_runtime_profile(expected, actual, update_section=False): break unmatched_lines = [] - for i in xrange(len(expected_lines)): + for i in range(len(expected_lines)): if not matched[i] and unexpected_regexes[i] is None: unmatched_lines.append(expected_lines[i]) assert len(unmatched_lines) == 0, ("Did not find matches for lines in runtime profile:" @@ -650,7 +651,7 @@ def verify_runtime_profile(expected, actual, update_section=False): updated_aggregations = [] # Compute the aggregations and check against values - for i in xrange(len(expected_aggregations)): + for i in range(len(expected_aggregations)): if (expected_aggregations[i] is None): continue function, field, op, expected_value = expected_aggregations[i] actual_value = compute_aggregation(function, field, actual) diff --git a/tests/comparison/cluster.py b/tests/comparison/cluster.py index c8bb416d8..f95de563d 100644 --- a/tests/comparison/cluster.py +++ b/tests/comparison/cluster.py @@ -21,6 +21,7 @@ # module depends on db_connection which use some query generator classes. 
from __future__ import absolute_import, division, print_function +from builtins import range, zip import hdfs import logging import os @@ -33,7 +34,6 @@ from collections import defaultdict from collections import OrderedDict from contextlib import contextmanager from getpass import getuser -from itertools import izip from multiprocessing.pool import ThreadPool from random import choice from StringIO import StringIO @@ -227,7 +227,7 @@ class MiniCluster(Cluster): hs2_base_port = 21050 web_ui_base_port = 25000 impalads = [MiniClusterImpalad(hs2_base_port + p, web_ui_base_port + p) - for p in xrange(self.num_impalads)] + for p in range(self.num_impalads)] self._impala = Impala(self, impalads) class MiniHiveCluster(MiniCluster): @@ -615,7 +615,7 @@ class Impala(Service): return dict.fromkeys(stopped_impalads) messages = OrderedDict() impalads_with_message = dict() - for i, message in izip(stopped_impalads, self.for_each_impalad( + for i, message in zip(stopped_impalads, self.for_each_impalad( lambda i: i.find_last_crash_message(start_time), impalads=stopped_impalads)): if message: impalads_with_message[i] = "%s crashed:\n%s" % (i.host_name, message) @@ -631,7 +631,7 @@ class Impala(Service): # Python doesn't handle ctrl-c well unless a timeout is provided. 
results = promise.get(maxint) if as_dict: - results = dict(izip(impalads, results)) + results = dict(zip(impalads, results)) return results def restart(self): diff --git a/tests/comparison/data_generator.py b/tests/comparison/data_generator.py index 45dd611b7..9e744f96b 100755 --- a/tests/comparison/data_generator.py +++ b/tests/comparison/data_generator.py @@ -27,6 +27,7 @@ ''' from __future__ import absolute_import, division, print_function +from builtins import filter, range import os from copy import deepcopy from logging import getLogger @@ -110,7 +111,7 @@ class DbPopulator(object): hdfs = self.cluster.hdfs.create_client() table_and_generators = list() - for table_idx in xrange(table_count): + for table_idx in range(table_count): table = self._create_random_table( 'table_%s' % (table_idx + 1), self.min_col_count, @@ -183,9 +184,10 @@ class DbPopulator(object): # doesn't actually modify the table's columns. 'table.cols' should be changed # to allow access to the real columns. cols = table.cols - for col_idx in xrange(col_count): + for col_idx in range(col_count): col_type = choice(allowed_types) - col_type = choice(filter(lambda type_: issubclass(type_, col_type), EXACT_TYPES)) + col_type = \ + choice(list(filter(lambda type_: issubclass(type_, col_type), EXACT_TYPES))) if issubclass(col_type, VarChar) and not issubclass(col_type, String): col_type = get_varchar_class(randint(1, VarChar.MAX)) elif issubclass(col_type, Char) and not issubclass(col_type, String): diff --git a/tests/comparison/data_generator_mapred_common.py b/tests/comparison/data_generator_mapred_common.py index dfc811147..78d7d8d38 100644 --- a/tests/comparison/data_generator_mapred_common.py +++ b/tests/comparison/data_generator_mapred_common.py @@ -24,6 +24,7 @@ ''' from __future__ import absolute_import, division, print_function +from builtins import range import base64 import pickle import StringIO @@ -55,13 +56,13 @@ class TextTableDataGenerator(object): col_val_generators = 
[self._create_val_generator(c.exact_type) for c in cols] val_buffer_size = 1024 col_val_buffers = [[None] * val_buffer_size for c in cols] - for row_idx in xrange(self.row_count): + for row_idx in range(self.row_count): val_buffer_idx = row_idx % val_buffer_size if val_buffer_idx == 0: for col_idx, col in enumerate(cols): val_buffer = col_val_buffers[col_idx] val_generator = col_val_generators[col_idx] - for idx in xrange(val_buffer_size): + for idx in range(val_buffer_size): val = next(val_generator) val_buffer[idx] = r"\N" if val is None else val for col_idx, col in enumerate(cols): diff --git a/tests/comparison/db_connection.py b/tests/comparison/db_connection.py index e641b15c9..0177deb6a 100644 --- a/tests/comparison/db_connection.py +++ b/tests/comparison/db_connection.py @@ -22,6 +22,7 @@ ''' from __future__ import absolute_import, division, print_function +from builtins import filter, map, range, zip import hashlib import impala.dbapi import re @@ -29,7 +30,7 @@ import shelve from abc import ABCMeta, abstractmethod from contextlib import closing from decimal import Decimal as PyDecimal -from itertools import combinations, ifilter, izip +from itertools import combinations from logging import getLogger from os import symlink, unlink from pyparsing import ( @@ -123,7 +124,7 @@ class DbCursor(object): table_keys=table.primary_key_names)) mismatch = True break - for left, right in izip(common_table.cols, table.cols): + for left, right in zip(common_table.cols, table.cols): if not (left.name == right.name and left.type == right.type): LOG.debug('Ignoring table %s. It has different columns %s vs %s.' 
% (table_name, left, right)) @@ -513,10 +514,10 @@ class DbCursor(object): table = self.describe_table(table_name) sql_templ = 'SELECT COUNT(*) FROM %s GROUP BY %%s HAVING COUNT(*) > 1' % table.name unique_cols = list() - for current_depth in xrange(1, depth + 1): + for current_depth in range(1, depth + 1): for cols in combinations(table.cols, current_depth): # redundant combos excluded cols = set(cols) - if any(ifilter(lambda unique_subset: unique_subset < cols, unique_cols)): + if any(filter(lambda unique_subset: unique_subset < cols, unique_cols)): # cols contains a combo known to be unique continue col_names = ', '.join(col.name for col in cols) diff --git a/tests/comparison/discrepancy_searcher.py b/tests/comparison/discrepancy_searcher.py index ce56d7961..2e1fdb143 100755 --- a/tests/comparison/discrepancy_searcher.py +++ b/tests/comparison/discrepancy_searcher.py @@ -25,9 +25,9 @@ # TODO: IMPALA-4600: refactor this module from __future__ import absolute_import, division, print_function +from builtins import range, zip from copy import deepcopy from decimal import Decimal -from itertools import izip from logging import getLogger from math import isinf, isnan from os import getenv, symlink, unlink @@ -182,8 +182,8 @@ class QueryResultComparator(object): data_set.sort(cmp=self.row_sort_cmp) found_data = False # Will be set to True if the result contains non-zero/NULL data - for ref_row, test_row in izip(ref_data_set, test_data_set): - for col_idx, (ref_val, test_val) in enumerate(izip(ref_row, test_row)): + for ref_row, test_row in zip(ref_data_set, test_data_set): + for col_idx, (ref_val, test_val) in enumerate(zip(ref_row, test_row)): if ref_val or test_val: # Ignores zeros, ex "SELECT COUNT(*) ... WHERE FALSE" found_data = True if self.vals_are_equal(ref_val, test_val): @@ -222,7 +222,7 @@ class QueryResultComparator(object): def row_sort_cmp(self, ref_row, test_row): '''Comparison used for sorting. 
''' - for ref_val, test_val in izip(ref_row, test_row): + for ref_val, test_val in zip(ref_row, test_row): if ref_val is None and test_val is not None: return -1 if ref_val is not None and test_val is None: @@ -368,7 +368,7 @@ class QueryExecutor(object): self._table_or_view_name = query.dml_table.name query_threads = list() - for sql_writer, cursor, log_file in izip( + for sql_writer, cursor, log_file in zip( self.sql_writers, self.cursors, self.query_logs ): if self.ENABLE_RANDOM_QUERY_OPTIONS and cursor.db_type == IMPALA: @@ -387,7 +387,7 @@ class QueryExecutor(object): query_threads.append(query_thread) end_time = time() + self.query_timeout_seconds - for query_thread, cursor in izip(query_threads, self.cursors): + for query_thread, cursor in zip(query_threads, self.cursors): join_time = end_time - time() if join_time > 0: query_thread.join(join_time) @@ -481,7 +481,7 @@ class QueryExecutor(object): def _create_random_table_name(self): char_choices = ascii_lowercase chars = list() - for idx in xrange(4): # will result in ~1M combinations + for idx in range(4): # will result in ~1M combinations if idx == 1: char_choices += '_' + digits chars.append(choice(char_choices)) @@ -585,7 +585,7 @@ class FrontendExceptionSearcher(object): LOG.error("Error generating explain plan for test db:\n%s" % sql) raise e - for idx in xrange(number_of_test_queries): + for idx in range(number_of_test_queries): LOG.info("Explaining query #%s" % (idx + 1)) statement_type = self.query_profile.choose_statement() statement_generator = get_generator(statement_type)(self.query_profile) diff --git a/tests/comparison/funcs.py b/tests/comparison/funcs.py index 7a0f37272..ad523fe6d 100644 --- a/tests/comparison/funcs.py +++ b/tests/comparison/funcs.py @@ -16,8 +16,8 @@ # under the License. 
from __future__ import absolute_import, division, print_function +from builtins import filter from copy import deepcopy -from itertools import ifilter from tests.comparison.common import ValExpr from tests.comparison.db_types import ( @@ -419,7 +419,7 @@ def create_func(name, returns=None, accepts=[], signatures=[], base_type=Func): for arg_idx, arg in enumerate(signature.args): replacement_signature = None if arg.is_subquery: - if any(ifilter(lambda type_: type_ == Number, arg.type)): + if any(filter(lambda type_: type_ == Number, arg.type)): raise Exception('Number not accepted in subquery signatures') elif arg.type == Number: for replacement_type in [Decimal, Int, Float]: diff --git a/tests/comparison/query.py b/tests/comparison/query.py index c3e80352f..dd73f895e 100644 --- a/tests/comparison/query.py +++ b/tests/comparison/query.py @@ -16,6 +16,7 @@ # under the License. from __future__ import absolute_import, division, print_function +from builtins import range from abc import ABCMeta, abstractproperty from copy import deepcopy from logging import getLogger @@ -43,7 +44,7 @@ class StatementExecutionMode(object): DML_SETUP, # a DML statement that's actually a test DML_TEST, - ) = xrange(5) + ) = range(5) class AbstractStatement(object): diff --git a/tests/comparison/query_generator.py b/tests/comparison/query_generator.py index f4150fc86..3da76a1ab 100644 --- a/tests/comparison/query_generator.py +++ b/tests/comparison/query_generator.py @@ -16,9 +16,9 @@ # under the License. 
from __future__ import absolute_import, division, print_function +from builtins import filter, range from collections import defaultdict from copy import deepcopy -from itertools import ifilter from logging import getLogger from random import shuffle, choice, randint, randrange @@ -254,7 +254,7 @@ class QueryGenerator(object): table_exprs = TableExprList(table_exprs) with_clause_inline_views = TableExprList() for with_clause_inline_view_idx \ - in xrange(self.profile.get_with_clause_table_ref_count()): + in range(self.profile.get_with_clause_table_ref_count()): query = self.generate_statement(table_exprs, allow_with_clause=self.profile.use_nested_with()) with_clause_alias_count = getattr(self.root_query, 'with_clause_alias_count', 0) + 1 @@ -933,7 +933,7 @@ class QueryGenerator(object): else: excluded_designs.append('DETERMINISTIC_ORDER') - allow_agg = any(ifilter(lambda expr: expr.contains_agg, select_item_exprs)) + allow_agg = any(filter(lambda expr: expr.contains_agg, select_item_exprs)) value = self._create_analytic_func_tree(return_type, excluded_designs, allow_agg) value = self.populate_func_with_vals( value, @@ -1125,7 +1125,7 @@ class QueryGenerator(object): table_expr.alias = self.get_next_id() from_clause = FromClause(table_expr) - for idx in xrange(1, table_count): + for idx in range(1, table_count): join_clause = self._create_join_clause(from_clause, table_exprs) join_clause.table_expr.alias = self.get_next_id() from_clause.join_clauses.append(join_clause) @@ -1294,7 +1294,7 @@ class QueryGenerator(object): % (arg_stop_idx, arg_start_idx)) if null_args is None: null_args = list() - for idx in xrange(arg_start_idx, arg_stop_idx): + for idx in range(arg_start_idx, arg_stop_idx): arg = func.args[idx] if arg.is_constant and issubclass(arg.type, allowed_types): assert arg.val is None @@ -1422,7 +1422,7 @@ class QueryGenerator(object): if not relational_col_types: relational_col_types = tuple() - for _ in xrange(func_count): + for _ in range(func_count): 
is_relational = False if and_or_count > 0: @@ -1495,12 +1495,12 @@ def generate_queries_for_manual_inspection(): tables = list() data_types = list(TYPES) data_types.remove(Float) - for table_idx in xrange(NUM_TABLES): + for table_idx in range(NUM_TABLES): table = Table('table_%s' % table_idx) tables.append(table) cols = table.cols col_idx = 0 - for _ in xrange(NUM_COLS_EACH_TYPE): + for _ in range(NUM_COLS_EACH_TYPE): for col_type in data_types: col = Column(table, '%s_col_%s' % (col_type.__name__.lower(), col_idx), col_type) cols.append(col) @@ -1512,7 +1512,7 @@ def generate_queries_for_manual_inspection(): sql_writer = SqlWriter.create(dialect='IMPALA') ref_writer = SqlWriter.create(dialect='POSTGRESQL', nulls_order_asc=query_profile.nulls_order_asc()) - for _ in xrange(NUM_QUERIES): + for _ in range(NUM_QUERIES): query = query_generator.generate_statement(tables) print("Test db") print(sql_writer.write_query(query) + '\n') diff --git a/tests/comparison/query_profile.py b/tests/comparison/query_profile.py index f5003732a..e47f7a6c6 100644 --- a/tests/comparison/query_profile.py +++ b/tests/comparison/query_profile.py @@ -16,6 +16,8 @@ # under the License. 
from __future__ import absolute_import, division, print_function +from builtins import filter +from functools import reduce from logging import getLogger from random import choice, randint, random, shuffle @@ -303,14 +305,14 @@ class DefaultProfile(object): return choice_ numeric_choice -= weight - def _choose_from_filtered_weights(self, filter, *weights): + def _choose_from_filtered_weights(self, filter_fn, *weights): '''Convenience method, apply the given filter before choosing a value.''' if isinstance(weights[0], str): weights = self.weights(*weights) else: weights = weights[0] return self._choose_from_weights(dict((choice_, weight) for choice_, weight - in weights.iteritems() if filter(choice_))) + in weights.iteritems() if filter_fn(choice_))) def _decide_from_probability(self, *keys): return random() < self.probability(*keys) @@ -490,11 +492,11 @@ class DefaultProfile(object): ''' if not signatures: raise Exception('At least one signature is required') - filtered_signatures = filter( + filtered_signatures = list(filter( lambda s: s.return_type == Boolean \ and len(s.args) > 1 \ and not any(a.is_subquery for a in s.args), - signatures) + signatures)) if not filtered_signatures: raise Exception( 'None of the provided signatures corresponded to a relational function') @@ -717,7 +719,7 @@ class TestFunctionProfile(DefaultProfile): def choose_func_signature(self, signatures): if not signatures: raise Exception('At least one signature is required') - preferred_signatures = filter(lambda s: "DistinctFrom" in s.func._NAME, signatures) + preferred_signatures = [s for s in signatures if "DistinctFrom" in s.func._NAME] if preferred_signatures: signatures = preferred_signatures return super(TestFunctionProfile, self).choose_func_signature(signatures) diff --git a/tests/comparison/statement_generator.py b/tests/comparison/statement_generator.py index 3cfa713c4..81e956710 100644 --- a/tests/comparison/statement_generator.py +++ b/tests/comparison/statement_generator.py @@ 
-16,6 +16,7 @@ # under the License. from __future__ import absolute_import, division, print_function +from builtins import range from copy import deepcopy from tests.comparison.common import Table @@ -114,7 +115,7 @@ class InsertStatementGenerator(object): null. """ values_rows = [] - for _ in xrange(self.profile.choose_insert_values_row_count()): + for _ in range(self.profile.choose_insert_values_row_count()): values_row = [] for col in columns: if col.is_primary_key: diff --git a/tests/conftest.py b/tests/conftest.py index ae8fc0a3c..d7bfd4372 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -18,6 +18,7 @@ # py.test configuration module # from __future__ import absolute_import, division, print_function +from builtins import map, range from impala.dbapi import connect as impala_connect from kudu import connect as kudu_connect from random import choice, sample @@ -231,7 +232,7 @@ def pytest_generate_tests(metafunc): LOG.warning("No test vectors generated for test '%s'. Check constraints and " "input vectors" % metafunc.function.func_name) - vector_names = map(str, vectors) + vector_names = list(map(str, vectors)) # In the case this is a test result update or sanity run, select a single test vector # to run. This is okay for update_results because results are expected to be the same # for all test vectors. 
@@ -665,7 +666,7 @@ def pytest_collection_modifyitems(items, config, session): return num_items = len(items) - this_shard, num_shards = map(int, config.option.shard_tests.split("/")) + this_shard, num_shards = list(map(int, config.option.shard_tests.split("/"))) assert 0 <= this_shard <= num_shards if this_shard == num_shards: this_shard = 0 diff --git a/tests/custom_cluster/test_admission_controller.py b/tests/custom_cluster/test_admission_controller.py index b10f8e4ef..79e4cc3a7 100644 --- a/tests/custom_cluster/test_admission_controller.py +++ b/tests/custom_cluster/test_admission_controller.py @@ -18,6 +18,7 @@ # Tests admission control from __future__ import absolute_import, division, print_function +from builtins import range import itertools import logging import os @@ -854,7 +855,7 @@ class TestAdmissionController(TestAdmissionControllerBase, HS2TestSuite): EXPECTED_REASON = \ "Latest admission queue reason: number of running queries 1 is at or over limit 1" NUM_QUERIES = 5 - profiles = self._execute_and_collect_profiles([STMT for i in xrange(NUM_QUERIES)], + profiles = self._execute_and_collect_profiles([STMT for i in range(NUM_QUERIES)], TIMEOUT_S) num_reasons = len([profile for profile in profiles if EXPECTED_REASON in profile]) @@ -891,7 +892,7 @@ class TestAdmissionController(TestAdmissionControllerBase, HS2TestSuite): NUM_QUERIES = 5 # IMPALA-9856: Disable query result spooling so that we can run queries with low # mem_limit. - profiles = self._execute_and_collect_profiles([STMT for i in xrange(NUM_QUERIES)], + profiles = self._execute_and_collect_profiles([STMT for i in range(NUM_QUERIES)], TIMEOUT_S, {'mem_limit': '9mb', 'spool_query_results': '0'}) num_reasons = len([profile for profile in profiles if EXPECTED_REASON in profile]) @@ -933,7 +934,7 @@ class TestAdmissionController(TestAdmissionControllerBase, HS2TestSuite): NUM_QUERIES = 5 # IMPALA-9856: Disable query result spooling so that we can run queries with low # mem_limit. 
- profiles = self._execute_and_collect_profiles([STMT for i in xrange(NUM_QUERIES)], + profiles = self._execute_and_collect_profiles([STMT for i in range(NUM_QUERIES)], TIMEOUT_S, {'mem_limit': '2mb', 'spool_query_results': '0'}, True) EXPECTED_REASON = """.*Admission for query exceeded timeout 1000ms in pool """\ @@ -967,7 +968,7 @@ class TestAdmissionController(TestAdmissionControllerBase, HS2TestSuite): NUM_QUERIES = 5 # IMPALA-9856: Disable query result spooling so that we can run queries with low # mem_limit. - profiles = self._execute_and_collect_profiles([STMT for i in xrange(NUM_QUERIES)], + profiles = self._execute_and_collect_profiles([STMT for i in range(NUM_QUERIES)], TIMEOUT_S, {'mem_limit': '2mb', 'spool_query_results': '0'}, True) EXPECTED_REASON = """.*Admission for query exceeded timeout 1000ms in pool """\ @@ -1001,7 +1002,7 @@ class TestAdmissionController(TestAdmissionControllerBase, HS2TestSuite): "admission-controller.total-dequeue-failed-coordinator-limited" original_metric_value = self.get_ac_process().service.get_metric_value( coordinator_limited_metric) - profiles = self._execute_and_collect_profiles([STMT for i in xrange(NUM_QUERIES)], + profiles = self._execute_and_collect_profiles([STMT for i in range(NUM_QUERIES)], TIMEOUT_S, config_options={"mt_dop": 4}) num_reasons = len([profile for profile in profiles if EXPECTED_REASON in profile]) @@ -1403,7 +1404,7 @@ class TestAdmissionController(TestAdmissionControllerBase, HS2TestSuite): STMT = "select sleep(100)" TIMEOUT_S = 60 NUM_QUERIES = 5 - profiles = self._execute_and_collect_profiles([STMT for i in xrange(NUM_QUERIES)], + profiles = self._execute_and_collect_profiles([STMT for i in range(NUM_QUERIES)], TIMEOUT_S, allow_query_failure=True) ADMITTED_STALENESS_WARNING = \ "Warning: admission control information from statestore is stale" @@ -1795,7 +1796,7 @@ class TestAdmissionControllerStress(TestAdmissionControllerBase): num_submitted queries. 
See IMPALA-6227 for an example of problems with inconsistent metrics where a dequeued query is reflected in dequeued but not admitted.""" ATTEMPTS = 5 - for i in xrange(ATTEMPTS): + for i in range(ATTEMPTS): metrics = self.get_admission_metrics() admitted_immediately = num_submitted - metrics['queued'] - metrics['rejected'] if admitted_immediately + metrics['dequeued'] == metrics['admitted']: @@ -1891,7 +1892,7 @@ class TestAdmissionControllerStress(TestAdmissionControllerBase): # Request admitted clients to end their queries current_executing_queries = [] - for i in xrange(num_queries): + for i in range(num_queries): # pop() is thread-safe, it's OK if another thread is appending concurrently. thread = self.executing_threads.pop(0) LOG.info("Cancelling query %s", thread.query_num) @@ -2100,7 +2101,7 @@ class TestAdmissionControllerStress(TestAdmissionControllerBase): initial_metrics = self.get_admission_metrics() log_metrics("Initial metrics: ", initial_metrics) - for query_num in xrange(num_queries): + for query_num in range(num_queries): impalad = self.impalads[query_num % len(self.impalads)] query_end_behavior = QUERY_END_BEHAVIORS[query_num % len(QUERY_END_BEHAVIORS)] thread = self.SubmitQueryThread(impalad, additional_query_options, vector, diff --git a/tests/custom_cluster/test_auto_scaling.py b/tests/custom_cluster/test_auto_scaling.py index 86dda7687..5a9f2cbbf 100644 --- a/tests/custom_cluster/test_auto_scaling.py +++ b/tests/custom_cluster/test_auto_scaling.py @@ -18,6 +18,7 @@ # under the License. from __future__ import absolute_import, division, print_function +from builtins import range import logging import pytest from time import sleep, time diff --git a/tests/custom_cluster/test_blacklist.py b/tests/custom_cluster/test_blacklist.py index d98cd8221..be8d6999f 100644 --- a/tests/custom_cluster/test_blacklist.py +++ b/tests/custom_cluster/test_blacklist.py @@ -16,6 +16,7 @@ # under the License. 
from __future__ import absolute_import, division, print_function +from builtins import range from tests.common.custom_cluster_test_suite import CustomClusterTestSuite import pytest @@ -223,7 +224,7 @@ class TestBlacklistFaultyDisk(CustomClusterTestSuite): def __generate_scratch_dir(self, num): result = [] - for i in xrange(num): + for i in range(num): dir_path = tempfile.mkdtemp() self.created_dirs.append(dir_path) result.append(dir_path) diff --git a/tests/custom_cluster/test_breakpad.py b/tests/custom_cluster/test_breakpad.py index c44d3259b..6e43c42b5 100644 --- a/tests/custom_cluster/test_breakpad.py +++ b/tests/custom_cluster/test_breakpad.py @@ -16,6 +16,7 @@ # under the License. from __future__ import absolute_import, division, print_function +from builtins import range import glob import os import psutil @@ -75,7 +76,7 @@ class TestBreakpadBase(CustomClusterTestSuite): def kill_cluster(self, signal): self.cluster.refresh() processes = self.cluster.impalads + [self.cluster.catalogd, self.cluster.statestored] - processes = filter(None, processes) + processes = [_f for _f in processes if _f] self.kill_processes(processes, signal) signal is SIGUSR1 or self.assert_all_processes_killed() @@ -317,7 +318,7 @@ class TestBreakpadExhaustive(TestBreakpadBase): cluster_size = self.get_num_processes('impalad') # We trigger several rounds of minidump creation to make sure that all daemons wrote # enough files to trigger rotation. - for i in xrange(max_minidumps + 1): + for i in range(max_minidumps + 1): self.kill_cluster(SIGUSR1) # Breakpad forks to write its minidump files, sleep briefly to allow the forked # processes to start. diff --git a/tests/custom_cluster/test_codegen_cache.py b/tests/custom_cluster/test_codegen_cache.py index 72b8e28d6..611ba5a03 100644 --- a/tests/custom_cluster/test_codegen_cache.py +++ b/tests/custom_cluster/test_codegen_cache.py @@ -16,6 +16,7 @@ # under the License. 
from __future__ import absolute_import, division, print_function +from builtins import range import pytest from copy import copy from tests.common.custom_cluster_test_suite import CustomClusterTestSuite diff --git a/tests/custom_cluster/test_concurrent_ddls.py b/tests/custom_cluster/test_concurrent_ddls.py index 8d61e2735..4052de682 100644 --- a/tests/custom_cluster/test_concurrent_ddls.py +++ b/tests/custom_cluster/test_concurrent_ddls.py @@ -16,6 +16,7 @@ # under the License. from __future__ import absolute_import, division, print_function +from builtins import range import pytest import threading @@ -140,9 +141,9 @@ class TestConcurrentDdls(CustomClusterTestSuite): # Run DDLs with invalidate metadata in parallel NUM_ITERS = 16 worker = [None] * (NUM_ITERS + 1) - for i in xrange(1, NUM_ITERS + 1): + for i in range(1, NUM_ITERS + 1): worker[i] = pool.apply_async(run_ddls, (i,)) - for i in xrange(1, NUM_ITERS + 1): + for i in range(1, NUM_ITERS + 1): try: worker[i].get(timeout=100) except TimeoutError: @@ -185,7 +186,7 @@ class TestConcurrentDdls(CustomClusterTestSuite): NUM_ITERS = 20 pool = ThreadPool(processes=2) - for i in xrange(NUM_ITERS): + for i in range(NUM_ITERS): # Run two INVALIDATE METADATA commands in parallel r1 = pool.apply_async(run_invalidate_metadata) r2 = pool.apply_async(run_invalidate_metadata) diff --git a/tests/custom_cluster/test_concurrent_kudu_create.py b/tests/custom_cluster/test_concurrent_kudu_create.py index ad86ee925..d27db8eba 100644 --- a/tests/custom_cluster/test_concurrent_kudu_create.py +++ b/tests/custom_cluster/test_concurrent_kudu_create.py @@ -16,6 +16,7 @@ # under the License. 
from __future__ import absolute_import, division, print_function +from builtins import range import pytest import threading import time @@ -50,7 +51,7 @@ class TestConcurrentKuduCreate(CustomClusterTestSuite): self.execute_query("drop table if exists %s" % table_name) NUM_ITERS = 20 pool = ThreadPool(processes=3) - for i in xrange(NUM_ITERS): + for i in range(NUM_ITERS): # Run several commands by specific time interval to reproduce this bug r1 = pool.apply_async(run_create_table_if_not_exists) r2 = pool.apply_async(run_create_table_if_not_exists) diff --git a/tests/custom_cluster/test_custom_statestore.py b/tests/custom_cluster/test_custom_statestore.py index 185e3f049..4a5db8149 100644 --- a/tests/custom_cluster/test_custom_statestore.py +++ b/tests/custom_cluster/test_custom_statestore.py @@ -19,6 +19,7 @@ # Tests statestore with non-default startup options from __future__ import absolute_import, division, print_function +from builtins import range import logging import os import pytest @@ -79,7 +80,7 @@ class TestCustomStatestore(CustomClusterTestSuite): exceeded.""" # With a statestore_max_subscribers of 3, we should hit the registration error # pretty quick. - for x in xrange(20): + for x in range(20): response = self.__register_subscriber() if response.status.status_code == TErrorCode.OK: self.registration_id = response.registration_id diff --git a/tests/custom_cluster/test_events_custom_configs.py b/tests/custom_cluster/test_events_custom_configs.py index 703f11b61..dc7ac51c9 100644 --- a/tests/custom_cluster/test_events_custom_configs.py +++ b/tests/custom_cluster/test_events_custom_configs.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. 
from __future__ import absolute_import, division, print_function +from builtins import range import logging import pytest @@ -199,7 +200,7 @@ class TestEventProcessingCustomConfigs(CustomClusterTestSuite): removed_metric_val_before = EventProcessorUtils.get_int_metric(removed_metric_name, 0) events_skipped_before = EventProcessorUtils.get_int_metric('events-skipped', 0) num_iters = 100 - for iter in xrange(num_iters): + for iter in range(num_iters): for q in queries: try: self.execute_query_expect_success(self.create_impala_client(), q) diff --git a/tests/custom_cluster/test_exchange_deferred_batches.py b/tests/custom_cluster/test_exchange_deferred_batches.py index 52a70a074..05c97ed47 100644 --- a/tests/custom_cluster/test_exchange_deferred_batches.py +++ b/tests/custom_cluster/test_exchange_deferred_batches.py @@ -16,6 +16,7 @@ # under the License. from __future__ import absolute_import, division, print_function +from builtins import range import pytest from tests.common.custom_cluster_test_suite import CustomClusterTestSuite from tests.common.skip import SkipIfBuildType diff --git a/tests/custom_cluster/test_executor_groups.py b/tests/custom_cluster/test_executor_groups.py index f0d3c3293..5d624d969 100644 --- a/tests/custom_cluster/test_executor_groups.py +++ b/tests/custom_cluster/test_executor_groups.py @@ -18,6 +18,7 @@ # under the License. from __future__ import absolute_import, division, print_function +from builtins import range from tests.common.custom_cluster_test_suite import CustomClusterTestSuite from tests.util.concurrent_workload import ConcurrentWorkload @@ -375,7 +376,7 @@ class TestExecutorGroups(CustomClusterTestSuite): # a new query to fit (see IMPALA-9073). 
NUM_SAMPLES = 30 executor_slots_in_use = [] - for _ in xrange(NUM_SAMPLES): + for _ in range(NUM_SAMPLES): backends_json = json.loads( self.impalad_test_service.read_debug_webpage('backends?json')) for backend in backends_json['backends']: diff --git a/tests/custom_cluster/test_hdfs_fd_caching.py b/tests/custom_cluster/test_hdfs_fd_caching.py index c2c66be49..b5e5db5e8 100644 --- a/tests/custom_cluster/test_hdfs_fd_caching.py +++ b/tests/custom_cluster/test_hdfs_fd_caching.py @@ -16,6 +16,7 @@ # under the License. from __future__ import absolute_import, division, print_function +from builtins import range import pytest from tests.common.custom_cluster_test_suite import CustomClusterTestSuite diff --git a/tests/custom_cluster/test_incremental_metadata_updates.py b/tests/custom_cluster/test_incremental_metadata_updates.py index 82334f080..2f330f3d6 100755 --- a/tests/custom_cluster/test_incremental_metadata_updates.py +++ b/tests/custom_cluster/test_incremental_metadata_updates.py @@ -16,6 +16,7 @@ # under the License. from __future__ import absolute_import, division, print_function +from builtins import range import pytest from tests.common.custom_cluster_test_suite import CustomClusterTestSuite diff --git a/tests/custom_cluster/test_local_catalog.py b/tests/custom_cluster/test_local_catalog.py index baeb453bc..25e0c9f62 100644 --- a/tests/custom_cluster/test_local_catalog.py +++ b/tests/custom_cluster/test_local_catalog.py @@ -18,6 +18,7 @@ # Test behaviors specific to --use_local_catalog being enabled. from __future__ import absolute_import, division, print_function +from builtins import range import pytest import Queue import random @@ -144,7 +145,7 @@ class TestCompactCatalogUpdates(CustomClusterTestSuite): # catalog pushes a new topic update. 
self.cluster.catalogd.start() NUM_ATTEMPTS = 30 - for attempt in xrange(NUM_ATTEMPTS): + for attempt in range(NUM_ATTEMPTS): try: self.assert_impalad_log_contains('WARNING', 'Detected catalog service restart') err = self.execute_query_expect_failure(client, "select * from %s" % view) @@ -445,7 +446,7 @@ class TestLocalCatalogRetries(CustomClusterTestSuite): # Prior to fixing IMPALA-7534, this test would fail within 20-30 iterations, # so 100 should be quite reliable as a regression test. NUM_ITERS = 100 - for i in t.imap_unordered(do_table, xrange(NUM_ITERS)): + for i in t.imap_unordered(do_table, range(NUM_ITERS)): pass class TestObservability(CustomClusterTestSuite): @@ -493,7 +494,7 @@ class TestObservability(CustomClusterTestSuite): "explain select count(*) from functional.alltypes", "create table %s (a int)" % test_table_name, "drop table %s" % test_table_name] - for _ in xrange(0, 10): + for _ in range(0, 10): for query in queries_to_test: ret = self.execute_query_expect_success(client, query) assert ret.runtime_profile.count("Frontend:") == 1 diff --git a/tests/custom_cluster/test_mem_reservations.py b/tests/custom_cluster/test_mem_reservations.py index f6e66ae81..36996f910 100644 --- a/tests/custom_cluster/test_mem_reservations.py +++ b/tests/custom_cluster/test_mem_reservations.py @@ -16,6 +16,7 @@ # under the License. from __future__ import absolute_import, division, print_function +from builtins import range import pytest import threading @@ -84,7 +85,7 @@ class TestMemReservations(CustomClusterTestSuite): client = self.coordinator.service.create_beeswax_client() try: client.set_configuration(CONFIG_MAP) - for i in xrange(20): + for i in range(20): result = client.execute(self.query) assert result.success assert len(result.data) == 1 @@ -96,7 +97,7 @@ class TestMemReservations(CustomClusterTestSuite): # Create two threads to submit COORDINATOR_QUERY to two different coordinators concurrently. # They should both succeed. 
threads = [QuerySubmitThread(COORDINATOR_QUERY, self.cluster.impalads[i]) - for i in xrange(2)] + for i in range(2)] for t in threads: t.start() for t in threads: t.join() diff --git a/tests/custom_cluster/test_metadata_replicas.py b/tests/custom_cluster/test_metadata_replicas.py index 4afa4249f..47e628eea 100644 --- a/tests/custom_cluster/test_metadata_replicas.py +++ b/tests/custom_cluster/test_metadata_replicas.py @@ -16,6 +16,7 @@ # under the License. from __future__ import absolute_import, division, print_function +from builtins import range import pytest from tests.common.custom_cluster_test_suite import CustomClusterTestSuite from tests.common.skip import SkipIfFS @@ -53,7 +54,7 @@ class TestMetadataReplicas(CustomClusterTestSuite): # Issue several invalidates to boost the version for the current incarnation of the # catalog. As a result, the table we'll add to Hive will get a version that's easier # to see is higher than the highest version of the restarted catalogd incarnation. - for i in xrange(0, 50): + for i in range(0, 50): self.client.execute("invalidate metadata functional.alltypes") assert self.cluster.catalogd.service.get_catalog_version() >= 50 # Creates a database and table with Hive and makes it visible to Impala. @@ -86,7 +87,7 @@ class TestMetadataReplicas(CustomClusterTestSuite): c_objects = self.cluster.catalogd.service.get_catalog_objects() i_objects = [proc.service.get_catalog_objects() for proc in self.cluster.impalads] - for idx in xrange(0, len(i_objects)): + for idx in range(0, len(i_objects)): i_obj = i_objects[idx] diff = self.__diff_catalog_objects(c_objects, i_obj) assert diff[0] == {},\ diff --git a/tests/custom_cluster/test_metastore_service.py b/tests/custom_cluster/test_metastore_service.py index c74ab63fd..2ce83907c 100644 --- a/tests/custom_cluster/test_metastore_service.py +++ b/tests/custom_cluster/test_metastore_service.py @@ -16,6 +16,7 @@ # under the License. 
from __future__ import absolute_import, division, print_function +from builtins import range import pytest from hive_metastore.ttypes import Database from hive_metastore.ttypes import FieldSchema diff --git a/tests/custom_cluster/test_parquet_max_page_header.py b/tests/custom_cluster/test_parquet_max_page_header.py index 73496487b..52e13c690 100644 --- a/tests/custom_cluster/test_parquet_max_page_header.py +++ b/tests/custom_cluster/test_parquet_max_page_header.py @@ -18,6 +18,7 @@ # Tests for IMPALA-2273 from __future__ import absolute_import, division, print_function +from builtins import range import os import pytest import random @@ -91,9 +92,9 @@ class TestParquetMaxPageHeader(CustomClusterTestSuite): file_name = os.path.join(dir, file) # Create two 10MB long strings. random_text1 = "".join([random.choice(string.letters) - for i in xrange(self.MAX_STRING_LENGTH)]) + for i in range(self.MAX_STRING_LENGTH)]) random_text2 = "".join([random.choice(string.letters) - for i in xrange(self.MAX_STRING_LENGTH)]) + for i in range(self.MAX_STRING_LENGTH)]) put = subprocess.Popen(["hdfs", "dfs", "-put", "-d", "-f", "-", file_name], stdin=subprocess.PIPE, bufsize=-1) put.stdin.write(random_text1 + "\n") diff --git a/tests/custom_cluster/test_preload_table_types.py b/tests/custom_cluster/test_preload_table_types.py index f86427eac..bf57200de 100644 --- a/tests/custom_cluster/test_preload_table_types.py +++ b/tests/custom_cluster/test_preload_table_types.py @@ -16,6 +16,7 @@ # under the License. from __future__ import absolute_import, division, print_function +from builtins import range from tests.common.custom_cluster_test_suite import CustomClusterTestSuite diff --git a/tests/custom_cluster/test_process_failures.py b/tests/custom_cluster/test_process_failures.py index 21464ed3f..c9e9cd3ea 100644 --- a/tests/custom_cluster/test_process_failures.py +++ b/tests/custom_cluster/test_process_failures.py @@ -16,6 +16,7 @@ # under the License. 
from __future__ import absolute_import, division, print_function +from builtins import range import pytest from beeswaxd.BeeswaxService import QueryState @@ -76,7 +77,7 @@ class TestProcessFailures(CustomClusterTestSuite): handles = [] # Run num_concurrent_queries asynchronously - for _ in xrange(num_concurrent_queries): + for _ in range(num_concurrent_queries): handles.append(client.execute_async(query)) # Wait for the queries to start running @@ -87,7 +88,7 @@ class TestProcessFailures(CustomClusterTestSuite): impalad.kill() # Assert that all executors have 0 in-flight fragments - for i in xrange(1, len(self.cluster.impalads)): + for i in range(1, len(self.cluster.impalads)): self.cluster.impalads[i].service.wait_for_metric_value( "impala-server.num-fragments-in-flight", 0, timeout=30) diff --git a/tests/custom_cluster/test_query_expiration.py b/tests/custom_cluster/test_query_expiration.py index d7dd5a0c3..7e08dc81c 100644 --- a/tests/custom_cluster/test_query_expiration.py +++ b/tests/custom_cluster/test_query_expiration.py @@ -18,6 +18,7 @@ # Tests for query expiration. 
from __future__ import absolute_import, division, print_function +from builtins import range import pytest import re import threading @@ -246,14 +247,14 @@ class TestQueryExpiration(CustomClusterTestSuite): num_expired = impalad.service.get_metric_value('impala-server.num-queries-expired') non_expiring_threads = \ [NonExpiringQueryThread(impalad.service.create_beeswax_client()) - for _ in xrange(5)] + for _ in range(5)] expiring_threads = [ExpiringQueryThread(impalad.service.create_beeswax_client()) - for _ in xrange(5)] + for _ in range(5)] time_limit_threads = [TimeLimitThread(impalad.service.create_beeswax_client()) - for _ in xrange(5)] + for _ in range(5)] non_expiring_time_limit_threads = [ NonExpiringTimeLimitThread(impalad.service.create_beeswax_client()) - for _ in xrange(5)] + for _ in range(5)] all_threads = non_expiring_threads + expiring_threads + time_limit_threads +\ non_expiring_time_limit_threads for t in all_threads: diff --git a/tests/custom_cluster/test_query_retries.py b/tests/custom_cluster/test_query_retries.py index 5d321ceed..56e852970 100644 --- a/tests/custom_cluster/test_query_retries.py +++ b/tests/custom_cluster/test_query_retries.py @@ -22,6 +22,7 @@ # TODO: Add a test that cancels queries while a retry is running from __future__ import absolute_import, division, print_function +from builtins import map, range import pytest import re import shutil @@ -205,14 +206,14 @@ class TestQueryRetries(CustomClusterTestSuite): # Launch a set of concurrent queries. num_concurrent_queries = 3 handles = [] - for _ in xrange(num_concurrent_queries): + for _ in range(num_concurrent_queries): handle = self.execute_query_async(self._shuffle_heavy_query, query_options={'retry_failed_queries': 'true'}) handles.append(handle) # Wait for each query to start running. 
running_state = self.client.QUERY_STATES['RUNNING'] - map(lambda handle: self.wait_for_state(handle, running_state, 60), handles) + list(map(lambda handle: self.wait_for_state(handle, running_state, 60), handles)) # Kill a random impalad. killed_impalad = self.__kill_random_impalad() @@ -1180,7 +1181,7 @@ class TestQueryRetriesFaultyDisk(CustomClusterTestSuite): def __generate_scratch_dir(self, num): result = [] - for i in xrange(num): + for i in range(num): dir_path = tempfile.mkdtemp() self.created_dirs.append(dir_path) result.append(dir_path) diff --git a/tests/custom_cluster/test_restart_services.py b/tests/custom_cluster/test_restart_services.py index 7d9c003d1..1492089f5 100644 --- a/tests/custom_cluster/test_restart_services.py +++ b/tests/custom_cluster/test_restart_services.py @@ -16,6 +16,7 @@ # under the License. from __future__ import absolute_import, division, print_function +from builtins import range import logging import os import pytest @@ -59,7 +60,7 @@ class TestRestart(CustomClusterTestSuite): # existing metrics yet so we wait for some time here. wait_time_s = build_flavor_timeout(60, slow_build_timeout=100) sleep(wait_time_s) - for retry in xrange(wait_time_s): + for retry in range(wait_time_s): try: cursor.execute("describe database functional") return @@ -82,7 +83,7 @@ class TestRestart(CustomClusterTestSuite): client = self.cluster.impalads[0].service.create_beeswax_client() assert client is not None - for i in xrange(5): + for i in range(5): self.execute_query_expect_success(client, "select * from functional.alltypes") node_to_restart = 1 + (i % 2) self.cluster.impalads[node_to_restart].restart() diff --git a/tests/custom_cluster/test_rpc_timeout.py b/tests/custom_cluster/test_rpc_timeout.py index 410e22e9e..375a4b9c4 100644 --- a/tests/custom_cluster/test_rpc_timeout.py +++ b/tests/custom_cluster/test_rpc_timeout.py @@ -16,6 +16,7 @@ # under the License. 
from __future__ import absolute_import, division, print_function +from builtins import range import pytest from tests.beeswax.impala_beeswax import ImpalaBeeswaxException from tests.common.custom_cluster_test_suite import CustomClusterTestSuite @@ -141,7 +142,7 @@ class TestRPCTimeout(CustomClusterTestSuite): @pytest.mark.execute_serially @CustomClusterTestSuite.with_args("--backend_client_rpc_timeout_ms=1000" " --datastream_sender_timeout_ms=30000 --debug_actions=%s" % - "|".join(map(lambda rpc: "%s_DELAY:JITTER@[email protected]" % rpc, all_rpcs))) + "|".join(["%s_DELAY:JITTER@[email protected]" % rpc for rpc in all_rpcs])) def test_random_rpc_timeout(self, vector): self.execute_query_verify_metrics(self.TEST_QUERY, None, 10) diff --git a/tests/custom_cluster/test_scratch_disk.py b/tests/custom_cluster/test_scratch_disk.py index dc3797b8e..db1c1c08c 100644 --- a/tests/custom_cluster/test_scratch_disk.py +++ b/tests/custom_cluster/test_scratch_disk.py @@ -18,6 +18,7 @@ # Tests for query expiration. 
from __future__ import absolute_import, division, print_function +from builtins import range import os import pytest import re @@ -74,7 +75,7 @@ class TestScratchDir(CustomClusterTestSuite): def generate_dirs(self, num, writable=True, non_existing=False): result = [] - for i in xrange(num): + for i in range(num): dir_path = tempfile.mkdtemp() if non_existing: shutil.rmtree(dir_path) diff --git a/tests/custom_cluster/test_set_and_unset.py b/tests/custom_cluster/test_set_and_unset.py index 3425a5895..2abe2586b 100644 --- a/tests/custom_cluster/test_set_and_unset.py +++ b/tests/custom_cluster/test_set_and_unset.py @@ -171,5 +171,5 @@ class TestSetAndUnset(CustomClusterTestSuite, HS2TestSuite): fetch_results_req.operationHandle = execute_statement_resp.operationHandle fetch_results_req.maxRows = 100 fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req) - return zip(fetch_results_resp.results.columns[0].stringVal.values, - fetch_results_resp.results.columns[1].stringVal.values) + return list(zip(fetch_results_resp.results.columns[0].stringVal.values, + fetch_results_resp.results.columns[1].stringVal.values)) diff --git a/tests/custom_cluster/test_topic_update_frequency.py b/tests/custom_cluster/test_topic_update_frequency.py index f5e6c92f5..f7e2b1581 100644 --- a/tests/custom_cluster/test_topic_update_frequency.py +++ b/tests/custom_cluster/test_topic_update_frequency.py @@ -10,6 +10,7 @@ # See the License for the specific language governing permissions and # limitations under the License. from __future__ import absolute_import, division, print_function +from builtins import range from multiprocessing.pool import ThreadPool import pytest diff --git a/tests/custom_cluster/test_udf_concurrency.py b/tests/custom_cluster/test_udf_concurrency.py index e9f9b8575..aa35eac39 100644 --- a/tests/custom_cluster/test_udf_concurrency.py +++ b/tests/custom_cluster/test_udf_concurrency.py @@ -16,6 +16,7 @@ # under the License. 
from __future__ import absolute_import, division, print_function +from builtins import range import os import pytest import random @@ -114,11 +115,11 @@ class TestUdfConcurrency(CustomClusterTestSuite): # create threads to use native function. runner_threads = [] - for i in xrange(num_uses): + for i in range(num_uses): runner_threads.append(threading.Thread(target=use_fn_method)) # create threads to drop/create native functions. - for i in xrange(num_loads): + for i in range(num_loads): runner_threads.append(threading.Thread(target=load_fn_method)) # launch all runner threads. diff --git a/tests/custom_cluster/test_wide_table_operations.py b/tests/custom_cluster/test_wide_table_operations.py index 26daf46aa..85657dcea 100644 --- a/tests/custom_cluster/test_wide_table_operations.py +++ b/tests/custom_cluster/test_wide_table_operations.py @@ -16,6 +16,7 @@ # under the License. from __future__ import absolute_import, division, print_function +from builtins import range import os import pytest from subprocess import call diff --git a/tests/failure/test_failpoints.py b/tests/failure/test_failpoints.py index e95aa1e86..38f0919e5 100644 --- a/tests/failure/test_failpoints.py +++ b/tests/failure/test_failpoints.py @@ -19,6 +19,7 @@ # two types of failures - cancellation of the query and a failure test hook. # from __future__ import absolute_import, division, print_function +from builtins import range import pytest import re from time import sleep diff --git a/tests/hs2/hs2_test_suite.py b/tests/hs2/hs2_test_suite.py index b8acbc502..3c6720529 100644 --- a/tests/hs2/hs2_test_suite.py +++ b/tests/hs2/hs2_test_suite.py @@ -18,6 +18,7 @@ # Superclass of all HS2 tests containing commonly used functions. 
from __future__ import absolute_import, division, print_function +from builtins import range from getpass import getuser from TCLIService import TCLIService from ImpalaService import ImpalaHiveServer2Service @@ -281,7 +282,7 @@ class HS2TestSuite(ImpalaTestSuite): num_rows = len(typed_col.values) break - for i in xrange(num_rows): + for i in range(num_rows): row = [] for c in columns: for col_type in HS2TestSuite.HS2_V6_COLUMN_TYPES: diff --git a/tests/hs2/test_fetch_first.py b/tests/hs2/test_fetch_first.py index d40b1d0bb..6fccc80f0 100644 --- a/tests/hs2/test_fetch_first.py +++ b/tests/hs2/test_fetch_first.py @@ -21,6 +21,7 @@ # succeed as long all previously fetched rows fit into the bounded result cache. from __future__ import absolute_import, division, print_function +from builtins import range import pytest from ImpalaService import ImpalaHiveServer2Service @@ -199,7 +200,7 @@ class TestFetchFirst(HS2TestSuite): "SELECT * FROM functional.alltypessmall ORDER BY id LIMIT 30" execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req) HS2TestSuite.check_response(execute_statement_resp) - for i in xrange(1, 5): + for i in range(1, 5): # Fetch 10 rows with the FETCH_NEXT orientation. expected_num_rows = 10 if i == 4: @@ -220,7 +221,7 @@ class TestFetchFirst(HS2TestSuite): execute_statement_req.statement =\ "SELECT * FROM functional.alltypessmall ORDER BY id LIMIT 30" execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req) - for _ in xrange(1, 5): + for _ in range(1, 5): self.fetch_until(execute_statement_resp.operationHandle, TCLIService.TFetchOrientation.FETCH_FIRST, 30) self.__verify_num_cached_rows(30) @@ -339,7 +340,7 @@ class TestFetchFirst(HS2TestSuite): execute_statement_req.statement =\ "SELECT * FROM functional.alltypessmall ORDER BY id LIMIT 0" execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req) - for i in xrange(0, 3): + for i in range(0, 3): # Fetch some rows. 
Expect to get 0 rows. self.fetch_at_most(execute_statement_resp.operationHandle, TCLIService.TFetchOrientation.FETCH_NEXT, i * 10, 0) @@ -358,7 +359,7 @@ class TestFetchFirst(HS2TestSuite): self.fetch_at_most(execute_statement_resp.operationHandle, TCLIService.TFetchOrientation.FETCH_FIRST, 100, 1) self.__verify_num_cached_rows(1) - for i in xrange(0, 3): + for i in range(0, 3): # Fetch some rows with FETCH_FIRST. Expect to get 1 row. self.fetch_at_most(execute_statement_resp.operationHandle, TCLIService.TFetchOrientation.FETCH_FIRST, i * 10, 1) @@ -391,7 +392,7 @@ class TestFetchFirst(HS2TestSuite): execute_statement_req.statement = "show table stats functional.alltypes" execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req) HS2TestSuite.check_response(execute_statement_resp) - for i in xrange(1, 5): + for i in range(1, 5): # Fetch 10 rows with the FETCH_NEXT orientation. expected_num_rows = 10 if i == 3: @@ -414,7 +415,7 @@ class TestFetchFirst(HS2TestSuite): execute_statement_req.statement = "show table stats functional.alltypes" execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req) HS2TestSuite.check_response(execute_statement_resp) - for _ in xrange(1, 5): + for _ in range(1, 5): self.fetch_until(execute_statement_resp.operationHandle, TCLIService.TFetchOrientation.FETCH_FIRST, 30, 25) # The results of non-query stmts are not counted as 'cached'. 
diff --git a/tests/hs2/test_hs2.py b/tests/hs2/test_hs2.py index e87877b54..6353fbf95 100644 --- a/tests/hs2/test_hs2.py +++ b/tests/hs2/test_hs2.py @@ -18,6 +18,7 @@ # Client tests for Impala's HiveServer2 interface from __future__ import absolute_import, division, print_function +from builtins import range from getpass import getuser from contextlib import contextmanager import json @@ -408,7 +409,7 @@ class TestHS2(HS2TestSuite): num_sessions = self.impalad_test_service.get_metric_value( "impala-server.num-open-hiveserver2-sessions") session_ids = [] - for _ in xrange(5): + for _ in range(5): open_session_req = TCLIService.TOpenSessionReq() resp = self.hs2_client.OpenSession(open_session_req) TestHS2.check_response(resp) @@ -883,7 +884,7 @@ class TestHS2(HS2TestSuite): args=(profile_fetch_exception, query_uuid, op_handle))) # Start threads that will race to unregister the query. - for i in xrange(NUM_UNREGISTER_THREADS): + for i in range(NUM_UNREGISTER_THREADS): socket, client = self._open_hs2_connection() sockets.append(socket) threads.append(threading.Thread(target=self._unregister_query, diff --git a/tests/metadata/test_compute_stats.py b/tests/metadata/test_compute_stats.py index 7f4bdff9a..7d4007a4c 100644 --- a/tests/metadata/test_compute_stats.py +++ b/tests/metadata/test_compute_stats.py @@ -16,6 +16,7 @@ # under the License. from __future__ import absolute_import, division, print_function +from builtins import range import pytest from subprocess import check_call @@ -183,7 +184,7 @@ class TestComputeStats(ImpalaTestSuite): # Check that the template formulated above exists and row count of the table is # not zero, for all scans. 
- for i in xrange(len(explain_result.data)): + for i in range(len(explain_result.data)): if ("SCAN HDFS" in explain_result.data[i]): assert(hdfs_physical_properties_template in explain_result.data[i + 1]) assert("cardinality=0" not in explain_result.data[i + 2]) diff --git a/tests/metadata/test_ddl.py b/tests/metadata/test_ddl.py index ff9357315..95dc10176 100644 --- a/tests/metadata/test_ddl.py +++ b/tests/metadata/test_ddl.py @@ -16,6 +16,7 @@ # under the License. from __future__ import absolute_import, division, print_function +from builtins import map, range import getpass import itertools import pytest @@ -303,10 +304,10 @@ class TestDdlStatements(TestDdlBase): def test_create_table_like_file_orc(self, vector, unique_database): COMPLEXTYPETBL_PATH = 'test-warehouse/managed/functional_orc_def.db/' \ 'complextypestbl_orc_def/' - base_dir = filter(lambda s: s.startswith('base'), - self.filesystem_client.ls(COMPLEXTYPETBL_PATH))[0] - bucket_file = filter(lambda s: s.startswith('bucket'), - self.filesystem_client.ls(COMPLEXTYPETBL_PATH + base_dir))[0] + base_dir = list(filter(lambda s: s.startswith('base'), + self.filesystem_client.ls(COMPLEXTYPETBL_PATH)))[0] + bucket_file = list(filter(lambda s: s.startswith('bucket'), + self.filesystem_client.ls(COMPLEXTYPETBL_PATH + base_dir)))[0] vector.get_value('exec_option')['abort_on_error'] = False create_table_from_orc(self.client, unique_database, 'timestamp_with_local_timezone') @@ -623,7 +624,7 @@ class TestDdlStatements(TestDdlBase): "location '{1}/{0}'".format(fq_tbl_name, WAREHOUSE)) # Add some partitions (first batch of two) - for i in xrange(num_parts // 5): + for i in range(num_parts // 5): start = time.time() self.client.execute( "alter table {0} add partition(j={1}, s='{1}')".format(fq_tbl_name, i)) @@ -645,7 +646,7 @@ class TestDdlStatements(TestDdlBase): .format(fq_tbl_name, WAREHOUSE)) # Add some more partitions - for i in xrange(num_parts // 5, num_parts): + for i in range(num_parts // 5, num_parts): start 
= time.time() self.client.execute( "alter table {0} add partition(j={1},s='{1}')".format(fq_tbl_name, i)) @@ -676,8 +677,8 @@ class TestDdlStatements(TestDdlBase): result = self.execute_query_expect_success(self.client, "SHOW PARTITIONS %s" % fq_tbl_name) - assert 1 == len(filter(lambda line: line.find("PARQUET") != -1, result.data)) - assert 2 == len(filter(lambda line: line.find("ORC") != -1, result.data)) + assert 1 == len([line for line in result.data if line.find("PARQUET") != -1]) + assert 2 == len([line for line in result.data if line.find("ORC") != -1]) def test_alter_table_create_many_partitions(self, vector, unique_database): """ @@ -688,7 +689,7 @@ class TestDdlStatements(TestDdlBase): "create table {0}.t(i int) partitioned by (p int)".format(unique_database)) MAX_PARTITION_UPDATES_PER_RPC = 500 alter_stmt = "alter table {0}.t add ".format(unique_database) + " ".join( - "partition(p=%d)" % (i,) for i in xrange(MAX_PARTITION_UPDATES_PER_RPC + 2)) + "partition(p=%d)" % (i,) for i in range(MAX_PARTITION_UPDATES_PER_RPC + 2)) self.client.execute(alter_stmt) partitions = self.client.execute("show partitions {0}.t".format(unique_database)) # Show partitions will contain partition HDFS paths, which we expect to contain @@ -696,8 +697,8 @@ class TestDdlStatements(TestDdlBase): # paths, converts them to integers, and checks that wehave all the ones we # expect. 
PARTITION_RE = re.compile("p=([0-9]+)") - assert map(int, PARTITION_RE.findall(str(partitions))) == \ - range(MAX_PARTITION_UPDATES_PER_RPC + 2) + assert list(map(int, PARTITION_RE.findall(str(partitions)))) == \ + list(range(MAX_PARTITION_UPDATES_PER_RPC + 2)) def test_create_alter_tbl_properties(self, vector, unique_database): fq_tbl_name = unique_database + ".test_alter_tbl" @@ -1231,7 +1232,7 @@ class TestLibCache(TestDdlBase): """ self.client.set_configuration(vector.get_value("exec_option")) for drop_stmt in drop_stmts: self.client.execute(drop_stmt % ("if exists")) - for i in xrange(0, num_iterations): + for i in range(0, num_iterations): for create_stmt in create_stmts: self.client.execute(create_stmt) self.client.execute(select_stmt) for drop_stmt in drop_stmts: self.client.execute(drop_stmt % ("")) diff --git a/tests/metadata/test_hms_integration.py b/tests/metadata/test_hms_integration.py index 343f67877..9caa5dda0 100644 --- a/tests/metadata/test_hms_integration.py +++ b/tests/metadata/test_hms_integration.py @@ -24,6 +24,7 @@ # Impala, in all the possible ways of validating that metadata. 
from __future__ import absolute_import, division, print_function +from builtins import range import pytest import random import string @@ -235,7 +236,7 @@ class TestHmsIntegration(ImpalaTestSuite): dictionary that holds the parsed attributes.""" result = {} output_lines = output.split('\n') - stat_names = map(string.strip, output_lines[0].split(',')) + stat_names = list(map(string.strip, output_lines[0].split(','))) stat_values = output_lines[3].split(',') assert len(stat_names) == len(stat_values) for i in range(0, len(stat_names)): @@ -247,7 +248,7 @@ class TestHmsIntegration(ImpalaTestSuite): dictionary that holds the parsed attributes.""" result = {} for line in output.split('\n'): - line_elements = map(string.strip, line.split(',')) + line_elements = list(map(string.strip, line.split(','))) if len(line_elements) >= 2: result[line_elements[0]] = line_elements[1] return result diff --git a/tests/metadata/test_load.py b/tests/metadata/test_load.py index 9dc3500e8..272f8bd0a 100644 --- a/tests/metadata/test_load.py +++ b/tests/metadata/test_load.py @@ -18,6 +18,7 @@ # Functional tests for LOAD DATA statements. from __future__ import absolute_import, division, print_function +from builtins import range import time from beeswaxd.BeeswaxService import QueryState from copy import deepcopy @@ -77,18 +78,18 @@ class TestLoadData(ImpalaTestSuite): # - Sub Directories 5-6 have multiple files (4) copied from alltypesaggmultifiles # - Sub Directory 3 also has hidden files, in both supported formats. # - All sub-dirs contain a hidden directory - for i in xrange(1, 6): + for i in range(1, 6): stagingDir = '{0}/{1}'.format(STAGING_PATH, i) self.filesystem_client.make_dir(stagingDir, permission=777) self.filesystem_client.make_dir('{0}/_hidden_dir'.format(stagingDir), permission=777) # Copy single file partitions from alltypes. 
- for i in xrange(1, 4): + for i in range(1, 4): self.filesystem_client.copy(ALLTYPES_PATH, "{0}/{1}/100101.txt".format(STAGING_PATH, i)) # Copy multi file partitions from alltypesaggmultifiles. file_names = self.filesystem_client.ls(MULTIAGG_PATH) - for i in xrange(4, 6): + for i in range(4, 6): for file_ in file_names: self.filesystem_client.copy( "{0}/{1}".format(MULTIAGG_PATH, file_), diff --git a/tests/metadata/test_recover_partitions.py b/tests/metadata/test_recover_partitions.py index 131c31155..b6700a67b 100644 --- a/tests/metadata/test_recover_partitions.py +++ b/tests/metadata/test_recover_partitions.py @@ -18,6 +18,7 @@ # Impala tests for ALTER TABLE RECOVER PARTITIONS statement from __future__ import absolute_import, division, print_function +from builtins import range import os from six.moves import urllib from tests.common.impala_test_suite import ImpalaTestSuite @@ -177,7 +178,7 @@ class TestRecoverPartitions(ImpalaTestSuite): "CREATE TABLE %s (c int) PARTITIONED BY (s string)" % (FQ_TBL_NAME)) # Create 700 partitions externally - for i in xrange(1, 700): + for i in range(1, 700): PART_DIR = "s=part%d/" % i FILE_PATH = "test" INSERTED_VALUE = "666" @@ -185,14 +186,14 @@ class TestRecoverPartitions(ImpalaTestSuite): result = self.execute_query_expect_success(self.client, "SHOW PARTITIONS %s" % FQ_TBL_NAME) - for i in xrange(1, 700): + for i in range(1, 700): PART_DIR = "part%d\t" % i assert not self.has_value(PART_DIR, result.data) self.execute_query_expect_success(self.client, "ALTER TABLE %s RECOVER PARTITIONS" % FQ_TBL_NAME) result = self.execute_query_expect_success(self.client, "SHOW PARTITIONS %s" % FQ_TBL_NAME) - for i in xrange(1, 700): + for i in range(1, 700): PART_DIR = "part%d\t" % i assert self.has_value(PART_DIR, result.data) @@ -344,7 +345,7 @@ class TestRecoverPartitions(ImpalaTestSuite): # Running ALTER TABLE RECOVER PARTITIONS multiple times should only produce # a single partition when adding a single partition. 
- for i in xrange(3): + for i in range(3): self.execute_query_expect_success( self.client, "ALTER TABLE %s RECOVER PARTITIONS" % FQ_TBL_NAME) result = self.execute_query_expect_success( @@ -392,7 +393,7 @@ class TestRecoverPartitions(ImpalaTestSuite): # Adds partition directories. num_partitions = 10 - for i in xrange(1, num_partitions): + for i in range(1, num_partitions): PART_DIR = "i=%d/s=part%d" % (i,i) self.filesystem_client.make_dir(TBL_LOCATION + PART_DIR) @@ -410,7 +411,7 @@ class TestRecoverPartitions(ImpalaTestSuite): result = self.execute_query_expect_success(self.client, "SHOW PARTITIONS %s" % FQ_TBL_NAME) assert num_partitions - 1 == self.count_partition(result.data) - for i in xrange(1, num_partitions): + for i in range(1, num_partitions): PART_DIR = "part%d\t" % i assert self.has_value(PART_DIR, result.data) @@ -457,7 +458,7 @@ class TestRecoverPartitions(ImpalaTestSuite): def count_value(self, value, lines): """Count the number of lines that contain value.""" - return len(filter(lambda line: line.find(value) != -1, lines)) + return len([line for line in lines if line.find(value) != -1]) def verify_partitions(self, expected_parts, lines): """Check if all partition values are expected""" diff --git a/tests/metadata/test_recursive_listing.py b/tests/metadata/test_recursive_listing.py index 9273387e4..de6cfc91d 100644 --- a/tests/metadata/test_recursive_listing.py +++ b/tests/metadata/test_recursive_listing.py @@ -11,6 +11,7 @@ # limitations under the License. from __future__ import absolute_import, division, print_function +from builtins import range import pytest import requests import time diff --git a/tests/metadata/test_stats_extrapolation.py b/tests/metadata/test_stats_extrapolation.py index 00aa721ad..88352d4dc 100644 --- a/tests/metadata/test_stats_extrapolation.py +++ b/tests/metadata/test_stats_extrapolation.py @@ -16,6 +16,7 @@ # under the License. 
from __future__ import absolute_import, division, print_function +from builtins import range from os import path from tests.common.impala_test_suite import ImpalaTestSuite from tests.common.skip import SkipIfEC @@ -146,7 +147,7 @@ class TestStatsExtrapolation(ImpalaTestSuite): col_names = [fs.name.upper() for fs in actual.schema.fieldSchemas] rows_col_idx = col_names.index("#ROWS") extrap_rows_col_idx = col_names.index("EXTRAP #ROWS") - for i in xrange(0, len(actual.data)): + for i in range(0, len(actual.data)): act_cols = actual.data[i].split("\t") exp_cols = expected.data[i].split("\t") assert int(exp_cols[rows_col_idx]) >= 0 @@ -173,7 +174,7 @@ class TestStatsExtrapolation(ImpalaTestSuite): assert len(actual.schema.fieldSchemas) == len(expected.schema.fieldSchemas) col_names = [fs.name.upper() for fs in actual.schema.fieldSchemas] ndv_col_idx = col_names.index("#DISTINCT VALUES") - for i in xrange(0, len(actual.data)): + for i in range(0, len(actual.data)): act_cols = actual.data[i].split("\t") exp_cols = expected.data[i].split("\t") assert int(exp_cols[ndv_col_idx]) >= 0 diff --git a/tests/performance/scheduler.py b/tests/performance/scheduler.py index 95d2a7a13..f0850fdaf 100644 --- a/tests/performance/scheduler.py +++ b/tests/performance/scheduler.py @@ -20,6 +20,7 @@ # vector. It treats a workload an the unit of parallelism. 
from __future__ import absolute_import, division, print_function +from builtins import range import logging from collections import defaultdict @@ -78,7 +79,7 @@ class Scheduler(object): Each workload thread is analogus to a client name, and is identified by a unique ID, the workload that's being run and the table formats it's being run on.""" - for thread_num in xrange(self.num_clients): + for thread_num in range(self.num_clients): thread = Thread(target=self._run_queries, args=[thread_num], name=self._thread_name % thread_num) thread.daemon = True @@ -98,7 +99,7 @@ class Scheduler(object): # each thread gets its own copy of query_executors query_executors = deepcopy(sorted(self.query_executors, key=lambda x: x.query.name)) - for j in xrange(self.iterations): + for j in range(self.iterations): # Randomize the order of execution for each iteration if specified. if self.shuffle: shuffle(query_executors) results = defaultdict(list) @@ -106,7 +107,7 @@ class Scheduler(object): for query_executor in query_executors: query_name = query_executor.query.name LOG.info("Running Query: %s" % query_name) - for i in xrange(self.query_iterations): + for i in range(self.query_iterations): if self._exit.isSet(): LOG.error("Another thread failed, exiting.") exit(1) diff --git a/tests/query_test/test_aggregation.py b/tests/query_test/test_aggregation.py index cf399e20d..1cf2942ff 100644 --- a/tests/query_test/test_aggregation.py +++ b/tests/query_test/test_aggregation.py @@ -18,6 +18,7 @@ # Validates all aggregate functions across all datatypes # from __future__ import absolute_import, division, print_function +from builtins import range import pytest from testdata.common import widetable @@ -279,7 +280,7 @@ class TestAggregationQueries(ImpalaTestSuite): ] # For each possible integer value, genereate one query and test it out. 
- for i in xrange(1, 11): + for i in range(1, 11): ndv_stmt = """ select ndv(bool_col, {0}), ndv(tinyint_col, {0}), ndv(smallint_col, {0}), ndv(int_col, {0}), @@ -299,7 +300,7 @@ class TestAggregationQueries(ImpalaTestSuite): # Verify that each ndv() value (one per column for a total of 11) is identical # to the corresponding known value. Since NDV() invokes Hash64() hash function # with a fixed seed value, ndv() result is deterministic. - for j in xrange(0, 11): + for j in range(0, 11): assert(ndv_results[i - 1][j] == int(ndv_vals[j])) def test_grouping_sets(self, vector): @@ -393,7 +394,7 @@ class TestAggregationQueriesRunOnce(ImpalaTestSuite): assert len(sampled_ndv_vals) == len(ndv_vals) # Low NDV columns. We expect a reasonaby accurate estimate regardless of the # sampling percent. - for i in xrange(0, 14): + for i in range(0, 14): self.appx_equals(int(sampled_ndv_vals[i]), int(ndv_vals[i]), 0.1) # High NDV columns. We expect the estimate to have high variance and error. # Since we give NDV() and SAMPLED_NDV() the same input data, i.e., we are not @@ -401,7 +402,7 @@ class TestAggregationQueriesRunOnce(ImpalaTestSuite): # be bigger than NDV() proportional to the sampling percent. # For example, the column 'id' is a PK so we expect the result of SAMPLED_NDV() # with a sampling percent of 0.1 to be approximately 10x of the NDV(). - for i in xrange(14, 16): + for i in range(14, 16): self.appx_equals(int(sampled_ndv_vals[i]) * sample_perc, int(ndv_vals[i]), 2.0) diff --git a/tests/query_test/test_avro_schema_resolution.py b/tests/query_test/test_avro_schema_resolution.py index b6ed92bff..e9a6a66f5 100644 --- a/tests/query_test/test_avro_schema_resolution.py +++ b/tests/query_test/test_avro_schema_resolution.py @@ -16,6 +16,7 @@ # under the License. 
from __future__ import absolute_import, division, print_function +from builtins import range from tests.common.impala_test_suite import ImpalaTestSuite from tests.common.skip import SkipIfCatalogV2 diff --git a/tests/query_test/test_cancellation.py b/tests/query_test/test_cancellation.py index 784c8ef24..c6cfb50ad 100644 --- a/tests/query_test/test_cancellation.py +++ b/tests/query_test/test_cancellation.py @@ -19,6 +19,7 @@ # from __future__ import absolute_import, division, print_function +from builtins import range import pytest import threading from time import sleep @@ -74,7 +75,7 @@ JOIN_BEFORE_CLOSE = [False, True] # Extra dimensions to test order by without limit SORT_QUERY = 'select * from lineitem order by l_orderkey' -SORT_CANCEL_DELAY = range(6, 10) +SORT_CANCEL_DELAY = list(range(6, 10)) SORT_BUFFER_POOL_LIMIT = ['0', '300m'] # Test spilling and non-spilling sorts. # Test with and without multithreading @@ -153,7 +154,7 @@ class TestCancellation(ImpalaTestSuite): wait_action = vector.get_value('wait_action') fail_rpc_action = vector.get_value('fail_rpc_action') - debug_action = "|".join(filter(None, [wait_action, fail_rpc_action])) + debug_action = "|".join([_f for _f in [wait_action, fail_rpc_action] if _f]) vector.get_value('exec_option')['debug_action'] = debug_action vector.get_value('exec_option')['buffer_pool_limit'] =\ @@ -162,7 +163,7 @@ class TestCancellation(ImpalaTestSuite): vector.get_value('exec_option')['mt_dop'] = vector.get_value('mt_dop') # Execute the query multiple times, cancelling it each time. 
- for i in xrange(NUM_CANCELATION_ITERATIONS): + for i in range(NUM_CANCELATION_ITERATIONS): cancel_query_and_validate_state(self.client, query, vector.get_value('exec_option'), vector.get_value('table_format'), vector.get_value('cancel_delay'), vector.get_value('join_before_close')) diff --git a/tests/query_test/test_cast_with_format.py b/tests/query_test/test_cast_with_format.py index e73a6f9cf..8d49c7d32 100644 --- a/tests/query_test/test_cast_with_format.py +++ b/tests/query_test/test_cast_with_format.py @@ -16,6 +16,7 @@ # under the License. from __future__ import absolute_import, division, print_function +from builtins import range from tests.common.impala_test_suite import ImpalaTestSuite from tests.common.test_dimensions import create_client_protocol_dimension diff --git a/tests/query_test/test_compressed_formats.py b/tests/query_test/test_compressed_formats.py index 894f237f0..333fc349c 100644 --- a/tests/query_test/test_compressed_formats.py +++ b/tests/query_test/test_compressed_formats.py @@ -16,6 +16,7 @@ # under the License. from __future__ import absolute_import, division, print_function +from builtins import range import math import os import pytest diff --git a/tests/query_test/test_decimal_casting.py b/tests/query_test/test_decimal_casting.py index 8c4ea5912..e0435a2c1 100644 --- a/tests/query_test/test_decimal_casting.py +++ b/tests/query_test/test_decimal_casting.py @@ -18,6 +18,7 @@ # Validates that casting to Decimal works. # from __future__ import absolute_import, division, print_function +from builtins import range import pytest from decimal import Decimal, getcontext, ROUND_DOWN, ROUND_HALF_UP from allpairspy import AllPairs as all_pairs @@ -39,11 +40,11 @@ class TestDecimalCasting(ImpalaTestSuite): DECIMAL_TYPES_MAP = { # All possible decimal types. 
# (0 < precision <= 38 && 0 <= scale <= 38 && scale <= precision) - 'exhaustive' : [(p, s) for p in xrange(1, 39) for s in xrange(0, p + 1)], + 'exhaustive': [(p, s) for p in range(1, 39) for s in range(0, p + 1)], # Core only deals with precision 6,16,26 (different integer types) - 'core' : [(p, s) for p in [6,16,26] for s in xrange(0, p + 1)], + 'core': [(p, s) for p in [6, 16, 26] for s in range(0, p + 1)], # mimics test_vectors.py and takes a subset of all decimal types - 'pairwise' : all_pairs([(p, s) for p in xrange(1, 39) for s in xrange(0, p + 1)]) + 'pairwise': all_pairs([(p, s) for p in range(1, 39) for s in range(0, p + 1)]) } # We can cast for numerics or string types. CAST_FROM = ['string', 'number'] @@ -121,7 +122,7 @@ class TestDecimalCasting(ImpalaTestSuite): precision, scale = vector.get_value('decimal_type') if vector.get_value('cast_from') == 'decimal': pytest.skip("Casting between the same decimal type isn't interesting") - for i in xrange(self.iterations): + for i in range(self.iterations): val = self._gen_decimal_val(precision, scale) cast = self._normalize_cast_expr(val, precision, vector.get_value('cast_from'))\ .format(val, precision, scale) @@ -132,7 +133,7 @@ class TestDecimalCasting(ImpalaTestSuite): """Test to verify that we always return NULL when trying to cast a number with greater precision that its intended decimal type""" precision, scale = vector.get_value('decimal_type') - for i in xrange(self.iterations): + for i in range(self.iterations): # Generate a decimal with a larger precision than the one we're casting to. 
from_precision = randint(precision + 1, 39) val = self._gen_decimal_val(from_precision, scale) @@ -150,7 +151,7 @@ class TestDecimalCasting(ImpalaTestSuite): if precision == scale: pytest.skip("Cannot underflow scale when precision and scale are equal") - for i in xrange(self.iterations): + for i in range(self.iterations): from_scale = randint(scale + 1, precision) val = self._gen_decimal_val(precision, from_scale) cast = self._normalize_cast_expr(val, precision, cast_from)\ diff --git a/tests/query_test/test_decimal_fuzz.py b/tests/query_test/test_decimal_fuzz.py index 1f468960c..d55251f00 100644 --- a/tests/query_test/test_decimal_fuzz.py +++ b/tests/query_test/test_decimal_fuzz.py @@ -19,6 +19,7 @@ # operations return correct results under decimal_v2. from __future__ import absolute_import, division, print_function +from builtins import range import decimal import math import pytest @@ -190,7 +191,7 @@ class TestDecimalFuzz(ImpalaTestSuite): return True return False - for num_digits_after_dot in xrange(39): + for num_digits_after_dot in range(39): # Reduce the number of digits after the dot in the expected_result to different # amounts. If it matches the actual result in at least one of the cases, we # consider the actual result to be acceptable. 
@@ -245,7 +246,7 @@ class TestDecimalFuzz(ImpalaTestSuite): assert self.result_equals(expected_result, result) def test_decimal_ops(self, vector): - for _ in xrange(self.iterations): + for _ in range(self.iterations): self.execute_one_decimal_op() def width_bucket(self, val, min_range, max_range, num_buckets): @@ -299,5 +300,5 @@ class TestDecimalFuzz(ImpalaTestSuite): raise e def test_width_bucket(self, vector): - for _ in xrange(self.iterations): + for _ in range(self.iterations): self.execute_one_width_bucket() diff --git a/tests/query_test/test_exprs.py b/tests/query_test/test_exprs.py index f396841d0..569e584e8 100644 --- a/tests/query_test/test_exprs.py +++ b/tests/query_test/test_exprs.py @@ -16,6 +16,7 @@ # under the License. from __future__ import absolute_import, division, print_function +from builtins import range import pytest import re from random import randint @@ -102,7 +103,7 @@ class TestExprLimits(ImpalaTestSuite): def test_expr_child_limit(self, vector): # IN predicate in_query = "select 1 IN(" - for i in xrange(0, self.EXPR_CHILDREN_LIMIT - 1): + for i in range(0, self.EXPR_CHILDREN_LIMIT - 1): in_query += str(i) if (i + 1 != self.EXPR_CHILDREN_LIMIT - 1): in_query += "," @@ -111,7 +112,7 @@ class TestExprLimits(ImpalaTestSuite): # CASE expr case_query = "select case " - for i in xrange(0, self.EXPR_CHILDREN_LIMIT // 2): + for i in range(0, self.EXPR_CHILDREN_LIMIT // 2): case_query += " when true then 1" case_query += " end" self.__exec_query(case_query) @@ -181,7 +182,7 @@ class TestExprLimits(ImpalaTestSuite): def __gen_huge_case(self, col_name, fanout, depth, indent): toks = ["case\n"] - for i in xrange(fanout): + for i in range(fanout): add = randint(1, 1000000) divisor = randint(1, 10000000) mod = randint(0, divisor) @@ -200,16 +201,16 @@ class TestExprLimits(ImpalaTestSuite): def __gen_deep_infix_expr(self, prefix, repeat_suffix): expr = prefix - for i in xrange(self.EXPR_DEPTH_LIMIT - 1): + for i in range(self.EXPR_DEPTH_LIMIT - 1): expr 
+= repeat_suffix return expr def __gen_deep_func_expr(self, open_func, base_arg, close_func): expr = "" - for i in xrange(self.EXPR_DEPTH_LIMIT - 1): + for i in range(self.EXPR_DEPTH_LIMIT - 1): expr += open_func expr += base_arg - for i in xrange(self.EXPR_DEPTH_LIMIT - 1): + for i in range(self.EXPR_DEPTH_LIMIT - 1): expr += close_func return expr diff --git a/tests/query_test/test_hdfs_caching.py b/tests/query_test/test_hdfs_caching.py index 105895af2..f90e83be2 100644 --- a/tests/query_test/test_hdfs_caching.py +++ b/tests/query_test/test_hdfs_caching.py @@ -18,6 +18,7 @@ # Validates limit on scan nodes from __future__ import absolute_import, division, print_function +from builtins import range import pytest import re import time @@ -103,7 +104,7 @@ class TestHdfsCaching(ImpalaTestSuite): select * from t1, t2, t3 where t1.x = t2.x and t2.x = t3.x """ # Run this query for some iterations since it is timing dependent. - for x in xrange(1, num_iters): + for x in range(1, num_iters): result = self.execute_query(query_string) assert(len(result.data) == 2) diff --git a/tests/query_test/test_iceberg.py b/tests/query_test/test_iceberg.py index 76491ce65..6f64ced0e 100644 --- a/tests/query_test/test_iceberg.py +++ b/tests/query_test/test_iceberg.py @@ -16,6 +16,7 @@ # under the License. from __future__ import absolute_import, division, print_function +from builtins import range import datetime import logging import os diff --git a/tests/query_test/test_insert_behaviour.py b/tests/query_test/test_insert_behaviour.py index f4b269c32..9f55a9377 100644 --- a/tests/query_test/test_insert_behaviour.py +++ b/tests/query_test/test_insert_behaviour.py @@ -16,6 +16,7 @@ # under the License. 
from __future__ import absolute_import, division, print_function +from builtins import range import getpass import grp import os diff --git a/tests/query_test/test_insert_parquet.py b/tests/query_test/test_insert_parquet.py index 33fc83af5..dc72c63b6 100644 --- a/tests/query_test/test_insert_parquet.py +++ b/tests/query_test/test_insert_parquet.py @@ -18,6 +18,7 @@ # Targeted Impala insert tests from __future__ import absolute_import, division, print_function +from builtins import map, range import os from collections import namedtuple @@ -633,7 +634,8 @@ class TestHdfsParquetTableStatsWriter(ImpalaTestSuite): num_columns = len(table_stats) assert num_columns == len(expected_values) - for col_idx, stats, expected in zip(range(num_columns), table_stats, expected_values): + for col_idx, stats, expected in zip(list(range(num_columns)), + table_stats, expected_values): if col_idx in skip_col_idxs: continue if not expected: @@ -931,7 +933,7 @@ class TestHdfsParquetTableStatsWriter(ImpalaTestSuite): """Test that writing a Parquet table with too many columns results in an error.""" num_cols = 12000 query = "create table %s.wide stored as parquet as select \n" % unique_database - query += ", ".join(map(str, xrange(num_cols))) + query += ", ".join(map(str, range(num_cols))) query += ";\n" result = self.execute_query_expect_failure(self.client, query) assert "Minimum required block size must be less than 2GB" in str(result) diff --git a/tests/query_test/test_insert_permutation.py b/tests/query_test/test_insert_permutation.py index a3973a40e..14cf638a0 100644 --- a/tests/query_test/test_insert_permutation.py +++ b/tests/query_test/test_insert_permutation.py @@ -18,6 +18,7 @@ # Targeted Impala insert tests from __future__ import absolute_import, division, print_function +from builtins import map from tests.common.impala_test_suite import ImpalaTestSuite from tests.common.test_dimensions import ( create_exec_option_dimension, @@ -47,8 +48,8 @@ class 
TestInsertQueriesWithPermutation(ImpalaTestSuite): create_uncompressed_text_dimension(cls.get_workload())) def test_insert_permutation(self, vector): - map(self.cleanup_db, ["insert_permutation_test"]) + list(map(self.cleanup_db, ["insert_permutation_test"])) self.run_test_case('QueryTest/insert_permutation', vector) def teardown_method(self, method): - map(self.cleanup_db, ["insert_permutation_test"]) + list(map(self.cleanup_db, ["insert_permutation_test"])) diff --git a/tests/query_test/test_kudu.py b/tests/query_test/test_kudu.py index d637b3c35..f985599e2 100644 --- a/tests/query_test/test_kudu.py +++ b/tests/query_test/test_kudu.py @@ -16,6 +16,7 @@ # under the License. from __future__ import absolute_import, division, print_function +from builtins import range from kudu.schema import ( BOOL, DOUBLE, diff --git a/tests/query_test/test_mem_usage_scaling.py b/tests/query_test/test_mem_usage_scaling.py index 8660f9433..2b5982fa1 100644 --- a/tests/query_test/test_mem_usage_scaling.py +++ b/tests/query_test/test_mem_usage_scaling.py @@ -17,6 +17,7 @@ # from __future__ import absolute_import, division, print_function +from builtins import range import pytest from copy import copy diff --git a/tests/query_test/test_parquet_bloom_filter.py b/tests/query_test/test_parquet_bloom_filter.py index 902d7e769..601533373 100644 --- a/tests/query_test/test_parquet_bloom_filter.py +++ b/tests/query_test/test_parquet_bloom_filter.py @@ -16,6 +16,7 @@ # under the License. from __future__ import absolute_import, division, print_function +from builtins import range import math import os diff --git a/tests/query_test/test_scanners.py b/tests/query_test/test_scanners.py index be372018b..5f3ae18f9 100644 --- a/tests/query_test/test_scanners.py +++ b/tests/query_test/test_scanners.py @@ -22,6 +22,7 @@ # explode. 
from __future__ import absolute_import, division, print_function +from builtins import range import os import pytest import random diff --git a/tests/query_test/test_scanners_fuzz.py b/tests/query_test/test_scanners_fuzz.py index ec411abc1..ed7ce144c 100644 --- a/tests/query_test/test_scanners_fuzz.py +++ b/tests/query_test/test_scanners_fuzz.py @@ -16,6 +16,7 @@ # under the License. from __future__ import absolute_import, division, print_function +from builtins import range from copy import copy import itertools import logging @@ -322,7 +323,7 @@ class TestScannersFuzzing(ImpalaTestSuite): data = bytearray(f.read()) num_corruptions = rng.randint(0, int(math.log(len(data)))) - for _ in xrange(num_corruptions): + for _ in range(num_corruptions): flip_offset = rng.randint(0, len(data) - 1) flip_val = rng.randint(0, 255) LOG.info("corrupt file: Flip byte in {0} at {1} from {2} to {3}".format( diff --git a/tests/query_test/test_sort.py b/tests/query_test/test_sort.py index bcb76a766..dca3ec35a 100644 --- a/tests/query_test/test_sort.py +++ b/tests/query_test/test_sort.py @@ -22,12 +22,24 @@ from copy import copy, deepcopy from tests.common.impala_test_suite import ImpalaTestSuite from tests.common.skip import SkipIfNotHdfsMinicluster + def transpose_results(result, map_fn=lambda x: x): """Given a query result (list of strings, each string represents a row), return a list - of columns, where each column is a list of strings. Optionally, map_fn can be provided - to be applied to every value, eg. to convert the strings to their underlying types.""" + of columns, where each column is a list of strings. Optionally, map_fn can be + provided to be applied to every value, eg. to convert the strings to their + underlying types.""" + + # Split result rows by tab to produce a list of lists. i.e. 
+ # [[a1,a2], [b1, b2], [c1, c2]] split_result = [row.split('\t') for row in result] - return [map(map_fn, list(l)) for l in zip(*split_result)] + column_result = [] + for col in zip(*split_result): + # col is the transposed result, i.e. a1, b1, c1 + # Apply map_fn to all elements + column_result.append([map_fn(x) for x in col]) + + return column_result + class TestQueryFullSort(ImpalaTestSuite): """Test class to do functional validation of sorting when data is spilled to disk.""" diff --git a/tests/query_test/test_tpch_queries.py b/tests/query_test/test_tpch_queries.py index 593037d5b..201500a2d 100644 --- a/tests/query_test/test_tpch_queries.py +++ b/tests/query_test/test_tpch_queries.py @@ -17,6 +17,7 @@ # Functional tests running the TPCH workload. from __future__ import absolute_import, division, print_function +from builtins import range import pytest from tests.common.impala_test_suite import ImpalaTestSuite @@ -42,7 +43,7 @@ class TestTpchQuery(ImpalaTestSuite): def idfn(val): return "TPC-H: Q{0}".format(val) - @pytest.mark.parametrize("query", xrange(1, 23), ids=idfn) + @pytest.mark.parametrize("query", range(1, 23), ids=idfn) def test_tpch(self, vector, query): self.run_test_case('tpch-q{0}'.format(query), vector) diff --git a/tests/shell/test_shell_commandline.py b/tests/shell/test_shell_commandline.py index f78fa332c..f02adba18 100644 --- a/tests/shell/test_shell_commandline.py +++ b/tests/shell/test_shell_commandline.py @@ -19,6 +19,7 @@ # under the License. from __future__ import absolute_import, division, print_function +from builtins import range import errno import getpass import os @@ -1006,7 +1007,7 @@ class TestImpalaShell(ImpalaTestSuite): # This generates a sql file size of ~50K. 
num_cols = 1000 os.write(sql_file, "select \n") - for i in xrange(num_cols): + for i in range(num_cols): if i < num_cols: os.write(sql_file, "col_{0} as a{1},\n".format(i, i)) os.write(sql_file, "col_{0} as b{1},\n".format(i, i)) diff --git a/tests/statestore/test_statestore.py b/tests/statestore/test_statestore.py index 55cbf9b93..7ca3d713c 100644 --- a/tests/statestore/test_statestore.py +++ b/tests/statestore/test_statestore.py @@ -16,6 +16,7 @@ # under the License. from __future__ import absolute_import, division, print_function +from builtins import range from collections import defaultdict import json import logging @@ -115,7 +116,7 @@ class KillableThreadedServer(TServer): self.processor = None def wait_until_up(self, num_tries=10): - for i in xrange(num_tries): + for i in range(num_tries): cnxn = TSocket.TSocket('localhost', self.port) try: cnxn.open() @@ -125,7 +126,7 @@ class KillableThreadedServer(TServer): time.sleep(0.1) def wait_until_down(self, num_tries=10): - for i in xrange(num_tries): + for i in range(num_tries): cnxn = TSocket.TSocket('localhost', self.port) try: cnxn.open() @@ -349,7 +350,7 @@ class TestStatestore(): num_updates=1, clear_topic_entries=False): topic_entries = [ Subscriber.TTopicItem(key=key_template + str(x), value=value_template + str(x)) - for x in xrange(num_updates)] + for x in range(num_updates)] return Subscriber.TTopicDelta(topic_name=topic_name, topic_entries=topic_entries, is_delta=False, diff --git a/tests/stress/concurrent_select.py b/tests/stress/concurrent_select.py index 7fa6f6d28..8c15e49ee 100755 --- a/tests/stress/concurrent_select.py +++ b/tests/stress/concurrent_select.py @@ -57,6 +57,7 @@ from __future__ import absolute_import, division, print_function +from builtins import range import logging import os import re @@ -187,7 +188,7 @@ def print_crash_info_if_exists(impala, start_time): that evaluates to True if any impalads are stopped. 
""" max_attempts = 5 - for remaining_attempts in xrange(max_attempts - 1, -1, -1): + for remaining_attempts in range(max_attempts - 1, -1, -1): try: crashed_impalads = impala.find_crashed_impalads(start_time) break @@ -388,7 +389,7 @@ class StressRunner(object): queries_by_type[query.query_type] = [] queries_by_type[query.query_type].append(query) try: - for _ in xrange(self._num_queries_to_run): + for _ in range(self._num_queries_to_run): # First randomly determine a query type, then choose a random query of that # type. if ( @@ -918,7 +919,7 @@ def populate_runtime_info(query, impala, converted_args, timeout_secs=maxint): def get_report(desired_outcome=None): reports_by_outcome = defaultdict(list) leading_outcome = None - for remaining_samples in xrange(samples - 1, -1, -1): + for remaining_samples in range(samples - 1, -1, -1): report = runner.run_query(query, mem_limit, run_set_up=True, timeout_secs=timeout_secs, retain_profile=True) if report.timed_out: @@ -1438,7 +1439,7 @@ def main(): impala, file_queries, converted_args)) # Apply tweaks to the query's runtime info as requested by CLI options. - for idx in xrange(len(queries) - 1, -1, -1): + for idx in range(len(queries) - 1, -1, -1): query = queries[idx] if query.required_mem_mb_with_spilling: query.required_mem_mb_with_spilling += int( diff --git a/tests/stress/queries.py b/tests/stress/queries.py index c4c01a783..36b3a287c 100644 --- a/tests/stress/queries.py +++ b/tests/stress/queries.py @@ -21,6 +21,7 @@ # stress test, loading them and generating them. 
from __future__ import absolute_import, division, print_function +from builtins import range import logging import os from textwrap import dedent @@ -36,7 +37,7 @@ LOG = logging.getLogger(os.path.splitext(os.path.basename(__file__))[0]) class QueryType(object): - COMPUTE_STATS, DELETE, INSERT, SELECT, UPDATE, UPSERT = range(6) + COMPUTE_STATS, DELETE, INSERT, SELECT, UPDATE, UPSERT = list(range(6)) class Query(object): diff --git a/tests/stress/query_retries_stress_runner.py b/tests/stress/query_retries_stress_runner.py index d21496a83..87560d3e2 100755 --- a/tests/stress/query_retries_stress_runner.py +++ b/tests/stress/query_retries_stress_runner.py @@ -25,6 +25,7 @@ # the script, it has to be killed manually (e.g. kill [pid]). from __future__ import absolute_import, division, print_function +from builtins import map, range import logging import pipes import os @@ -176,12 +177,12 @@ def run_concurrent_workloads(concurrency, coordinator, database, queries): # complete. workload_threads = [] LOG.info("Starting {0} concurrent workloads".format(concurrency)) - for i in xrange(concurrency): + for i in range(concurrency): workload_thread = threading.Thread(target=__run_workload, args=[i], name="workload_thread_{0}".format(i)) workload_thread.start() workload_threads.append(workload_thread) - map(lambda thread: thread.join(), workload_threads) + list(map(lambda thread: thread.join(), workload_threads)) # Check if any of the workload runner threads hit an exception, if one did then print # the error and exit. @@ -254,7 +255,7 @@ def run_stress_workload(queries, database, workload, start_delay, start_random_impalad_killer(kill_frequency, start_delay, cluster) # Run the stress test 'iterations' times. 
- for i in xrange(iterations): + for i in range(iterations): LOG.info("Starting iteration {0} of workload {1}".format(i, workload)) run_concurrent_workloads(concurrency, impala_coordinator, database, queries) diff --git a/tests/stress/test_acid_stress.py b/tests/stress/test_acid_stress.py index 49b94b2db..a284717b5 100644 --- a/tests/stress/test_acid_stress.py +++ b/tests/stress/test_acid_stress.py @@ -16,6 +16,7 @@ # under the License. from __future__ import absolute_import, division, print_function +from builtins import map, range import pytest import random import time @@ -62,7 +63,7 @@ class TestAcidInsertsBasic(TestAcidStress): run_max = -1 i_list = [] for line in result.data: - [run, i] = map(int, (line.split('\t'))) + [run, i] = list(map(int, (line.split('\t')))) run_max = max(run_max, run) i_list.append(i) assert expected_result["run"] <= run_max # shouldn't see data overwritten in the past @@ -72,17 +73,18 @@ class TestAcidInsertsBasic(TestAcidStress): expected_result["i"] = 0 return assert i_list[-1] >= expected_result["i"] - assert i_list == range(i_list[-1] + 1) # 'i' should have all values from 0 to max_i + # 'i' should have all values from 0 to max_i + assert i_list == list(range(i_list[-1] + 1)) expected_result["i"] = i_list[-1] def _hive_role_write_inserts(self, tbl_name, partitioned): """INSERT INTO/OVERWRITE a table several times from Hive.""" part_expr = "partition (p=1)" if partitioned else "" - for run in xrange(0, NUM_OVERWRITES): + for run in range(0, NUM_OVERWRITES): OVERWRITE_SQL = """insert overwrite table %s %s values (%i, %i) """ % (tbl_name, part_expr, run, 0) self.run_stmt_in_hive(OVERWRITE_SQL) - for i in xrange(1, NUM_INSERTS_PER_OVERWRITE + 1): + for i in range(1, NUM_INSERTS_PER_OVERWRITE + 1): INSERT_SQL = """insert into table %s %s values (%i, %i) """ % (tbl_name, part_expr, run, i) self.run_stmt_in_hive(INSERT_SQL) @@ -92,11 +94,11 @@ class TestAcidInsertsBasic(TestAcidStress): try: impalad_client = 
ImpalaTestSuite.create_impala_client() part_expr = "partition (p=1)" if partitioned else "" - for run in xrange(0, NUM_OVERWRITES + 1): + for run in range(0, NUM_OVERWRITES + 1): OVERWRITE_SQL = """insert overwrite table %s %s values (%i, %i) """ % (tbl_name, part_expr, run, 0) impalad_client.execute(OVERWRITE_SQL) - for i in xrange(1, NUM_INSERTS_PER_OVERWRITE + 1): + for i in range(1, NUM_INSERTS_PER_OVERWRITE + 1): INSERT_SQL = """insert into table %s %s values (%i, %i) """ % (tbl_name, part_expr, run, i) impalad_client.execute(INSERT_SQL) @@ -261,11 +263,12 @@ class TestConcurrentAcidInserts(TestAcidStress): def verify_result_set(result): wid_to_run = dict() for line in result.data: - [wid, i] = map(int, (line.split('\t'))) + [wid, i] = list(map(int, (line.split('\t')))) wid_to_run.setdefault(wid, []).append(i) for wid, run in wid_to_run.items(): sorted_run = sorted(run) - assert sorted_run == range(sorted_run[0], sorted_run[-1] + 1), "wid: %d" % wid + assert sorted_run == list(range(sorted_run[0], sorted_run[-1] + 1)), \ + "wid: %d" % wid target_impalad = cid % ImpalaTestSuite.get_impalad_cluster_size() impalad_client = ImpalaTestSuite.create_client_for_nth_impalad(target_impalad) @@ -299,10 +302,10 @@ class TestConcurrentAcidInserts(TestAcidStress): num_checkers = 3 writers = [Task(self._impala_role_concurrent_writer, tbl_name, i, counter) - for i in xrange(0, num_writers)] + for i in range(0, num_writers)] checkers = [Task(self._impala_role_concurrent_checker, tbl_name, i, counter, num_writers) - for i in xrange(0, num_checkers)] + for i in range(0, num_checkers)] run_tasks(writers + checkers) @@ -369,9 +372,9 @@ class TestFailingAcidInserts(TestAcidStress): num_checkers = 3 writers = [Task(self._impala_role_insert, tbl_name, partitioned, i, counter) - for i in xrange(0, num_writers)] + for i in range(0, num_writers)] checkers = [Task(self._impala_role_checker, tbl_name, i, counter, num_writers) - for i in xrange(0, num_checkers)] + for i in range(0, 
num_checkers)] run_tasks(writers + checkers) @SkipIfFS.stress_insert_timeouts diff --git a/tests/stress/test_ddl_stress.py b/tests/stress/test_ddl_stress.py index 09480b315..9eb9e0d42 100644 --- a/tests/stress/test_ddl_stress.py +++ b/tests/stress/test_ddl_stress.py @@ -16,6 +16,7 @@ # under the License. from __future__ import absolute_import, division, print_function +from builtins import range import pytest from tests.common.impala_test_suite import ImpalaTestSuite @@ -25,7 +26,7 @@ from tests.common.skip import SkipIfFS NUM_TBLS_PER_THREAD = 10 # Each client will get a different test id. -TEST_INDICES = xrange(10) +TEST_INDICES = range(10) # Simple stress test for DDL operations. Attempts to create, cache, @@ -57,7 +58,7 @@ class TestDdlStress(ImpalaTestSuite): # rather simultaneously on the same object. self.client.execute("create database if not exists {0}".format(self.SHARED_DATABASE)) - for i in xrange(NUM_TBLS_PER_THREAD): + for i in range(NUM_TBLS_PER_THREAD): tbl_name = "{db}.test_{checksum}_{i}".format( db=self.SHARED_DATABASE, checksum=testid_checksum, diff --git a/tests/stress/test_insert_stress.py b/tests/stress/test_insert_stress.py index 93eaf91d6..81611e5a5 100644 --- a/tests/stress/test_insert_stress.py +++ b/tests/stress/test_insert_stress.py @@ -16,6 +16,7 @@ # under the License. 
from __future__ import absolute_import, division, print_function +from builtins import map, range import pytest import random import time @@ -64,11 +65,12 @@ class TestInsertStress(ImpalaTestSuite): def verify_result_set(result): wid_to_run = dict() for line in result.data: - [wid, i] = map(int, (line.split('\t'))) + [wid, i] = list(map(int, (line.split('\t')))) wid_to_run.setdefault(wid, []).append(i) for wid, run in wid_to_run.items(): sorted_run = sorted(run) - assert sorted_run == range(sorted_run[0], sorted_run[-1] + 1), "wid: %d" % wid + assert sorted_run == list(range(sorted_run[0], sorted_run[-1] + 1)), \ + "wid: %d" % wid target_impalad = cid % ImpalaTestSuite.get_impalad_cluster_size() impalad_client = ImpalaTestSuite.create_client_for_nth_impalad(target_impalad) @@ -97,10 +99,10 @@ class TestInsertStress(ImpalaTestSuite): inserts = 50 writers = [Task(self._impala_role_concurrent_writer, tbl_name, i, inserts, counter) - for i in xrange(0, num_writers)] + for i in range(0, num_writers)] checkers = [Task(self._impala_role_concurrent_checker, tbl_name, i, counter, num_writers) - for i in xrange(0, num_checkers)] + for i in range(0, num_checkers)] run_tasks(writers + checkers) @pytest.mark.execute_serially @@ -124,8 +126,8 @@ class TestInsertStress(ImpalaTestSuite): inserts = 30 writers = [Task(self._impala_role_concurrent_writer, tbl_name, i, inserts, counter) - for i in xrange(0, num_writers)] + for i in range(0, num_writers)] checkers = [Task(self._impala_role_concurrent_checker, tbl_name, i, counter, num_writers) - for i in xrange(0, num_checkers)] + for i in range(0, num_checkers)] run_tasks(writers + checkers) diff --git a/tests/util/calculation_util.py b/tests/util/calculation_util.py index 657c40a92..2899eecee 100644 --- a/tests/util/calculation_util.py +++ b/tests/util/calculation_util.py @@ -21,6 +21,7 @@ # dependencies. 
from __future__ import absolute_import, division, print_function +from builtins import range import math import random import string diff --git a/tests/util/concurrent_workload.py b/tests/util/concurrent_workload.py index 025f67da7..2ceb81574 100755 --- a/tests/util/concurrent_workload.py +++ b/tests/util/concurrent_workload.py @@ -20,6 +20,7 @@ # This class can be used to drive a concurrent workload against a local minicluster from __future__ import absolute_import, division, print_function +from builtins import range import argparse import logging # Needed to work around datetime threading bug: @@ -103,7 +104,7 @@ class ConcurrentWorkload(object): def start(self): """Starts worker threads to execute queries.""" # Start workers - for i in xrange(self.num_streams): + for i in range(self.num_streams): t = Thread(target=self.loop_query, args=(self.query, self.output_q, self.stop_ev)) self.threads.append(t) t.start() diff --git a/tests/util/get_parquet_metadata.py b/tests/util/get_parquet_metadata.py index 21099acb9..126f5434e 100644 --- a/tests/util/get_parquet_metadata.py +++ b/tests/util/get_parquet_metadata.py @@ -16,11 +16,13 @@ # under the License. from __future__ import absolute_import, division, print_function +from builtins import map import os import struct from datetime import date, datetime, time, timedelta from decimal import Decimal +from functools import reduce from parquet.ttypes import ColumnIndex, FileMetaData, OffsetIndex, PageHeader, Type from subprocess import check_call from thrift.protocol import TCompactProtocol @@ -97,7 +99,7 @@ def decode_decimal(schema, value): assert schema.type_length == len(value) assert schema.type == Type.FIXED_LEN_BYTE_ARRAY - numeric = Decimal(reduce(lambda x, y: x * 256 + y, map(ord, value))) + numeric = Decimal(reduce(lambda x, y: x * 256 + y, list(map(ord, value)))) # Compute two's complement for negative values. 
if (ord(value[0]) > 127): diff --git a/tests/util/ssh_util.py b/tests/util/ssh_util.py index 67b33889e..61b249bf8 100644 --- a/tests/util/ssh_util.py +++ b/tests/util/ssh_util.py @@ -16,6 +16,7 @@ # under the License. from __future__ import absolute_import, division, print_function +from builtins import range import atexit import logging import os diff --git a/tests/util/test_file_parser.py b/tests/util/test_file_parser.py index 91973c457..f1746e45c 100644 --- a/tests/util/test_file_parser.py +++ b/tests/util/test_file_parser.py @@ -18,6 +18,7 @@ # This module is used for common utilities related to parsing test files from __future__ import absolute_import, division, print_function +from builtins import map import codecs import collections import logging @@ -125,10 +126,10 @@ def parse_table_constraints(constraints_file): schema_only[f].append(table_name.lower()) elif constraint_type == 'restrict_to': schema_include[table_name.lower()] +=\ - map(parse_table_format_constraint, table_formats.split(',')) + list(map(parse_table_format_constraint, table_formats.split(','))) elif constraint_type == 'exclude': schema_exclude[table_name.lower()] +=\ - map(parse_table_format_constraint, table_formats.split(',')) + list(map(parse_table_format_constraint, table_formats.split(','))) else: raise ValueError('Unknown constraint type: %s' % constraint_type) return schema_include, schema_exclude, schema_only @@ -377,7 +378,10 @@ def load_tpc_queries(workload, include_stress_queries=False, query_name_filters= file_name_pattern = re.compile(r"(.*)") query_name_pattern = re.compile(r"(.*)") - query_name_filters = map(str.strip, query_name_filters) if query_name_filters else [] + if query_name_filters: + query_name_filters = list(map(str.strip, query_name_filters)) + else: + query_name_filters = [] filter_regex = re.compile(r'|'.join(['^%s$' % n for n in query_name_filters]), re.I) for query_file in os.listdir(query_dir):
