This is an automated email from the ASF dual-hosted git repository.
joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new b46d54150 IMPALA-13961: Remove usage of ImpalaBeeswaxResult.schema
b46d54150 is described below
commit b46d5415018feb8e2d64a5b0e111d8aa96999d03
Author: Riza Suminto <[email protected]>
AuthorDate: Sun Apr 13 22:08:03 2025 -0700
IMPALA-13961: Remove usage of ImpalaBeeswaxResult.schema
ImpylaHS2ResultSet does not implement an equivalent of
ImpalaBeeswaxResult.schema. However, the column_labels and column_types
fields are implemented by both result classes.
This patch removes all usage of ImpalaBeeswaxResult.schema and replaces it
with either the column_labels or the column_types field. Tests that used to
access ImpalaBeeswaxResult.schema are migrated to use the hs2 protocol by
default. This patch also fixes flake8 issues in the modified test files.
Testing:
Ran the modified test files in exhaustive exploration; all of them passed.
Change-Id: I060fe2d3cded1470fd09b86675cb22442c19fbee
Reviewed-on: http://gerrit.cloudera.org:8080/22776
Reviewed-by: Riza Suminto <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
---
tests/common/impala_connection.py | 5 ++++
tests/common/impala_test_suite.py | 6 ++---
.../test_metadata_no_events_processing.py | 15 ++++++++----
tests/custom_cluster/test_stats_extrapolation.py | 11 ++++++---
tests/metadata/test_event_processing.py | 5 ++++
tests/metadata/test_hms_integration.py | 5 ++++
tests/metadata/test_refresh_partition.py | 7 +++++-
tests/metadata/test_stats_extrapolation.py | 27 +++++++++++++---------
8 files changed, 58 insertions(+), 23 deletions(-)
diff --git a/tests/common/impala_connection.py
b/tests/common/impala_connection.py
index 9ac6bafb0..2a6b2e7bc 100644
--- a/tests/common/impala_connection.py
+++ b/tests/common/impala_connection.py
@@ -956,6 +956,11 @@ class ImpylaHS2ResultSet(object):
"""Return the raw HS2 result set, which is a list of tuples."""
return self.__result_tuples
+ def get_data(self):
+ if self.data:
+ return '\n'.join(self.data)
+ return ''
+
def __convert_result_row(self, result_tuple):
"""Take primitive values from a result tuple and construct the
tab-separated string
that would have been returned via beeswax."""
diff --git a/tests/common/impala_test_suite.py
b/tests/common/impala_test_suite.py
index 72126a39a..db62e2242 100644
--- a/tests/common/impala_test_suite.py
+++ b/tests/common/impala_test_suite.py
@@ -569,10 +569,10 @@ class ImpalaTestSuite(BaseTestSuite):
'include_fields'. Field names are compared case-insensitively.
"""
exec_result = self.client.execute('show partitions %s' % table_name)
- fieldSchemas = exec_result.schema.fieldSchemas
+ column_labels = exec_result.column_labels
fields_dict = {}
- for idx, fs in enumerate(fieldSchemas):
- fields_dict[fs.name.lower()] = idx
+ for idx, name in enumerate(column_labels):
+ fields_dict[name.lower()] = idx
rows = exec_result.get_data().split('\n')
rows.pop()
diff --git a/tests/custom_cluster/test_metadata_no_events_processing.py
b/tests/custom_cluster/test_metadata_no_events_processing.py
index d4305e624..621bf06c5 100644
--- a/tests/custom_cluster/test_metadata_no_events_processing.py
+++ b/tests/custom_cluster/test_metadata_no_events_processing.py
@@ -18,11 +18,16 @@
from __future__ import absolute_import, division, print_function
from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
from tests.common.skip import SkipIfFS
+from tests.common.test_vector import HS2
@SkipIfFS.hive
class TestMetadataNoEventsProcessing(CustomClusterTestSuite):
+ @classmethod
+ def default_test_protocol(cls):
+ return HS2
+
@CustomClusterTestSuite.with_args(catalogd_args="--hms_event_polling_interval_s=0")
def test_refresh_updated_partitions(self, unique_database):
"""
@@ -121,7 +126,7 @@ class
TestMetadataNoEventsProcessing(CustomClusterTestSuite):
assert len(result) == 6
@CustomClusterTestSuite.with_args(catalogd_args="--hms_event_polling_interval_s=0")
- def test_add_overlapping_partitions(self, vector, unique_database):
+ def test_add_overlapping_partitions(self, unique_database):
"""
IMPALA-1670, IMPALA-4141: Test interoperability with Hive when adding
overlapping
partitions to a table
@@ -183,7 +188,7 @@ class
TestMetadataNoEventsProcessing(CustomClusterTestSuite):
assert x1_location.endswith("/x=1")
@CustomClusterTestSuite.with_args(catalogd_args="--hms_event_polling_interval_s=0")
- def test_add_preexisting_partitions_with_data(self, unique_database, vector):
+ def test_add_preexisting_partitions_with_data(self, unique_database):
"""
IMPALA-1670, IMPALA-4141: After addding partitions that already exist in
HMS, Impala
can access the partition data.
@@ -223,7 +228,7 @@ class
TestMetadataNoEventsProcessing(CustomClusterTestSuite):
'select x, a from %s order by x, a' %
table_name).get_data().split('\n')
@CustomClusterTestSuite.with_args(catalogd_args="--hms_event_polling_interval_s=0")
- def test_refresh_invalid_partition(self, vector, unique_database):
+ def test_refresh_invalid_partition(self, unique_database):
"""
Trying to refresh a partition that does not exist does not modify anything
either in impala or hive.
@@ -241,7 +246,7 @@ class
TestMetadataNoEventsProcessing(CustomClusterTestSuite):
assert ['y=333/z=5309'] == self.hive_partition_names(table_name)
@CustomClusterTestSuite.with_args(catalogd_args="--hms_event_polling_interval_s=0")
- def test_add_data_and_refresh(self, vector, unique_database):
+ def test_add_data_and_refresh(self, unique_database):
"""
Data added through hive is visible in impala after refresh of partition.
"""
@@ -266,7 +271,7 @@ class
TestMetadataNoEventsProcessing(CustomClusterTestSuite):
'select * from %s' % table_name).get_data()
@CustomClusterTestSuite.with_args(catalogd_args="--hms_event_polling_interval_s=0")
- def test_refresh_partition_num_rows(self, vector, unique_database):
+ def test_refresh_partition_num_rows(self, unique_database):
"""Refreshing a partition should not change it's numRows stat."""
# Create a partitioned table and add data to it.
tbl = unique_database + ".t1"
diff --git a/tests/custom_cluster/test_stats_extrapolation.py
b/tests/custom_cluster/test_stats_extrapolation.py
index cd0af1f38..9a6cdba84 100644
--- a/tests/custom_cluster/test_stats_extrapolation.py
+++ b/tests/custom_cluster/test_stats_extrapolation.py
@@ -19,15 +19,20 @@ from __future__ import absolute_import, division,
print_function
import pytest
from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
from tests.common.test_dimensions import (
- create_exec_option_dimension,
create_single_exec_option_dimension,
create_uncompressed_text_dimension)
+from tests.common.test_vector import HS2
+
class TestStatsExtrapolation(CustomClusterTestSuite):
"""Minimal end-to-end test for the --enable_stats_extrapolation impalad
flag. This test
primarly checks that the flag is propagated to the FE. More testing is done
in FE unit
tests and metadata/test_stats_extrapolation.py."""
+ @classmethod
+ def default_test_protocol(cls):
+ return HS2
+
@classmethod
def add_test_dimensions(cls):
super(TestStatsExtrapolation, cls).add_test_dimensions()
@@ -52,13 +57,13 @@ class TestStatsExtrapolation(CustomClusterTestSuite):
"compute stats {0} tablesample system (13)".format(part_test_tbl))
# Check that table stats were set.
table_stats = self.client.execute("show table stats
{0}".format(part_test_tbl))
- col_names = [fs.name.upper() for fs in table_stats.schema.fieldSchemas]
+ col_names = table_stats.column_labels
extrap_rows_idx = col_names.index("EXTRAP #ROWS")
for row in table_stats.data:
assert int(row.split("\t")[extrap_rows_idx]) >= 0
# Check that column stats were set.
col_stats = self.client.execute("show column stats
{0}".format(part_test_tbl))
- col_names = [fs.name.upper() for fs in col_stats.schema.fieldSchemas]
+ col_names = col_stats.column_labels
ndv_col_idx = col_names.index("#DISTINCT VALUES")
for row in col_stats.data:
assert int(row.split("\t")[ndv_col_idx]) >= 0
diff --git a/tests/metadata/test_event_processing.py
b/tests/metadata/test_event_processing.py
index 1d1d2cc19..9315d99dc 100644
--- a/tests/metadata/test_event_processing.py
+++ b/tests/metadata/test_event_processing.py
@@ -25,6 +25,7 @@ from tests.common.test_dimensions import (
add_mandatory_exec_option)
from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.skip import SkipIfFS, SkipIfHive2, SkipIfCatalogV2
+from tests.common.test_vector import HS2
from tests.metadata.test_event_processing_base import TestEventProcessingBase
from tests.util.event_processor_utils import EventProcessorUtils
@@ -37,6 +38,10 @@ class TestEventProcessing(ImpalaTestSuite):
"""This class contains tests that exercise the event processing mechanism in
the
catalog."""
+ @classmethod
+ def default_test_protocol(cls):
+ return HS2
+
@SkipIfHive2.acid
def test_transactional_insert_events(self, unique_database):
"""Executes 'run_test_insert_events' for transactional tables.
diff --git a/tests/metadata/test_hms_integration.py
b/tests/metadata/test_hms_integration.py
index 27b0e538c..630d2b3cd 100644
--- a/tests/metadata/test_hms_integration.py
+++ b/tests/metadata/test_hms_integration.py
@@ -36,6 +36,7 @@ from tests.common.skip import SkipIfFS, SkipIfHive2,
SkipIfHive3
from tests.common.test_dimensions import (
create_single_exec_option_dimension,
create_uncompressed_text_dimension)
+from tests.common.test_vector import HS2
from tests.util.event_processor_utils import EventProcessorUtils
from tests.util.hive_utils import HiveDbWrapper, HiveTableWrapper
@@ -133,6 +134,10 @@ class TestHmsIntegrationSanity(ImpalaTestSuite):
@SkipIfFS.hive
class TestHmsIntegration(ImpalaTestSuite):
+ @classmethod
+ def default_test_protocol(cls):
+ return HS2
+
@classmethod
def add_test_dimensions(cls):
super(TestHmsIntegration, cls).add_test_dimensions()
diff --git a/tests/metadata/test_refresh_partition.py
b/tests/metadata/test_refresh_partition.py
index 64ab9b144..18d1fbd69 100644
--- a/tests/metadata/test_refresh_partition.py
+++ b/tests/metadata/test_refresh_partition.py
@@ -19,6 +19,7 @@ from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.test_dimensions import create_single_exec_option_dimension
from tests.common.test_dimensions import create_uncompressed_text_dimension
from tests.common.skip import SkipIfFS
+from tests.common.test_vector import HS2
from tests.util.filesystem_utils import get_fs_path
@@ -29,6 +30,10 @@ class TestRefreshPartition(ImpalaTestSuite):
for a table in HDFS
"""
+ @classmethod
+ def default_test_protocol(cls):
+ return HS2
+
@classmethod
def add_test_dimensions(cls):
super(TestRefreshPartition, cls).add_test_dimensions()
@@ -162,4 +167,4 @@ class TestRefreshPartition(ImpalaTestSuite):
# Check that data is visible for the second partition after refresh
self.client.execute("refresh %s partition (year=2010, month=2)" %
table_name)
result = self.client.execute("select count(*) from %s" % table_name)
- assert result.data == [str(file_num_rows*2)]
+ assert result.data == [str(file_num_rows * 2)]
diff --git a/tests/metadata/test_stats_extrapolation.py
b/tests/metadata/test_stats_extrapolation.py
index 10a12daa9..fccba527f 100644
--- a/tests/metadata/test_stats_extrapolation.py
+++ b/tests/metadata/test_stats_extrapolation.py
@@ -17,12 +17,12 @@
from __future__ import absolute_import, division, print_function
from builtins import range
-from os import path
from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.skip import SkipIfEC
from tests.common.test_dimensions import (
create_single_exec_option_dimension,
create_uncompressed_text_dimension)
+from tests.common.test_vector import HS2
class TestStatsExtrapolation(ImpalaTestSuite):
@@ -30,6 +30,10 @@ class TestStatsExtrapolation(ImpalaTestSuite):
enabled via table property and not via the impalad startup flag so these
tests can be
run as regular tests (non-custom-cluster) and in parallel with other
tests."""
+ @classmethod
+ def default_test_protocol(cls):
+ return HS2
+
@classmethod
def add_test_dimensions(cls):
super(TestStatsExtrapolation, cls).add_test_dimensions()
@@ -70,8 +74,8 @@ class TestStatsExtrapolation(ImpalaTestSuite):
# Test unpartitioned table.
nopart_test_tbl = unique_database + ".alltypesnopart"
- self.client.execute("create table {0} as select * from
functional.alltypes"\
- .format(nopart_test_tbl))
+ self.client.execute(
+ "create table {0} as select * from
functional.alltypes".format(nopart_test_tbl))
# Clone to use as a baseline. We run the regular COMPUTE STATS on this
table.
nopart_test_tbl_base = unique_database + ".alltypesnopart_base"
self.clone_table(nopart_test_tbl, nopart_test_tbl_base, False, vector)
@@ -118,7 +122,7 @@ class TestStatsExtrapolation(ImpalaTestSuite):
def __set_extrapolation_tblprop(self, tbl):
"""Alters the given table to enable stats extrapolation via tblproperty."""
- self.client.execute("alter table {0} set "\
+ self.client.execute("alter table {0} set "
"tblproperties('impala.enable.stats.extrapolation'='true')".format(tbl))
def __run_sampling_test(self, tbl, cols, expected_tbl, perc, seed):
@@ -127,8 +131,9 @@ class TestStatsExtrapolation(ImpalaTestSuite):
the resulting table and column stats are reasonably close to those of
'expected_tbl'."""
self.client.execute("drop stats {0}".format(tbl))
- self.client.execute("compute stats {0}{1} tablesample system ({2})
repeatable ({3})"\
- .format(tbl, cols, perc, seed))
+ self.client.execute(
+ "compute stats {0}{1} tablesample system ({2}) repeatable
({3})".format(
+ tbl, cols, perc, seed))
self.__check_table_stats(tbl, expected_tbl)
self.__check_column_stats(cols, tbl, expected_tbl)
@@ -139,8 +144,8 @@ class TestStatsExtrapolation(ImpalaTestSuite):
actual = self.client.execute("show table stats {0}".format(tbl))
expected = self.client.execute("show table stats {0}".format(expected_tbl))
assert len(actual.data) == len(expected.data)
- assert len(actual.schema.fieldSchemas) == len(expected.schema.fieldSchemas)
- col_names = [fs.name.upper() for fs in actual.schema.fieldSchemas]
+ assert len(actual.column_labels) == len(expected.column_labels)
+ col_names = actual.column_labels
rows_col_idx = col_names.index("#ROWS")
extrap_rows_col_idx = col_names.index("EXTRAP #ROWS")
for i in range(0, len(actual.data)):
@@ -149,7 +154,7 @@ class TestStatsExtrapolation(ImpalaTestSuite):
assert int(exp_cols[rows_col_idx]) >= 0
# The expected_tbl is expected to have valid extrapolated #rows for
every partition.
assert int(act_cols[extrap_rows_col_idx]) >= 0
- self.appx_equals(\
+ self.appx_equals(
int(act_cols[extrap_rows_col_idx]), int(exp_cols[rows_col_idx]), 1.0)
# Only the table-level row count is stored. The partition row counts
# are extrapolated.
@@ -167,8 +172,8 @@ class TestStatsExtrapolation(ImpalaTestSuite):
actual = self.client.execute("show column stats {0}".format(tbl))
expected = self.client.execute("show column stats
{0}".format(expected_tbl))
assert len(actual.data) == len(expected.data)
- assert len(actual.schema.fieldSchemas) == len(expected.schema.fieldSchemas)
- col_names = [fs.name.upper() for fs in actual.schema.fieldSchemas]
+ assert len(actual.column_labels) == len(expected.column_labels)
+ col_names = actual.column_labels
ndv_col_idx = col_names.index("#DISTINCT VALUES")
for i in range(0, len(actual.data)):
act_cols = actual.data[i].split("\t")