Repository: incubator-impala
Updated Branches:
  refs/heads/master ca62ce65e -> 19ff47091


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/19ff4709/tests/metadata/test_ddl.py
----------------------------------------------------------------------
diff --git a/tests/metadata/test_ddl.py b/tests/metadata/test_ddl.py
index 9cb5ce5..f1d85c6 100644
--- a/tests/metadata/test_ddl.py
+++ b/tests/metadata/test_ddl.py
@@ -12,47 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-# Impala tests for DDL statements
-import logging
 import pytest
-import shlex
 import time
 import getpass
-from tests.common.test_result_verifier import *
-from subprocess import call
 from tests.common.test_vector import *
-from tests.common.test_dimensions import ALL_NODES_ONLY
 from tests.common.impala_test_suite import *
 from tests.common.skip import SkipIf, SkipIfS3, SkipIfIsilon, SkipIfLocal, SkipIfOldAggsJoins
 from tests.util.filesystem_utils import WAREHOUSE, IS_LOCAL
+from test_ddl_base import TestDdlBase
 
 # Validates DDL statements (create, drop)
-class TestDdlStatements(ImpalaTestSuite):
+class TestDdlStatements(TestDdlBase):
   TEST_DBS = ['ddl_test_db', 'ddl_purge_db', 'alter_table_test_db',
               'alter_table_test_db2', 'function_ddl_test', 'udf_test', 'data_src_test',
               'truncate_table_test_db', 'test_db', 'alter_purge_db', 'db_with_comment']
 
-  @classmethod
-  def get_workload(self):
-    return 'functional-query'
-
-  @classmethod
-  def add_test_dimensions(cls):
-    super(TestDdlStatements, cls).add_test_dimensions()
-    sync_ddl_opts = [0, 1]
-    if cls.exploration_strategy() != 'exhaustive':
-      # Only run with sync_ddl on exhaustive since it increases test runtime.
-      sync_ddl_opts = [0]
-
-    cls.TestMatrix.add_dimension(create_exec_option_dimension(
-        cluster_sizes=ALL_NODES_ONLY,
-        disable_codegen_options=[False],
-        batch_sizes=[0],
-        sync_ddl=sync_ddl_opts))
-
-    # There is no reason to run these tests using all dimensions.
-    cls.TestMatrix.add_dimension(create_uncompressed_text_dimension(cls.get_workload()))
-
   def setup_method(self, method):
     self._cleanup()
 
@@ -61,12 +35,6 @@ class TestDdlStatements(ImpalaTestSuite):
 
   def _cleanup(self):
     map(self.cleanup_db, self.TEST_DBS)
-    if IS_LOCAL: return
-    # Cleanup the test table HDFS dirs between test runs so there are no errors the next
-    # time a table is created with the same location. This also helps remove any stale
-    # data from the last test run.
-    for dir_ in ['part_data', 't1_tmp1', 't_part_tmp']:
-      self.filesystem_client.delete_file_dir('test-warehouse/%s' % dir_, recursive=True)
 
   @SkipIfLocal.hdfs_client
   @pytest.mark.execute_serially
@@ -223,66 +191,46 @@ class TestDdlStatements(ImpalaTestSuite):
         multiple_impalad=self._use_multiple_impalad(vector))
 
   @pytest.mark.execute_serially
+  def test_create_database(self, vector):
+    self.run_test_case('QueryTest/create-database', vector,
+        multiple_impalad=self._use_multiple_impalad(vector))
+
+  @pytest.mark.execute_serially
   def test_create_table(self, vector):
     vector.get_value('exec_option')['abort_on_error'] = False
     test_db_name = 'ddl_test_db'
     self._create_db(test_db_name, sync=True)
-    try:
-      self.run_test_case('QueryTest/create', vector, use_db=test_db_name,
-          multiple_impalad=self._use_multiple_impalad(vector))
-    finally:
-      self.cleanup_db(test_db_name)
+    self.run_test_case('QueryTest/create-table', vector, use_db=test_db_name,
+        multiple_impalad=self._use_multiple_impalad(vector))
 
-  @SkipIfOldAggsJoins.nested_types
   @pytest.mark.execute_serially
-  def test_create_table_nested_types(self, vector):
+  def test_create_table_like_table(self, vector):
     vector.get_value('exec_option')['abort_on_error'] = False
     test_db_name = 'ddl_test_db'
     self._create_db(test_db_name, sync=True)
-    try:
-      self.run_test_case('QueryTest/create-nested', vector, use_db=test_db_name,
-          multiple_impalad=self._use_multiple_impalad(vector))
-    finally:
-      self.cleanup_db(test_db_name)
+    self.run_test_case('QueryTest/create-table-like-table', vector, use_db=test_db_name,
+        multiple_impalad=self._use_multiple_impalad(vector))
+
+  @pytest.mark.execute_serially
+  def test_create_table_like_file(self, vector):
+    vector.get_value('exec_option')['abort_on_error'] = False
+    test_db_name = 'ddl_test_db'
+    self._create_db(test_db_name, sync=True)
+    self.run_test_case('QueryTest/create-table-like-file', vector, use_db=test_db_name,
+        multiple_impalad=self._use_multiple_impalad(vector))
 
-  @SkipIfS3.hive
-  @SkipIfIsilon.hive
-  @SkipIfLocal.hive
   @pytest.mark.execute_serially
-  def test_create_hive_integration(self, vector):
-    """Verifies that creating a catalog entity (database, table) in Impala using
-    'IF NOT EXISTS' while the entity exists in HMS, does not throw an error.
-    TODO: This test should be eventually subsumed by the Impala/Hive integration
-    tests."""
-    # Create a database in Hive
-    ret = call(["hive", "-e", "create database test_db"])
-    assert ret == 0
-    # Creating a database with the same name using 'IF NOT EXISTS' in Impala should
-    # not fail
-    self.client.execute("create database if not exists test_db")
-    # The database should appear in the catalog (IMPALA-2441)
-    assert 'test_db' in self.all_db_names()
-    # Ensure a table can be created in this database from Impala and that it is
-    # accessable in both Impala and Hive
-    self.client.execute("create table if not exists test_db.test_tbl_in_impala(a int)")
-    ret = call(["hive", "-e", "select * from test_db.test_tbl_in_impala"])
-    assert ret == 0
-    self.client.execute("select * from test_db.test_tbl_in_impala")
-
-    # Create a table in Hive
-    ret = call(["hive", "-e", "create table test_db.test_tbl (a int)"])
-    assert ret == 0
-    # Creating a table with the same name using 'IF NOT EXISTS' in Impala should
-    # not fail
-    self.client.execute("create table if not exists test_db.test_tbl (a int)")
-    # The table should not appear in the catalog unless invalidate metadata is
-    # executed
-    assert 'test_tbl' not in self.client.execute("show tables in test_db").data
-    self.client.execute("invalidate metadata test_db.test_tbl")
-    assert 'test_tbl' in self.client.execute("show tables in test_db").data
+  def test_create_table_as_select(self, vector):
+    vector.get_value('exec_option')['abort_on_error'] = False
+    test_db_name = 'ddl_test_db'
+    self._create_db(test_db_name, sync=True)
+    self.run_test_case('QueryTest/create-table-as-select', vector, use_db=test_db_name,
+        multiple_impalad=self._use_multiple_impalad(vector))
 
   @SkipIf.kudu_not_supported
   @pytest.mark.execute_serially
+  # TODO: Move this and other Kudu-related DDL tests into a separate py test
+  # under test_ddl_base.py.
   def test_create_kudu(self, vector):
     self.expected_exceptions = 2
     vector.get_value('exec_option')['abort_on_error'] = False
@@ -306,17 +254,29 @@ class TestDdlStatements(ImpalaTestSuite):
   @pytest.mark.execute_serially
   def test_alter_table(self, vector):
     vector.get_value('exec_option')['abort_on_error'] = False
+    self.__test_alter_table_cleanup()
     # Create directory for partition data that does not use the (key=value)
     # format.
     self.filesystem_client.make_dir("test-warehouse/part_data/", permission=777)
     self.filesystem_client.create_file(
         "test-warehouse/part_data/data.txt", file_data='1984')
 
-    # Create test databases
-    self._create_db('alter_table_test_db', sync=True)
-    self._create_db('alter_table_test_db2', sync=True)
-    self.run_test_case('QueryTest/alter-table', vector, use_db='alter_table_test_db',
-        multiple_impalad=self._use_multiple_impalad(vector))
+    try:
+      # Create test databases
+      self._create_db('alter_table_test_db', sync=True)
+      self._create_db('alter_table_test_db2', sync=True)
+      self.run_test_case('QueryTest/alter-table', vector, use_db='alter_table_test_db',
+          multiple_impalad=self._use_multiple_impalad(vector))
+    finally:
+      self.__test_alter_table_cleanup()
+
+  def __test_alter_table_cleanup(self):
+    if IS_LOCAL: return
+    # Cleanup the test table HDFS dirs between test runs so there are no errors the next
+    # time a table is created with the same location. This also helps remove any stale
+    # data from the last test run.
+    for dir_ in ['part_data', 't1_tmp1', 't_part_tmp']:
+      self.filesystem_client.delete_file_dir('test-warehouse/%s' % dir_, recursive=True)
 
   @pytest.mark.execute_serially
   @SkipIf.not_default_fs
@@ -455,28 +415,6 @@ class TestDdlStatements(ImpalaTestSuite):
     service.wait_for_metric_value(class_cache_hits_metric, class_cache_hits + 2)
     service.wait_for_metric_value(class_cache_misses_metric, class_cache_misses)
 
-  def create_drop_ddl(self, vector, db_name, create_stmts, drop_stmts, select_stmt,
-      num_iterations=3):
-    """ Helper method to run CREATE/DROP DDL commands repeatedly and exercise the lib
-    cache create_stmts is the list of CREATE statements to be executed in order
-    drop_stmts is the list of DROP statements to be executed in order. Each statement
-    should have a '%s' placeholder to insert "IF EXISTS" or "". The select_stmt is just a
-    single statement to test after executing the CREATE statements.
-    TODO: it's hard to tell that the cache is working (i.e. if it did nothing to drop
-    the cache, these tests would still pass). Testing that is a bit harder and requires
-    us to update the udf binary in the middle.
-    """
-    # The db may already exist, clean it up.
-    self.cleanup_db(db_name)
-    self._create_db(db_name, sync=True)
-    self.client.set_configuration(vector.get_value('exec_option'))
-    self.client.execute("use %s" % (db_name,))
-    for drop_stmt in drop_stmts: self.client.execute(drop_stmt % ("if exists"))
-    for i in xrange(0, num_iterations):
-      for create_stmt in create_stmts: self.client.execute(create_stmt)
-      self.client.execute(select_stmt)
-      for drop_stmt in drop_stmts: self.client.execute(drop_stmt % (""))
-
   @SkipIfLocal.hdfs_client
   @pytest.mark.execute_serially
   def test_create_alter_bulk_partition(self, vector):
@@ -560,56 +498,3 @@ class TestDdlStatements(ImpalaTestSuite):
     assert properties['prop1'] == 'val1'
     assert properties['p2'] == 'val3'
     assert properties[''] == ''
-
-  @pytest.mark.execute_serially
-  def test_create_db_comment(self, vector):
-    DB_NAME = 'db_with_comment'
-    COMMENT = 'A test comment'
-    self._create_db(DB_NAME, sync=True, comment=COMMENT)
-    result = self.client.execute("show databases like '{0}'".format(DB_NAME))
-    assert len(result.data) == 1
-    cols = result.data[0].split('\t')
-    assert len(cols) == 2
-    assert cols[0] == DB_NAME
-    assert cols[1] == COMMENT
-
-  @classmethod
-  def _use_multiple_impalad(cls, vector):
-    return vector.get_value('exec_option')['sync_ddl'] == 1
-
-  def _create_db(self, db_name, sync=False, comment=None):
-    """Creates a database using synchronized DDL to ensure all nodes have the test
-    database available for use before executing the .test file(s).
-    """
-    impala_client = self.create_impala_client()
-    sync and impala_client.set_configuration({'sync_ddl': 1})
-    if comment is None:
-      ddl = "create database {0} location '{1}/{0}.db'".format(db_name, WAREHOUSE)
-    else:
-      ddl = "create database {0} comment '{1}' location '{2}/{0}.db'".format(
-        db_name, comment, WAREHOUSE)
-    impala_client.execute(ddl)
-    impala_client.close()
-
-  def _get_tbl_properties(self, table_name):
-    """Extracts the table properties mapping from the output of DESCRIBE FORMATTED"""
-    return self._get_properties('Table Parameters:', table_name)
-
-  def _get_serde_properties(self, table_name):
-    """Extracts the serde properties mapping from the output of DESCRIBE FORMATTED"""
-    return self._get_properties('Storage Desc Params:', table_name)
-
-  def _get_properties(self, section_name, table_name):
-    """Extracts the table properties mapping from the output of DESCRIBE FORMATTED"""
-    result = self.client.execute("describe formatted " + table_name)
-    match = False
-    properties = dict();
-    for row in result.data:
-      if section_name in row:
-        match = True
-      elif match:
-        row = row.split('\t')
-        if (row[1] == 'NULL'):
-          break
-        properties[row[1].rstrip()] = row[2].rstrip()
-    return properties

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/19ff4709/tests/metadata/test_ddl_base.py
----------------------------------------------------------------------
diff --git a/tests/metadata/test_ddl_base.py b/tests/metadata/test_ddl_base.py
new file mode 100644
index 0000000..5eee77f
--- /dev/null
+++ b/tests/metadata/test_ddl_base.py
@@ -0,0 +1,105 @@
+# Copyright (c) 2016 Cloudera, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from tests.common.test_vector import *
+from tests.common.test_dimensions import ALL_NODES_ONLY
+from tests.common.impala_test_suite import *
+from tests.util.filesystem_utils import WAREHOUSE, IS_LOCAL
+
+# Base class that most DDL tests inherit from. The tests have a few common functions,
+# as well as test dimensions and setup/teardown.
+class TestDdlBase(ImpalaTestSuite):
+  @classmethod
+  def get_workload(self):
+    return 'functional-query'
+
+  @classmethod
+  def add_test_dimensions(cls):
+    super(TestDdlBase, cls).add_test_dimensions()
+    sync_ddl_opts = [0, 1]
+    if cls.exploration_strategy() != 'exhaustive':
+      # Only run with sync_ddl on exhaustive since it increases test runtime.
+      sync_ddl_opts = [0]
+
+    cls.TestMatrix.add_dimension(create_exec_option_dimension(
+        cluster_sizes=ALL_NODES_ONLY,
+        disable_codegen_options=[False],
+        batch_sizes=[0],
+        sync_ddl=sync_ddl_opts))
+
+    # There is no reason to run these tests using all dimensions.
+    cls.TestMatrix.add_dimension(create_uncompressed_text_dimension(cls.get_workload()))
+
+  def create_drop_ddl(self, vector, db_name, create_stmts, drop_stmts, select_stmt,
+      num_iterations=3):
+    """Helper method to run CREATE/DROP DDL commands repeatedly and exercise the lib
+    cache. create_stmts is the list of CREATE statements to be executed in order
+    drop_stmts is the list of DROP statements to be executed in order. Each statement
+    should have a '%s' placeholder to insert "IF EXISTS" or "". The select_stmt is just a
+    single statement to test after executing the CREATE statements.
+    TODO: it's hard to tell that the cache is working (i.e. if it did nothing to drop
+    the cache, these tests would still pass). Testing that is a bit harder and requires
+    us to update the udf binary in the middle.
+    """
+    # The db may already exist, clean it up.
+    self.cleanup_db(db_name)
+    self._create_db(db_name, sync=True)
+    self.client.set_configuration(vector.get_value('exec_option'))
+    self.client.execute("use %s" % (db_name,))
+    for drop_stmt in drop_stmts: self.client.execute(drop_stmt % ("if exists"))
+    for i in xrange(0, num_iterations):
+      for create_stmt in create_stmts: self.client.execute(create_stmt)
+      self.client.execute(select_stmt)
+      for drop_stmt in drop_stmts: self.client.execute(drop_stmt % (""))
+
+  @classmethod
+  def _use_multiple_impalad(cls, vector):
+    return vector.get_value('exec_option')['sync_ddl'] == 1
+
+  def _create_db(self, db_name, sync=False, comment=None):
+    """Creates a database using synchronized DDL to ensure all nodes have the test
+    database available for use before executing the .test file(s).
+    """
+    impala_client = self.create_impala_client()
+    sync and impala_client.set_configuration({'sync_ddl': 1})
+    if comment is None:
+      ddl = "create database {0} location '{1}/{0}.db'".format(db_name, WAREHOUSE)
+    else:
+      ddl = "create database {0} comment '{1}' location '{2}/{0}.db'".format(
+        db_name, comment, WAREHOUSE)
+    impala_client.execute(ddl)
+    impala_client.close()
+
+  def _get_tbl_properties(self, table_name):
+    """Extracts the table properties mapping from the output of DESCRIBE FORMATTED"""
+    return self._get_properties('Table Parameters:', table_name)
+
+  def _get_serde_properties(self, table_name):
+    """Extracts the serde properties mapping from the output of DESCRIBE FORMATTED"""
+    return self._get_properties('Storage Desc Params:', table_name)
+
+  def _get_properties(self, section_name, table_name):
+    """Extracts the table properties mapping from the output of DESCRIBE FORMATTED"""
+    result = self.client.execute("describe formatted " + table_name)
+    match = False
+    properties = dict();
+    for row in result.data:
+      if section_name in row:
+        match = True
+      elif match:
+        row = row.split('\t')
+        if (row[1] == 'NULL'):
+          break
+        properties[row[1].rstrip()] = row[2].rstrip()
+    return properties

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/19ff4709/tests/metadata/test_hms_integration.py
----------------------------------------------------------------------
diff --git a/tests/metadata/test_hms_integration.py b/tests/metadata/test_hms_integration.py
index 51b0106..1328e37 100644
--- a/tests/metadata/test_hms_integration.py
+++ b/tests/metadata/test_hms_integration.py
@@ -21,17 +21,61 @@
 # Impala, in all the possible ways of validating that metadata.
 
 
-import logging
 import pytest
 import random
-import shlex
 import string
-import subprocess
+from subprocess import call
 from tests.common.test_result_verifier import *
 from tests.common.test_vector import *
 from tests.common.impala_test_suite import *
 from tests.common.skip import SkipIfS3, SkipIfIsilon, SkipIfLocal
 
+@SkipIfS3.hive
+@SkipIfIsilon.hive
+@SkipIfLocal.hive
+class TestHmsIntegrationSanity(ImpalaTestSuite):
+  @classmethod
+  def get_workload(self):
+    return 'functional-query'
+
+  @classmethod
+  def add_test_dimensions(cls):
+    super(TestHmsIntegrationSanity, cls).add_test_dimensions()
+    # There is no reason to run these tests using all dimensions.
+    cls.TestMatrix.add_dimension(create_single_exec_option_dimension())
+    cls.TestMatrix.add_dimension(
+        create_uncompressed_text_dimension(cls.get_workload()))
+
+  @pytest.mark.execute_serially
+  def test_sanity(self, vector):
+    """Verifies that creating a catalog entity (database, table) in Impala using
+    'IF NOT EXISTS' while the entity exists in HMS, does not throw an error."""
+    # Create a database in Hive
+    ret = call(["hive", "-e", "create database test_db"])
+    assert ret == 0
+    # Creating a database with the same name using 'IF NOT EXISTS' in Impala should
+    # not fail
+    self.client.execute("create database if not exists test_db")
+    # The database should appear in the catalog (IMPALA-2441)
+    assert 'test_db' in self.all_db_names()
+    # Ensure a table can be created in this database from Impala and that it is
+    # accessable in both Impala and Hive
+    self.client.execute("create table if not exists test_db.test_tbl_in_impala(a int)")
+    ret = call(["hive", "-e", "select * from test_db.test_tbl_in_impala"])
+    assert ret == 0
+    self.client.execute("select * from test_db.test_tbl_in_impala")
+
+    # Create a table in Hive
+    ret = call(["hive", "-e", "create table test_db.test_tbl (a int)"])
+    assert ret == 0
+    # Creating a table with the same name using 'IF NOT EXISTS' in Impala should
+    # not fail
+    self.client.execute("create table if not exists test_db.test_tbl (a int)")
+    # The table should not appear in the catalog unless invalidate metadata is
+    # executed
+    assert 'test_tbl' not in self.client.execute("show tables in test_db").data
+    self.client.execute("invalidate metadata test_db.test_tbl")
+    assert 'test_tbl' in self.client.execute("show tables in test_db").data
 
 @SkipIfS3.hive
 @SkipIfIsilon.hive

Reply via email to