Repository: incubator-impala
Updated Branches:
  refs/heads/master d44df8368 -> 8d4f8d8d9


IMPALA-4343,IMPALA-4354: qgen: model INSERTs; write INSERTs from query model

This patch adds support to the random query generator infrastructure to
model and write SQL INSERTs. It does not actually randomly generate
INSERTs at this time (tracked in IMPALA-4353 and umbrella task
IMPALA-3740) but does provide necessary building blocks to do so.

First, it's necessary to model the INSERTs as part of our data model.
This was done by taking the current notion of a Query and making it a
SelectQuery. We also then create an abstract Query containing some of
the more common methods and attributes. We then model an INSERT query,
INSERT clause, and VALUES clause (IMPALA-4343).

Second, it's necessary to test the basics of this data model. It made
sense to go ahead and implement the necessary SqlWriter methods to write
the SQL for these clauses (IMPALA-4354).

I could then use this writer with some existing and new tests that take
a query written into our data model and write the SQL, verifying they're
correct.

For INSERT into Kudu tables, the equivalent PostgreSQL queries need to
use "ON CONFLICT DO NOTHING", so all existing and new query tests verify
they can be written as PostgreSQL as well.

Testing:
- all the query generator tests pass
- I can run Leopard front_end.py and load older query generator reports,
  browse them, and re-run failed queries
- I can run Leopard controller.py to actually do a query generator
  run
- discrepancy_searcher.py --explain-only ran for hundreds of queries.
  There were no problems writing the SELECT queries

Change-Id: I38e24da78c49e908449b35f0a6276ebe4236ddba
Reviewed-on: http://gerrit.cloudera.org:8080/5162
Reviewed-by: Michael Brown <[email protected]>
Reviewed-by: Taras Bobrovytsky <[email protected]>
Tested-by: Taras Bobrovytsky <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/8d4f8d8d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/8d4f8d8d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/8d4f8d8d

Branch: refs/heads/master
Commit: 8d4f8d8d9317d2f562cbc7e1517906cd7d6cd8e1
Parents: d44df83
Author: Michael Brown <[email protected]>
Authored: Mon Nov 14 13:53:03 2016 -0800
Committer: Taras Bobrovytsky <[email protected]>
Committed: Fri Dec 2 20:49:43 2016 +0000

----------------------------------------------------------------------
 tests/comparison/model_translator.py            | 139 ++++++--
 tests/comparison/query.py                       | 193 +++++++++-
 tests/comparison/tests/query_object_testdata.py | 350 ++++++++++++++++++-
 tests/comparison/tests/test_query_objects.py    |  28 +-
 4 files changed, 642 insertions(+), 68 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8d4f8d8d/tests/comparison/model_translator.py
----------------------------------------------------------------------
diff --git a/tests/comparison/model_translator.py 
b/tests/comparison/model_translator.py
index b3d4a97..61b041e 100644
--- a/tests/comparison/model_translator.py
+++ b/tests/comparison/model_translator.py
@@ -29,11 +29,12 @@ from db_types import (
     String,
     Timestamp,
     VarChar)
-from query import Query
+from query import InsertStatement, Query
 from query_flattener import QueryFlattener
 
 LOG = getLogger(__name__)
 
+
 class SqlWriter(object):
   '''Subclasses of SQLWriter will take a Query and provide the SQL 
representation for a
      specific database such as Impala or MySQL. The 
SqlWriter.create([dialect=])
@@ -89,12 +90,26 @@ class SqlWriter(object):
         'IsNull': '({0}) IS NULL',
         'IsNotNull': '({0}) IS NOT NULL'}
 
-  def write_query(self, query, pretty=False):
-    '''Return SQL as a string for the given query.
+  def write_query(self, statement, pretty=False):
+    """
+    Return SQL as a string for the given query.
 
-       If "pretty" is True, the SQL will be formatted (though not very well) 
with new
-       lines and indentation.
-    '''
+    If "pretty" is True, the SQL will be formatted (though not very well) with 
new
+    lines and indentation.
+    """
+    sql = self._write(statement)
+    if pretty:
+      sql = self.make_pretty_sql(sql)
+    return sql
+
+  def _write_query(self, query):
+    """
+    Taking in a Query object with some attributes set, return a string
+    representation of the query in the correct dialect.
+
+    This is just another dispatch destination of self._write(). When
+    self._write(Query) is called, that's dispatched to self._write_query(Query)
+    """
     sql = list()
     # Write out each section in the proper order
     for clause in (
@@ -106,12 +121,36 @@ class SqlWriter(object):
         query.having_clause,
         query.union_clause,
         query.order_by_clause,
-        query.limit_clause):
+        query.limit_clause
+    ):
       if clause:
         sql.append(self._write(clause))
     sql = '\n'.join(sql)
-    if pretty:
-      sql = self.make_pretty_sql(sql)
+    return sql
+
+  def _write_insert_statement(self, insert_statement):
+    """
+    Taking in a InsertStatement object with some attributes set, return a 
string
+    representation of the query in the correct dialect.
+    """
+    sql = list()
+
+    if insert_statement.with_clause:
+      sql.append(self._write(insert_statement.with_clause))
+
+    if insert_statement.insert_clause:
+      sql.append(self._write(insert_statement.insert_clause))
+    else:
+      raise Exception('InsertStatement is missing insert_clause attribute')
+
+    if insert_statement.select_query and not insert_statement.values_clause:
+      sql.append(self._write(insert_statement.select_query))
+    elif not insert_statement.select_query and insert_statement.values_clause:
+      sql.append(self._write(insert_statement.values_clause))
+    else:
+      raise Exception('InsertStatement must have a select_query xor a values 
clause')
+
+    sql = '\n'.join(sql)
     return sql
 
   def make_pretty_sql(self, sql):
@@ -150,14 +189,14 @@ class SqlWriter(object):
 
   def _write_struct_column(self, struct_col):
     if isinstance(struct_col.owner, StructColumn) or \
-        (isinstance(struct_col.owner, CollectionColumn) and not 
struct_col.owner.alias):
+       (isinstance(struct_col.owner, CollectionColumn) and not 
struct_col.owner.alias):
       return '%s.%s' % (self._write(struct_col.owner), struct_col.name)
     else:
       return '%s.%s' % (struct_col.owner.identifier, struct_col.name)
 
   def _write_collection_column(self, collection_col):
-    if isinstance(collection_col.owner,
-        (StructColumn, CollectionColumn)) and not collection_col.owner.alias:
+    if isinstance(collection_col.owner, (StructColumn, CollectionColumn)) and \
+       not collection_col.owner.alias:
       if collection_col.alias:
         return '%s.%s %s' % (
             self._write(collection_col.owner),
@@ -385,6 +424,35 @@ class SqlWriter(object):
   def _write_limit_clause(self, limit_clause):
     return 'LIMIT {0}'.format(limit_clause.limit)
 
+  def _write_insert_clause(self, insert_clause):
+    """
+    Given an InsertClause, return a string representing that portion of the 
query. The
+    InsertClause object may have the column_list attribute set, which is a
+    sequence of columns.
+    """
+    if insert_clause.column_list is None:
+      column_list = ''
+    else:
+      column_list = ' ({column_list})'.format(
+          column_list=', '.join([col.name for col in 
insert_clause.column_list]))
+    return 'INSERT INTO {table_name}{column_list}'.format(
+        table_name=insert_clause.table.name, column_list=column_list)
+
+  def _write_values_row(self, values_row):
+    """
+    Return a string representing 1 row of a VALUES clause.
+    """
+    return '({values_row})'.format(
+        values_row=', '.join([self._write(item) for item in values_row.items]))
+
+  def _write_values_clause(self, values_clause):
+    """
+    Return a string representing the VALUES clause of an INSERT query.
+    """
+    return 'VALUES {values_rows}'.format(
+        values_rows=', '.join([self._write(values_row)
+                               for values_row in values_clause.values_rows]))
+
   def _write(self, object_):
     '''Return a sql string representation of the given object.'''
     # What's below is effectively a giant switch statement. It works based on 
a func
@@ -460,9 +528,9 @@ class HiveSqlWriter(SqlWriter):
     super(HiveSqlWriter, self).__init__(*args, **kwargs)
 
     self.operator_funcs.update({
-      'IsNotDistinctFrom': '({0}) <=> ({1})',
-      'IsNotDistinctFromOp': '({0}) <=> ({1})',
-      'IsDistinctFrom': 'NOT(({0}) <=> ({1}))'
+        'IsNotDistinctFrom': '({0}) <=> ({1})',
+        'IsNotDistinctFromOp': '({0}) <=> ({1})',
+        'IsDistinctFrom': 'NOT(({0}) <=> ({1}))'
     })
 
   # Hive greatest UDF is strict on type equality
@@ -474,11 +542,9 @@ class HiveSqlWriter(SqlWriter):
     args = func.args
     if args[0].type in (Int, Decimal, Float):
       argtype = args[0].type.__name__.lower()
-      sql = '%s(%s)' % \
-            (self._to_sql_name(func.name()),
-             (self._write_cast(args[0], argtype)
-              + ", "
-              + self._write_cast(args[1], argtype)))
+      sql = '%s(%s)' % (
+          self._to_sql_name(func.name()),
+          self._write_cast(args[0], argtype) + ", " + 
self._write_cast(args[1], argtype))
     else:
       sql = self._write_func(func)
     return sql
@@ -492,11 +558,9 @@ class HiveSqlWriter(SqlWriter):
     args = func.args
     if args[0].type in (Int, Decimal, Float):
       argtype = args[0].type.__name__.lower()
-      sql = '%s(%s)' % \
-            (self._to_sql_name(func.name()),
-             (self._write_cast(args[0], argtype)
-              + ", "
-              + self._write_cast(args[1], argtype)))
+      sql = '%s(%s)' % (
+          self._to_sql_name(func.name()),
+          self._write_cast(args[0], argtype) + ", " + 
self._write_cast(args[1], argtype))
     else:
       sql = self._write_func(func)
     return sql
@@ -516,12 +580,13 @@ class HiveSqlWriter(SqlWriter):
   def arithmetic_cast(self, func, symbol):
     args = func.args
     if args[0].type is Int and args[1].type is Int:
-      return 'CAST (%s AS BIGINT) %s CAST (%s AS BIGINT)' % \
-        (self._write(args[0]), symbol, self._write(args[1]))
+      return 'CAST (%s AS BIGINT) %s CAST (%s AS BIGINT)' % (
+          self._write(args[0]), symbol, self._write(args[1]))
     else:
       return self._write_func(func)
 
-  # Hive partition by clause throws exception if sorted by more than one key, 
unless 'rows unbounded preceding' added.
+  # Hive partition by clause throws exception if sorted by more than one key, 
unless
+  # 'rows unbounded preceding' added.
   def _write_analytic_func(self, func):
     sql = self._to_sql_name(func.name()) \
         + '(' + self._write_as_comma_list(func.args) \
@@ -562,6 +627,17 @@ class PostgresqlSqlWriter(SqlWriter):
 
   DIALECT = 'POSTGRESQL'
 
+  def _write_insert_statement(self, insert_statement):
+    sql = SqlWriter._write_insert_statement(self, insert_statement)
+    if insert_statement.conflict_action == 
InsertStatement.CONFLICT_ACTION_DEFAULT:
+      pass
+    elif insert_statement.conflict_action == 
InsertStatement.CONFLICT_ACTION_IGNORE:
+      sql += '\nON CONFLICT DO NOTHING'
+    else:
+      raise Exception('InsertStatement has unsupported conflict_action: 
{0}'.format(
+          insert_statement.conflict_action))
+    return sql
+
   def _write_date_add_year(self, func):
     return "%s + (%s) * INTERVAL '1' YEAR" \
         % (self._write(func.args[0]), self._write(func.args[1]))
@@ -602,11 +678,11 @@ class PostgresqlSqlWriter(SqlWriter):
         col = col.owner
       return col
     return '%s.%s' % (first_non_struct_ancestor(col).identifier,
-        QueryFlattener.flat_column_name(col))
+                      QueryFlattener.flat_column_name(col))
 
   def _write_collection_column(self, collection_col):
     return '%s %s' % (QueryFlattener.flat_collection_name(collection_col),
-        collection_col.identifier)
+                      collection_col.identifier)
 
   def _write_extract_second(self, func):
     # For some reason Postgresql decided that extracting second should return 
a FLOAT...
@@ -672,7 +748,8 @@ class MySQLSqlWriter(SqlWriter):
         query.having_clause,
         query.union_clause,
         query.order_by_clause,
-        query.limit_clause):
+        query.limit_clause
+    ):
       if clause:
         sql.append(self._write(clause))
     sql = '\n'.join(sql)

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8d4f8d8d/tests/comparison/query.py
----------------------------------------------------------------------
diff --git a/tests/comparison/query.py b/tests/comparison/query.py
index f6ec65c..8821a85 100644
--- a/tests/comparison/query.py
+++ b/tests/comparison/query.py
@@ -15,22 +15,72 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from abc import ABCMeta, abstractproperty
 from copy import deepcopy
 from logging import getLogger
 
 from common import Column, TableExpr, TableExprList, ValExpr, ValExprList
-from db_types import Float
 
 LOG = getLogger(__name__)
 
-class Query(object):
-  '''A representation of the structure of a SQL query. Only the select_clause 
and
-     from_clause are required for a valid query.
-  '''
+
+class AbstractStatement(object):
+  """
+  Abstract query representation
+  """
+
+  __metaclass__ = ABCMeta
 
   def __init__(self):
+    # reference to statement's parent. For example the right side of a UNION 
clause
+    # SELECT will have a parent as the SELECT on the left, which for the query
+    # generator's purpose is the parent
     self.parent = None
+    # optional WITH clause some statements may have
     self.with_clause = None
+    # Used by QueryExecutor to track whether this query is "raw", a CREATE 
TABLE AS
+    # SELECT, or CREATE VIEW AS SELECT
+    # TODO: Instead of plain strings, these values should be from some 
enumerated type.
+    self.execution = 'RAW'
+
+  @abstractproperty
+  def table_exprs(self):
+    """
+    Return a list of all table expressions that are declared by this query. 
This is
+    abstract as the clauses that do this differ across query types. Since all 
supported
+    queries may have a WITH clause, getting table expressions from the WITH 
clause is
+    supported here.
+    """
+    # This is an abstractproperty because it's only a *partial* 
implementation, however
+    # for any statement or query that has a WITH clause, we can handle that 
here.
+    table_exprs = TableExprList([])
+    if self.with_clause:
+      table_exprs.extend(self.with_clause.table_exprs)
+    return table_exprs
+
+  @abstractproperty
+  def nested_queries(self):
+    """
+    Returns a list of queries contained within this query. Different queries 
may have
+    different clauses containing subqueries, so this is an abtract property.
+    """
+    pass
+
+
+class Query(AbstractStatement):
+  # TODO: This has to be called Query for as long as we want to unpickle old 
reports, or
+  # we have to get into the legalese weeds. See:
+  # https://gerrit.cloudera.org/#/c/5162/5/tests/comparison/query.py@61
+  # 
https://gerrit.cloudera.org/#/c/5162/1/tests/comparison/leopard/custom_pickle.py@9
+  # If we decide at some point we don't need to unpickle some of the recent 
reports,
+  # then this can be renamed to something like SelectStatement.
+  """
+  A representation of the structure of a SQL SELECT query. Only the 
select_clause and
+  from_clause are required for a valid query.
+  """
+
+  def __init__(self):
+    super(Query, self).__init__()
     self.select_clause = None
     self.from_clause = None
     self.where_clause = None
@@ -39,13 +89,13 @@ class Query(object):
     self.union_clause = None
     self.order_by_clause = None
     self.limit_clause = None
-    self.execution = 'RAW'
 
   def __deepcopy__(self, memo):
     other = Query()
     memo[self] = other
     other.parent = memo[self.parent] if self.parent in memo else None
     other.with_clause = deepcopy(self.with_clause, memo)
+    other.execution = self.execution
     other.from_clause = deepcopy(self.from_clause, memo)
     other.select_clause = deepcopy(self.select_clause, memo)
     other.where_clause = deepcopy(self.where_clause, memo)
@@ -54,7 +104,6 @@ class Query(object):
     other.union_clause = deepcopy(self.union_clause, memo)
     other.order_by_clause = deepcopy(self.order_by_clause, memo)
     other.limit_clause = deepcopy(self.limit_clause, memo)
-    other.execution = self.execution
     return other
 
   @property
@@ -62,16 +111,11 @@ class Query(object):
     '''Provides a list of all table_exprs that are declared by this query. This
        includes table_exprs in the WITH and FROM sections.
     '''
-    table_exprs = self.from_clause.table_exprs
-    if self.with_clause:
-      table_exprs += self.with_clause.table_exprs
+    table_exprs = super(Query, self).table_exprs  # WITH clause
+    table_exprs.extend(self.from_clause.table_exprs)
     return table_exprs
 
   @property
-  def is_nested_query(self):
-    return self.parent
-
-  @property
   def is_unioned_query(self):
     return self.parent \
         and self.parent.union_clause \
@@ -90,7 +134,8 @@ class Query(object):
     if self.union_clause:
       queries.append(self.union_clause.query)
     if self.where_clause:
-      queries.extend(subquery.query for subquery in \
+      queries.extend(
+          subquery.query for subquery in
           self.where_clause.boolean_expr.iter_exprs(lambda expr: 
expr.is_subquery))
     for query in list(queries):
       queries.extend(query.nested_queries)
@@ -349,6 +394,7 @@ class Subquery(ValExpr):
   def __deepcopy__(self, memo):
     return Subquery(deepcopy(self.query, memo))
 
+
 class FromClause(object):
   '''A representation of a FROM clause. The member variable join_clauses may 
optionally
      contain JoinClause items.
@@ -370,7 +416,8 @@ class FromClause(object):
 
   def __deepcopy__(self, memo):
     other = FromClause(deepcopy(self.table_expr, memo))
-    other.join_clauses = [deepcopy(join_clause, memo) for join_clause in 
self.join_clauses]
+    other.join_clauses = [deepcopy(join_clause, memo)
+                          for join_clause in self.join_clauses]
     return other
 
   @property
@@ -397,6 +444,7 @@ class FromClause(object):
       if 'ANTI' in join_clause.join_type or 'SEMI' in join_clause.join_type:
         return True
 
+
 class InlineView(TableExpr):
   '''Represents an inline view.
 
@@ -602,7 +650,7 @@ class OrderByClause(object):
       self.exprs_to_order.append((item, order))
 
   def __deepcopy__(self, memo):
-    other = OrderByClause(val_exprs = list())
+    other = OrderByClause(val_exprs=list())
     for (item, order) in self.exprs_to_order:
       other.exprs_to_order.append((deepcopy(item, memo), order))
     return other
@@ -614,4 +662,113 @@ class LimitClause(object):
     self.limit = limit
 
   def __deepcopy__(self, memo):
-    return LimitClause(deepcopy(limit, memo))
+    return LimitClause(deepcopy(self.limit, memo))
+
+
+class InsertClause(object):
+
+  def __init__(self, table, column_list=None):
+    """
+    Represent an INSERT clause, which is the first half of an INSERT 
statement. The
+    table is a Table object.
+
+    column_list is an optional list, tuple, or other sequence of
+    tests.comparison.common.Column objects.
+
+    In an INSERT statement, it's a sequence of column names. See
+    
http://www.cloudera.com/documentation/enterprise/latest/topics/impala_insert.html
+    """
+    self.table = table
+    self.column_list = column_list
+
+
+class ValuesRow(object):
+  def __init__(self, items):
+    """
+    Represent a single row in a VALUES clause. The items are literals or 
expressions.
+    """
+    self.items = items
+
+
+class ValuesClause(object):
+  def __init__(self, values_rows):
+    """
+    Represent the VALUES clause of an INSERT statement. The values_rows is a 
sequence of
+    ValuesRow objects.
+    """
+    self.values_rows = values_rows
+
+
+class InsertStatement(AbstractStatement):
+
+  (CONFLICT_ACTION_DEFAULT,
+   CONFLICT_ACTION_IGNORE) = range(2)
+
+  def __init__(self, with_clause=None, insert_clause=None, select_query=None,
+               values_clause=None, conflict_action=CONFLICT_ACTION_DEFAULT):
+    """
+    Represent an INSERT statement. The INSERT may have an optional WithClause, 
and then
+    either a SELECT query (Query) object from whose rows we INSERT, or a 
VALUES clause,
+    but not both.
+
+    conflict_action takes in one of the CONFLICT_ACTION_* class attributes. On 
INSERT if
+    the conflict_action is CONFLICT_ACTION_DEFAULT, we write standard INSERT 
queries.
+
+    If CONFLICT_ACTION_IGNORE is chosen instead, PostgreSQL INSERTs will use 
"ON
+    CONFLICT DO NOTHING". The syntax doesn't change for Impala, but the implied
+    semantics are needed: if we are INSERTing a Kudu table, conflict_action 
must be
+    CONFLICT_ACTION_IGNORE.
+    """
+    super(InsertStatement, self).__init__()
+    self._select_query = None
+    self._values_clause = None
+
+    self.select_query = select_query
+    self.values_clause = values_clause
+    self.with_clause = with_clause
+    self.insert_clause = insert_clause
+    self.conflict_action = conflict_action
+
+  @property
+  def select_query(self):
+    return self._select_query
+
+  @select_query.setter
+  def select_query(self, select_query):
+    if self.values_clause is None or select_query is None:
+      self._select_query = select_query
+    else:
+      raise Exception('An INSERT statement may not have both the select_query 
and '
+                      'values_clause set: {select}; {values}'.format(
+                          select=select_query, values=self.values_clause))
+
+  @property
+  def values_clause(self):
+    return self._values_clause
+
+  @values_clause.setter
+  def values_clause(self, values_clause):
+    if self.select_query is None or values_clause is None:
+      self._values_clause = values_clause
+    else:
+      raise Exception('An INSERT statement may not have both the select_query 
and '
+                      'values_clause set: {select}; {values}'.format(
+                          select=self.select_query, values=values_clause))
+
+  @property
+  def table_exprs(self):
+    table_exprs = super(InsertStatement, self).table_exprs  # WITH clause
+    if self.select_query is not None:
+      table_exprs.extend(self.select_query.table_exprs)
+    return table_exprs
+
+  @property
+  def nested_queries(self):
+    queries = list()
+    if self.with_clause is not None:
+      for inline_view in self.with_clause.with_clause_inline_views:
+        queries.append(inline_view.query)
+    if self.select_query is not None:
+      queries.append(self.select_query)
+      queries.extend(self.select_query.nested_queries)
+    return queries

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8d4f8d8d/tests/comparison/tests/query_object_testdata.py
----------------------------------------------------------------------
diff --git a/tests/comparison/tests/query_object_testdata.py 
b/tests/comparison/tests/query_object_testdata.py
index c2b3d3e..dc2f839 100644
--- a/tests/comparison/tests/query_object_testdata.py
+++ b/tests/comparison/tests/query_object_testdata.py
@@ -17,20 +17,34 @@
 
 from collections import namedtuple
 
-from fake_query import FakeColumn, FakeFirstValue, FakeQuery, 
FakeSelectClause, FakeTable
+from fake_query import (
+    FakeColumn,
+    FakeFirstValue,
+    FakeQuery,
+    FakeSelectClause,
+    FakeTable)
+from tests.comparison.common import TableExprList
 from tests.comparison.db_types import Char, Int
 from tests.comparison.funcs import AggCount
-from tests.comparison.query import FromClause, OrderByClause
+from tests.comparison.query import (
+    FromClause,
+    InsertClause,
+    InsertStatement,
+    OrderByClause,
+    ValuesClause,
+    ValuesRow,
+    WithClause,
+    WithClauseInlineView)
 
 
 QueryTest = namedtuple(
-    # A QueryTest object contains a Query and all data to verify about it as 
other
-    # attributes. This allows a new Query to be added without need to modify 
tests
-    # themselves. The various tests cherry-pick which test attributes they 
need to
+    # A QueryTest object contains a SELECT query and all data to verify about 
it
+    # as other attributes. This allows a new query to be added without need to 
modify
+    # tests themselves. The various tests cherry-pick which test attributes 
they need to
     # verify against the Query.
     #
     # If you add a new test, add a new attribute, or perhaps reuse one or more
-    # attributes.
+    # existing attributes.
     #
     # If you add a new test case, add a new item to QUERY_TEST_CASESs array.
     #
@@ -41,13 +55,19 @@ QueryTest = namedtuple(
         'testid',
         # Query object, formed via FakeQuery
         'query',
-        # textual form of FakeQuery
+        # textual form of query in Impala dialect
         'impala_query_string',
-        # hash representing various item counts (see SelectItem property 
methods)
+        # textual form of query in PostgreSQL dialect
+        'postgres_query_string',
+        # dictionary representing various item counts (see SelectItem property 
methods)
         'select_item_counts',
     ]
 )
 
+InsertStatementTest = namedtuple('InsertStatementTest',
+                                 ['testid', 'query', 'impala_query_string',
+                                  'postgres_query_string'])
+
 
 # FakeTables must be declared for use by queries. Tables may be reused as 
needed for
 # multiple FakeQueries.
@@ -59,9 +79,29 @@ SIMPLE_TABLE = FakeTable(
     ]
 )
 
+KUDU_TABLE = FakeTable(
+    'kudu_table',
+    [
+        FakeColumn('int_col', Int, is_primary_key=True),
+        FakeColumn('char_col', Char),
+    ]
+)
+
+# This can't be used inline because we need its table expressions later.
+SIMPLE_WITH_CLAUSE = WithClause(
+    TableExprList([
+        WithClauseInlineView(
+            FakeQuery(
+                select_clause=FakeSelectClause(SIMPLE_TABLE.cols[0]),
+                from_clause=FromClause(SIMPLE_TABLE)
+            ),
+            'with_view'
+        )
+    ])
+)
 
-# All tests involving queries should be written to use this dataset.
-QUERY_TEST_CASES = [
+# All tests involving SELECT queries should be written to use this data set.
+SELECT_QUERY_TEST_CASES = [
     QueryTest(
         testid='select col from table',
         query=FakeQuery(
@@ -74,6 +114,12 @@ QUERY_TEST_CASES = [
             'TRIM(fake_table.char_col)\n'
             'FROM fake_table'
         ),
+        postgres_query_string=(
+            'SELECT\n'
+            'fake_table.int_col,\n'
+            'fake_table.char_col\n'
+            'FROM fake_table'
+        ),
         select_item_counts={
             'items': 2,
             'basic_items': 2,
@@ -93,6 +139,11 @@ QUERY_TEST_CASES = [
             'COUNT(fake_table.int_col)\n'
             'FROM fake_table'
         ),
+        postgres_query_string=(
+            'SELECT\n'
+            'COUNT(fake_table.int_col)\n'
+            'FROM fake_table'
+        ),
         select_item_counts={
             'items': 1,
             'basic_items': 0,
@@ -116,6 +167,11 @@ QUERY_TEST_CASES = [
             'FIRST_VALUE(fake_table.int_col) OVER (ORDER BY fake_table.int_col 
ASC)\n'
             'FROM fake_table'
         ),
+        postgres_query_string=(
+            'SELECT\n'
+            'FIRST_VALUE(fake_table.int_col) OVER (ORDER BY fake_table.int_col 
ASC)\n'
+            'FROM fake_table'
+        ),
         select_item_counts={
             'items': 1,
             'basic_items': 0,
@@ -124,3 +180,277 @@ QUERY_TEST_CASES = [
         },
     ),
 ]
+
+INSERT_QUERY_TEST_CASES = [
+    InsertStatementTest(
+        testid='insert into table select cols',
+        query=InsertStatement(
+            insert_clause=InsertClause(KUDU_TABLE),
+            select_query=FakeQuery(
+                select_clause=FakeSelectClause(*SIMPLE_TABLE.cols),
+                from_clause=FromClause(SIMPLE_TABLE)
+            ),
+        ),
+        impala_query_string=(
+            'INSERT INTO kudu_table\n'
+            'SELECT\n'
+            'fake_table.int_col,\n'
+            'TRIM(fake_table.char_col)\n'
+            'FROM fake_table'
+        ),
+        postgres_query_string=(
+            'INSERT INTO kudu_table\n'
+            'SELECT\n'
+            'fake_table.int_col,\n'
+            'fake_table.char_col\n'
+            'FROM fake_table'
+        ),
+    ),
+    InsertStatementTest(
+        testid='insert into table column permutations select cols',
+        query=InsertStatement(
+            insert_clause=InsertClause(KUDU_TABLE, 
column_list=KUDU_TABLE.cols),
+            select_query=FakeQuery(
+                select_clause=FakeSelectClause(*SIMPLE_TABLE.cols),
+                from_clause=FromClause(SIMPLE_TABLE)
+            ),
+        ),
+        impala_query_string=(
+            'INSERT INTO kudu_table (int_col, char_col)\n'
+            'SELECT\n'
+            'fake_table.int_col,\n'
+            'TRIM(fake_table.char_col)\n'
+            'FROM fake_table'
+        ),
+        postgres_query_string=(
+            'INSERT INTO kudu_table (int_col, char_col)\n'
+            'SELECT\n'
+            'fake_table.int_col,\n'
+            'fake_table.char_col\n'
+            'FROM fake_table'
+        ),
+    ),
+    InsertStatementTest(
+        testid='insert into table partial column permutation select 1 col',
+        query=InsertStatement(
+            insert_clause=InsertClause(KUDU_TABLE,
+                                       column_list=[KUDU_TABLE.cols[0]]),
+            select_query=FakeQuery(
+                select_clause=FakeSelectClause(SIMPLE_TABLE.cols[0]),
+                from_clause=FromClause(SIMPLE_TABLE)
+            ),
+        ),
+        impala_query_string=(
+            'INSERT INTO kudu_table (int_col)\n'
+            'SELECT\n'
+            'fake_table.int_col\n'
+            'FROM fake_table'
+        ),
+        postgres_query_string=(
+            'INSERT INTO kudu_table (int_col)\n'
+            'SELECT\n'
+            'fake_table.int_col\n'
+            'FROM fake_table'
+        ),
+    ),
+    InsertStatementTest(
+        testid='insert into table select 1 col',
+        query=InsertStatement(
+            insert_clause=InsertClause(KUDU_TABLE),
+            select_query=FakeQuery(
+                select_clause=FakeSelectClause(SIMPLE_TABLE.cols[0]),
+                from_clause=FromClause(SIMPLE_TABLE)
+            ),
+        ),
+        impala_query_string=(
+            'INSERT INTO kudu_table\n'
+            'SELECT\n'
+            'fake_table.int_col\n'
+            'FROM fake_table'
+        ),
+        postgres_query_string=(
+            'INSERT INTO kudu_table\n'
+            'SELECT\n'
+            'fake_table.int_col\n'
+            'FROM fake_table'
+        ),
+    ),
+    InsertStatementTest(
+        testid='insert 2 value rows',
+        query=InsertStatement(
+            insert_clause=InsertClause(KUDU_TABLE),
+            values_clause=ValuesClause((
+                ValuesRow((Int(1), Char('a'))),
+                ValuesRow((Int(2), Char('b'))),
+            )),
+        ),
+        impala_query_string=(
+            'INSERT INTO kudu_table\n'
+            "VALUES (1, 'a'), (2, 'b')"
+        ),
+        postgres_query_string=(
+            'INSERT INTO kudu_table\n'
+            "VALUES (1, 'a' || ''), (2, 'b' || '')"
+        ),
+    ),
+    InsertStatementTest(
+        testid='insert 1 value',
+        query=InsertStatement(
+            insert_clause=InsertClause(KUDU_TABLE),
+            values_clause=ValuesClause((
+                ValuesRow((Int(1),)),
+            )),
+        ),
+        impala_query_string=(
+            'INSERT INTO kudu_table\n'
+            'VALUES (1)'
+        ),
+        postgres_query_string=(
+            'INSERT INTO kudu_table\n'
+            'VALUES (1)'
+        ),
+    ),
+    InsertStatementTest(
+        testid='insert value row with full column permutation',
+        query=InsertStatement(
+            insert_clause=InsertClause(KUDU_TABLE, 
column_list=KUDU_TABLE.cols),
+            values_clause=ValuesClause((
+                ValuesRow((Int(1), Char('a'))),
+            )),
+        ),
+        impala_query_string=(
+            'INSERT INTO kudu_table (int_col, char_col)\n'
+            "VALUES (1, 'a')"
+        ),
+        postgres_query_string=(
+            'INSERT INTO kudu_table (int_col, char_col)\n'
+            "VALUES (1, 'a' || '')"
+        ),
+    ),
+    InsertStatementTest(
+        testid='insert value row with partial column permutation',
+        query=InsertStatement(
+            insert_clause=InsertClause(KUDU_TABLE,
+                                       column_list=(KUDU_TABLE.cols[0],)),
+            values_clause=ValuesClause((
+                ValuesRow((Int(1),)),
+            )),
+        ),
+        impala_query_string=(
+            'INSERT INTO kudu_table (int_col)\n'
+            "VALUES (1)"
+        ),
+        postgres_query_string=(
+            'INSERT INTO kudu_table (int_col)\n'
+            "VALUES (1)"
+        ),
+    ),
+    InsertStatementTest(
+        testid='insert values seleted from with clause',
+        query=InsertStatement(
+            with_clause=SIMPLE_WITH_CLAUSE,
+            insert_clause=InsertClause(KUDU_TABLE,
+                                       column_list=(KUDU_TABLE.cols[0],)),
+            select_query=FakeQuery(
+                
select_clause=FakeSelectClause(*SIMPLE_WITH_CLAUSE.table_exprs[0].cols),
+                from_clause=FromClause(SIMPLE_WITH_CLAUSE.table_exprs[0])
+            ),
+        ),
+        impala_query_string=(
+            'WITH with_view AS (SELECT\n'
+            'fake_table.int_col\n'
+            'FROM fake_table)\n'
+            'INSERT INTO kudu_table (int_col)\n'
+            'SELECT\n'
+            'with_view.int_col\n'
+            'FROM with_view'
+        ),
+        postgres_query_string=(
+            'WITH with_view AS (SELECT\n'
+            'fake_table.int_col\n'
+            'FROM fake_table)\n'
+            'INSERT INTO kudu_table (int_col)\n'
+            'SELECT\n'
+            'with_view.int_col\n'
+            'FROM with_view'
+        ),
+    ),
+    InsertStatementTest(
+        testid='insert into table select cols ignore conflicts',
+        query=InsertStatement(
+            insert_clause=InsertClause(KUDU_TABLE),
+            select_query=FakeQuery(
+                select_clause=FakeSelectClause(*SIMPLE_TABLE.cols),
+                from_clause=FromClause(SIMPLE_TABLE)
+            ),
+            conflict_action=InsertStatement.CONFLICT_ACTION_IGNORE
+        ),
+        impala_query_string=(
+            'INSERT INTO kudu_table\n'
+            'SELECT\n'
+            'fake_table.int_col,\n'
+            'TRIM(fake_table.char_col)\n'
+            'FROM fake_table'
+        ),
+        postgres_query_string=(
+            'INSERT INTO kudu_table\n'
+            'SELECT\n'
+            'fake_table.int_col,\n'
+            'fake_table.char_col\n'
+            'FROM fake_table\n'
+            'ON CONFLICT DO NOTHING'
+        ),
+    ),
+    InsertStatementTest(
+        testid='insert 2 value rows ignore conflicts',
+        query=InsertStatement(
+            insert_clause=InsertClause(KUDU_TABLE),
+            values_clause=ValuesClause((
+                ValuesRow((Int(1), Char('a'))),
+                ValuesRow((Int(2), Char('b'))),
+            )),
+            conflict_action=InsertStatement.CONFLICT_ACTION_IGNORE
+        ),
+        impala_query_string=(
+            'INSERT INTO kudu_table\n'
+            "VALUES (1, 'a'), (2, 'b')"
+        ),
+        postgres_query_string=(
+            'INSERT INTO kudu_table\n'
+            "VALUES (1, 'a' || ''), (2, 'b' || '')\n"
+            'ON CONFLICT DO NOTHING'
+        ),
+    ),
+    InsertStatementTest(
+        testid='insert values seleted from with clause ignore conflicts',
+        query=InsertStatement(
+            with_clause=SIMPLE_WITH_CLAUSE,
+            insert_clause=InsertClause(KUDU_TABLE,
+                                       column_list=(KUDU_TABLE.cols[0],)),
+            select_query=FakeQuery(
+                
select_clause=FakeSelectClause(*SIMPLE_WITH_CLAUSE.table_exprs[0].cols),
+                from_clause=FromClause(SIMPLE_WITH_CLAUSE.table_exprs[0])
+            ),
+            conflict_action=InsertStatement.CONFLICT_ACTION_IGNORE
+        ),
+        impala_query_string=(
+            'WITH with_view AS (SELECT\n'
+            'fake_table.int_col\n'
+            'FROM fake_table)\n'
+            'INSERT INTO kudu_table (int_col)\n'
+            'SELECT\n'
+            'with_view.int_col\n'
+            'FROM with_view'
+        ),
+        postgres_query_string=(
+            'WITH with_view AS (SELECT\n'
+            'fake_table.int_col\n'
+            'FROM fake_table)\n'
+            'INSERT INTO kudu_table (int_col)\n'
+            'SELECT\n'
+            'with_view.int_col\n'
+            'FROM with_view\n'
+            'ON CONFLICT DO NOTHING'
+        ),
+    )
+]

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8d4f8d8d/tests/comparison/tests/test_query_objects.py
----------------------------------------------------------------------
diff --git a/tests/comparison/tests/test_query_objects.py 
b/tests/comparison/tests/test_query_objects.py
index b0794d2..029f524 100644
--- a/tests/comparison/tests/test_query_objects.py
+++ b/tests/comparison/tests/test_query_objects.py
@@ -17,9 +17,12 @@
 
 import pytest
 
-from tests.comparison.model_translator import SqlWriter
+from tests.comparison.model_translator import (
+    ImpalaSqlWriter,
+    PostgresqlSqlWriter,
+    SqlWriter)
 
-from query_object_testdata import QUERY_TEST_CASES
+from query_object_testdata import INSERT_QUERY_TEST_CASES, 
SELECT_QUERY_TEST_CASES
 
 
 def _idfn(query_test):
@@ -28,7 +31,7 @@ def _idfn(query_test):
 
 def verify_select_clause_items(query, expected_item_counts):
   """
-  Verify that a well-formed Query() object's select_clause (SelectClause 
instance)
+  Verify that a well-formed SelectQuery() object's select_clause (SelectClause 
instance)
   reports correct item counts. expected_item_counts should be a dictionary 
with keys
   matching SelectItem property methods that report item counts and values for 
the
   counts.
@@ -63,21 +66,28 @@ def verify_sql_matches(actual, expected, strip=True):
 
 
 @pytest.yield_fixture
-def sql_writer():
+def sql_writer(request):
   """
   Return a SqlWriter object that is torn down at the end of each test.
   """
-  # TODO: Later, we can parametrize on dialect, but for now, this is just PoC.
-  yield SqlWriter.create(dialect='IMPALA')
+  yield SqlWriter.create(dialect=request.param)
 
 
[email protected]('query_test', QUERY_TEST_CASES, ids=_idfn)
[email protected]('query_test', SELECT_QUERY_TEST_CASES, ids=_idfn)
 def test_select_clause_items(query_test):
   verify_select_clause_items(query_test.query, query_test.select_item_counts)
 
 
[email protected]('query_test', QUERY_TEST_CASES, ids=_idfn)
[email protected]('query_test', SELECT_QUERY_TEST_CASES + 
INSERT_QUERY_TEST_CASES,
+                         ids=_idfn)
[email protected]('sql_writer', ['IMPALA', 'POSTGRESQL'], indirect=True)
 def test_write_query(sql_writer, query_test):
+  if isinstance(sql_writer, ImpalaSqlWriter):
+    expected_string = getattr(query_test, 'impala_query_string')
+  elif isinstance(sql_writer, PostgresqlSqlWriter):
+    expected_string = getattr(query_test, 'postgres_query_string')
+  else:
+    raise Exception('unsupported writer: {0}'.format(sql_writer))
   verify_sql_matches(
       sql_writer.write_query(query_test.query),
-      query_test.impala_query_string)
+      expected_string)


Reply via email to