Repository: hive
Updated Branches:
  refs/heads/master 8db19d150 -> e3b10c1fc


HIVE-14949 Enforce that target:source is not 1:N (Eugene Koifman, reviewed by Alan Gates)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e3b10c1f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e3b10c1f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e3b10c1f

Branch: refs/heads/master
Commit: e3b10c1fcb097242b6391aa6deb097de1581fd56
Parents: 8db19d1
Author: Eugene Koifman <ekoif...@hortonworks.com>
Authored: Mon Jan 30 12:29:12 2017 -0800
Committer: Eugene Koifman <ekoif...@hortonworks.com>
Committed: Mon Jan 30 12:29:40 2017 -0800

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/conf/HiveConf.java   |  4 +
 .../hadoop/hive/ql/exec/FunctionRegistry.java   |  1 +
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |  2 +-
 .../ql/parse/UpdateDeleteSemanticAnalyzer.java  | 77 +++++++++++++++--
 .../generic/GenericUDFCardinalityViolation.java | 64 ++++++++++++++
 .../apache/hadoop/hive/ql/TestTxnCommands.java  | 37 +++++++-
 .../apache/hadoop/hive/ql/TestTxnCommands2.java |  2 +
 .../results/clientpositive/acid_subquery.q.out  |  3 +
 .../results/clientpositive/llap/sqlmerge.q.out  | 91 ++++++++++++++++----
 .../results/clientpositive/show_functions.q.out |  2 +
 10 files changed, 255 insertions(+), 28 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/e3b10c1f/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 4e83867..586e693 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1946,6 +1946,10 @@ public class HiveConf extends Configuration {
      new TimeValidator(TimeUnit.MILLISECONDS), "Time interval describing how often the reaper runs"),
    WRITE_SET_REAPER_INTERVAL("hive.writeset.reaper.interval", "60s",
      new TimeValidator(TimeUnit.MILLISECONDS), "Frequency of WriteSet reaper runs"),
+
+    MERGE_CARDINALITY_VIOLATION_CHECK("hive.merge.cardinality.check", true,
+      "Set to true so that each SQL Merge statement checks that, for each row in the target\n" +
+        "table, there is at most 1 matching row in the source table, per the SQL Specification."),
 
     // For Druid storage handler
    HIVE_DRUID_INDEXING_GRANULARITY("hive.druid.indexer.segments.granularity", "DAY",

http://git-wip-us.apache.org/repos/asf/hive/blob/e3b10c1f/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index e166eee..0f05160 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -465,6 +465,7 @@ public final class FunctionRegistry {
     system.registerGenericUDF("printf", GenericUDFPrintf.class);
     system.registerGenericUDF("greatest", GenericUDFGreatest.class);
     system.registerGenericUDF("least", GenericUDFLeast.class);
+    system.registerGenericUDF("cardinality_violation", 
GenericUDFCardinalityViolation.class);
 
     system.registerGenericUDF("from_utc_timestamp", 
GenericUDFFromUtcTimestamp.class);
     system.registerGenericUDF("to_utc_timestamp", 
GenericUDFToUtcTimestamp.class);
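
Once registered, the function is visible like any other builtin. A quick sanity check,
sketched with expected output (the DESCRIBE text comes from the @Description annotation
on the new class further down; output shape is approximate):

    SHOW FUNCTIONS '^cardinality.*';
    -- should list: cardinality_violation

    DESCRIBE FUNCTION cardinality_violation;
    -- should print: _FUNC_(n0, n1...) - raises Cardinality Violation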

http://git-wip-us.apache.org/repos/asf/hive/blob/e3b10c1f/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 6249475..6c0f300 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -859,7 +859,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
   }
 
   /**
-   * Generate a temp table out of a value clause
+   * Generate a temp table out of a values clause
    * See also {@link #preProcessForInsert(ASTNode, QB)}
    */
  private ASTNode genValuesTempTable(ASTNode originalFrom, QB qb) throws SemanticException {
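
For context on the method this javadoc fix touches: Hive materializes a VALUES clause into a
session-scoped temp table and rewrites the statement to select from it. A hedged sketch of the
shape of that rewrite (table name illustrative, internal temp-table name elided):

    INSERT INTO acidTbl VALUES (1, 2), (3, 4);
    -- is handled roughly as:
    --   INSERT INTO acidTbl SELECT * FROM <values_tmp_table>
    -- where <values_tmp_table> holds the two literal rows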

http://git-wip-us.apache.org/repos/asf/hive/blob/e3b10c1f/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
index 79355ba..f102786 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
@@ -29,9 +29,12 @@ import java.util.Map;
 import java.util.Set;
 
 import org.antlr.runtime.TokenRewriteStream;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.TableType;
+import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.QueryState;
@@ -127,16 +130,19 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
   /**
    * Append list of partition columns to Insert statement, i.e. the 2nd set of partCol1,partCol2
    * INSERT INTO T PARTITION(partCol1,partCol2...) SELECT col1, ... partCol1,partCol2...
-   * @param targetName simple target table name (i.e. name or alias)
+   * @param target target table
    */
-  private void addPartitionColsToSelect(List<FieldSchema> partCols, StringBuilder rewrittenQueryStr, String targetName) {
+  private void addPartitionColsToSelect(List<FieldSchema> partCols, StringBuilder rewrittenQueryStr,
+                                        ASTNode target) throws SemanticException {
+    String targetName = target != null ? getSimpleTableName(target) : null;
+
     // If the table is partitioned, we need to select the partition columns as well.
     if (partCols != null) {
       for (FieldSchema fschema : partCols) {
         rewrittenQueryStr.append(", ");
         //would be nice if there was a way to determine if quotes are needed
         if(targetName != null) {
-          rewrittenQueryStr.append(HiveUtils.unparseIdentifier(targetName, this.conf)).append('.');
+          rewrittenQueryStr.append(targetName).append('.');
         }
         rewrittenQueryStr.append(HiveUtils.unparseIdentifier(fschema.getName(), this.conf));
       }
@@ -690,13 +696,15 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
     if(numWhenMatchedDeleteClauses + numWhenMatchedUpdateClauses == 2 && extraPredicate == null) {
       throw new SemanticException(ErrorMsg.MERGE_PREDIACTE_REQUIRED, ctx.getCmd());
     }
-
+    handleCardinalityViolation(rewrittenQueryStr, target, onClauseAsText, targetTable);
     ReparseResult rr = parseRewrittenQuery(rewrittenQueryStr, ctx.getCmd());
     Context rewrittenCtx = rr.rewrittenCtx;
     ASTNode rewrittenTree = rr.rewrittenTree;
 
     //set dest name mapping on new context
-    for(int insClauseIdx = 1, whenClauseIdx = 0; insClauseIdx < rewrittenTree.getChildCount(); insClauseIdx++, whenClauseIdx++) {
+    for(int insClauseIdx = 1, whenClauseIdx = 0;
+        insClauseIdx < rewrittenTree.getChildCount() - 1/*skip cardinality violation clause*/;
+        insClauseIdx++, whenClauseIdx++) {
       //we've added Insert clauses in order of WHEN items in whenClauses
       ASTNode insertClause = (ASTNode) rewrittenTree.getChild(insClauseIdx);
       switch (getWhenClauseOperation(whenClauses.get(whenClauseIdx)).getType()) {
@@ -810,6 +818,61 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
      */
     return targetTable.equals(entity.getTable());
   }
+
+  /**
+   * Per SQL Spec ISO/IEC 9075-2:2011(E) Section 14.2 under "General Rules" Item 6/Subitem a/Subitem 2/Subitem B,
+   * an error should be raised if > 1 row of "source" matches the same row in "target".
+   * This should not affect the runtime of the query as it's running in parallel with other
+   * branches of the multi-insert.  It won't actually write any data to merge_tmp_table since the
+   * cardinality_violation() UDF throws an error whenever it's called, killing the query.
+   */
+  private void handleCardinalityViolation(StringBuilder rewrittenQueryStr, ASTNode target,
+                                          String onClauseAsString, Table targetTable)
+              throws SemanticException {
+    if(!conf.getBoolVar(HiveConf.ConfVars.MERGE_CARDINALITY_VIOLATION_CHECK)) {
+      LOG.info("Merge statement cardinality violation check is disabled: " +
+        HiveConf.ConfVars.MERGE_CARDINALITY_VIOLATION_CHECK.varname);
+      return;
+    }
+    //this is a tmp table and thus Session scoped; acid requires SQL statements to be serial in a
+    // given session, i.e. the name can be fixed across all invocations
+    String tableName = "merge_tmp_table";
+    //say table T has partition column p; we are generating
+    //select cardinality_violation(ROW__ID, p) ... WHERE ... GROUP BY ROW__ID, p
+    //the Group By args are passed to cardinality_violation to add the violating value to the error msg
+    rewrittenQueryStr.append("\nINSERT INTO ").append(tableName)
+      .append("\n  SELECT cardinality_violation(")
+      .append(getSimpleTableName(target)).append(".ROW__ID");
+    addPartitionColsToSelect(targetTable.getPartCols(), rewrittenQueryStr, target);
+
+    rewrittenQueryStr.append(")\n WHERE ").append(onClauseAsString)
+      .append(" GROUP BY ").append(getSimpleTableName(target)).append(".ROW__ID");
+
+    addPartitionColsToSelect(targetTable.getPartCols(), rewrittenQueryStr, target);
+
+    rewrittenQueryStr.append(" HAVING count(*) > 1");
+    try {
+      if (null == db.getTable(tableName, false)) {
+        StorageFormat format = new StorageFormat(conf);
+        format.processStorageFormat("TextFile");
+        Table table = db.newTable(tableName);
+        table.setSerializationLib(format.getSerde());
+        List<FieldSchema> fields = new ArrayList<FieldSchema>();
+        fields.add(new FieldSchema("val", "int", null));
+        table.setFields(fields);
+        table.setDataLocation(Warehouse.getDnsPath(new Path(SessionState.get().getTempTableSpace(), tableName), conf));
+        table.getTTable().setTemporary(true);
+        table.setStoredAsSubDirectories(false);
+        table.setInputFormatClass(format.getInputFormat());
+        table.setOutputFormatClass(format.getOutputFormat());
+        db.createTable(table, true);
+      }
+    }
+    catch(HiveException|MetaException e) {
+      throw new SemanticException(e.getMessage(), e);
+    }
+  }
   /**
   * @param onClauseAsString - because there is no clone() and we need to use in multiple places
    * @param deleteExtraPredicate - see notes at caller
@@ -849,7 +912,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
         rewrittenQueryStr.append(getSimpleTableName(target)).append(".").append(HiveUtils.unparseIdentifier(name, this.conf));
       }
     }
-    addPartitionColsToSelect(targetTable.getPartCols(), rewrittenQueryStr, targetName);
+    addPartitionColsToSelect(targetTable.getPartCols(), rewrittenQueryStr, target);
     rewrittenQueryStr.append("\n   WHERE ").append(onClauseAsString);
     String extraPredicate = getWhenClausePredicate(whenMatchedUpdateClause);
     if(extraPredicate != null) {
@@ -883,7 +946,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
     addPartitionColsToInsert(partCols, rewrittenQueryStr);
 
     rewrittenQueryStr.append("    -- delete clause\n select 
").append(targetName).append(".ROW__ID ");
-    addPartitionColsToSelect(partCols, rewrittenQueryStr, targetName);
+    addPartitionColsToSelect(partCols, rewrittenQueryStr, target);
     rewrittenQueryStr.append("\n   WHERE ").append(onClauseAsString);
     String extraPredicate = getWhenClausePredicate(whenMatchedDeleteClause);
     if(extraPredicate != null) {
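
Putting the StringBuilder appends above together: for a MERGE whose ON clause is t.a = s.a
against a target t partitioned by (p), the extra branch added to the rewritten multi-insert
looks roughly like this (reconstructed from handleCardinalityViolation; line breaks and
whitespace approximate):

    INSERT INTO merge_tmp_table
      SELECT cardinality_violation(t.ROW__ID, t.p)
     WHERE t.a = s.a
     GROUP BY t.ROW__ID, t.p
     HAVING count(*) > 1

Any target ROW__ID (plus partition columns) that groups more than one joined source row trips
the HAVING clause, cardinality_violation() throws, and the whole statement fails instead of
silently applying two updates to one row.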

http://git-wip-us.apache.org/repos/asf/hive/blob/e3b10c1f/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCardinalityViolation.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCardinalityViolation.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCardinalityViolation.java
new file mode 100644
index 0000000..0724ff4e
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCardinalityViolation.java
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.util.ArrayList;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector;
+
+/**
+ * GenericUDFCardinalityViolation: always throws when evaluated; used by the MERGE
+ * rewrite to signal that a target row matched more than 1 source row.
+ */
+@Description(name = "cardinality_violation",
+  value = "_FUNC_(n0, n1...) - raises Cardinality Violation")
+public class GenericUDFCardinalityViolation extends GenericUDF {
+  private transient Converter[] converters;
+  private transient ArrayList<Object> ret = new ArrayList<Object>();
+
+  @Override
+  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+    return PrimitiveObjectInspectorFactory.javaIntObjectInspector;
+  }
+
+  @Override
+  public Object evaluate(DeferredObject[] arguments) throws HiveException {
+    StringBuilder nonUniqueKey = new StringBuilder();
+    for(DeferredObject t : arguments) {
+      if(nonUniqueKey.length() > 0) {nonUniqueKey.append(','); }
+      nonUniqueKey.append(t.get());
+    }
+    throw new RuntimeException("Cardinality Violation in Merge statement: " + 
nonUniqueKey);
+  }
+
+  @Override
+  public String getDisplayString(String[] children) {
+    return getStandardDisplayString("cardinality_violation", children, ",");
+  }
+}
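
Note that evaluate() has no success path: any call aborts the query, which is exactly the
contract the MERGE rewrite relies on. Called standalone it behaves the same way; a hedged
illustration (exact rendering of the arguments in the message is approximate):

    SELECT cardinality_violation(1, 'p1');
    -- fails with: RuntimeException: Cardinality Violation in Merge statement: 1,p1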

http://git-wip-us.apache.org/repos/asf/hive/blob/e3b10c1f/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
index 9e2179c..a90dd35 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
@@ -80,6 +80,7 @@ public class TestTxnCommands {
   private Driver d;
   private static enum Table {
     ACIDTBL("acidTbl"),
+    ACIDTBLPART("acidTblPart"),
     ACIDTBL2("acidTbl2"),
     NONACIDORCTBL("nonAcidOrcTbl"),
     NONACIDORCTBL2("nonAcidOrcTbl2");
@@ -106,6 +107,7 @@ public class TestTxnCommands {
     hiveConf
    .setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER,
        "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
+    hiveConf.setBoolVar(HiveConf.ConfVars.MERGE_CARDINALITY_VIOLATION_CHECK, true);
     TxnDbUtil.setConfValues(hiveConf);
     TxnDbUtil.prepDb();
     File f = new File(TEST_WAREHOUSE_DIR);
@@ -120,6 +122,7 @@ public class TestTxnCommands {
     d.setMaxRows(10000);
     dropTables();
     runStatementOnDriver("create table " + Table.ACIDTBL + "(a int, b int) 
clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES 
('transactional'='true')");
+    runStatementOnDriver("create table " + Table.ACIDTBLPART + "(a int, b int) 
partitioned by (p string) clustered by (a) into " + BUCKET_COUNT + " buckets 
stored as orc TBLPROPERTIES ('transactional'='true')");
     runStatementOnDriver("create table " + Table.NONACIDORCTBL + "(a int, b 
int) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc 
TBLPROPERTIES ('transactional'='false')");
     runStatementOnDriver("create table " + Table.NONACIDORCTBL2 + "(a int, b 
int) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc 
TBLPROPERTIES ('transactional'='false')");
     runStatementOnDriver("create temporary  table " + Table.ACIDTBL2 + "(a 
int, b int, c int) clustered by (c) into " + BUCKET_COUNT + " buckets stored as 
orc TBLPROPERTIES ('transactional'='true')");
@@ -759,10 +762,11 @@ public class TestTxnCommands {
     LOG.info("Explain1: " + sb);
     for(int i = 0; i < explain.size(); i++) {
       if(explain.get(i).contains("Edges:")) {
-        Assert.assertTrue(explain.get(i + 1).contains("Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)"));
-        Assert.assertTrue(explain.get(i + 2).contains("Reducer 3 <- Reducer 2 (SIMPLE_EDGE)"));
-        Assert.assertTrue(explain.get(i + 3).contains("Reducer 4 <- Reducer 2 (SIMPLE_EDGE)"));
-        Assert.assertTrue(explain.get(i + 4).contains("Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)"));
+        Assert.assertTrue("At i+1=" + (i+1) + explain.get(i + 1), explain.get(i + 1).contains("Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)"));
+        Assert.assertTrue("At i+2=" + (i+2) + explain.get(i + 2), explain.get(i + 2).contains("Reducer 3 <- Reducer 2 (SIMPLE_EDGE)"));
+        Assert.assertTrue("At i+3=" + (i+3) + explain.get(i + 3), explain.get(i + 3).contains("Reducer 4 <- Reducer 2 (SIMPLE_EDGE)"));
+        Assert.assertTrue("At i+4=" + (i+4) + explain.get(i + 4), explain.get(i + 4).contains("Reducer 5 <- Reducer 2 (SIMPLE_EDGE)"));
+        Assert.assertTrue("At i+5=" + (i+5) + explain.get(i + 5), explain.get(i + 5).contains("Reducer 6 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)"));
         break;
       }
     }
@@ -801,6 +805,31 @@ public class TestTxnCommands {
     int[][] rExpected = {{5,6},{7,8},{11,11}};
     Assert.assertEquals(stringifyValues(rExpected), r);
   }
+
+  /**
+   * see https://issues.apache.org/jira/browse/HIVE-14949 for details
+   * @throws Exception
+   */
+  @Test
+  public void testMergeCardinalityViolation() throws Exception {
+    int[][] sourceVals = {{2,2},{2,44},{5,5},{11,11}};
+    runStatementOnDriver("insert into " + Table.NONACIDORCTBL + " " + 
makeValuesClause(sourceVals));
+    int[][] targetVals = {{2,1},{4,3},{5,6},{7,8}};
+    runStatementOnDriver("insert into " + Table.ACIDTBL + " " + 
makeValuesClause(targetVals));
+    String query = "merge into " + Table.ACIDTBL +
+      " as t using " + Table.NONACIDORCTBL + " s ON t.a = s.a " +
+      "WHEN MATCHED and s.a < 5 THEN DELETE " +
+      "WHEN MATCHED AND s.a < 3 THEN update set b = 0 " +
+      "WHEN NOT MATCHED THEN INSERT VALUES(s.a, s.b) ";
+    runStatementOnDriverNegative(query);
+    runStatementOnDriver("insert into " + Table.ACIDTBLPART + " partition(p) 
values(1,1,'p1'),(2,2,'p1'),(3,3,'p1'),(4,4,'p2')");
+    query = "merge into " + Table.ACIDTBLPART +
+      " as t using " + Table.NONACIDORCTBL + " s ON t.a = s.a " +
+      "WHEN MATCHED and s.a < 5 THEN DELETE " +
+      "WHEN MATCHED AND s.a < 3 THEN update set b = 0 " +
+      "WHEN NOT MATCHED THEN INSERT VALUES(s.a, s.b, 'p1') ";
+    runStatementOnDriverNegative(query);
+  }
   @Test
   public void testSetClauseFakeColumn() throws Exception {
    CommandProcessorResponse cpr = runStatementOnDriverNegative("MERGE INTO "+ Table.ACIDTBL +
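
In plain SQL, the first half of the new test drives the equivalent of the following (values
from sourceVals/targetVals above): source rows (2,2) and (2,44) both join target row (2,1)
on a, so the match is 1:2 for that row and the statement must fail:

    MERGE INTO acidTbl t USING nonAcidOrcTbl s ON t.a = s.a
    WHEN MATCHED AND s.a < 5 THEN DELETE
    WHEN MATCHED AND s.a < 3 THEN UPDATE SET b = 0
    WHEN NOT MATCHED THEN INSERT VALUES (s.a, s.b);
    -- expected: query killed by cardinality_violation (target row a=2 matches twice)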

http://git-wip-us.apache.org/repos/asf/hive/blob/e3b10c1f/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
index af1f962..6718ae9 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
@@ -120,6 +120,8 @@ public class TestTxnCommands2 {
     hiveConf
        .setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER,
            "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
+    hiveConf.setBoolVar(HiveConf.ConfVars.MERGE_CARDINALITY_VIOLATION_CHECK, true);
+
     TxnDbUtil.setConfValues(hiveConf);
     TxnDbUtil.prepDb();
     File f = new File(TEST_WAREHOUSE_DIR);

http://git-wip-us.apache.org/repos/asf/hive/blob/e3b10c1f/ql/src/test/results/clientpositive/acid_subquery.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/acid_subquery.q.out b/ql/src/test/results/clientpositive/acid_subquery.q.out
index b6402e4..f3c44b1 100644
--- a/ql/src/test/results/clientpositive/acid_subquery.q.out
+++ b/ql/src/test/results/clientpositive/acid_subquery.q.out
@@ -75,6 +75,7 @@ PREHOOK: Input: default@target
 PREHOOK: Input: default@target@p=1/q=2
 PREHOOK: Input: default@target@p=1/q=3
 PREHOOK: Input: default@target@p=2/q=2
+PREHOOK: Output: default@merge_tmp_table
 PREHOOK: Output: default@target
 PREHOOK: Output: default@target@p=1/q=2
 PREHOOK: Output: default@target@p=1/q=2
@@ -89,9 +90,11 @@ POSTHOOK: Input: default@target
 POSTHOOK: Input: default@target@p=1/q=2
 POSTHOOK: Input: default@target@p=1/q=3
 POSTHOOK: Input: default@target@p=2/q=2
+POSTHOOK: Output: default@merge_tmp_table
 POSTHOOK: Output: default@target@p=1/q=2
 POSTHOOK: Output: default@target@p=1/q=2
 POSTHOOK: Output: default@target@p=1/q=3
 POSTHOOK: Output: default@target@p=1/q=3
 POSTHOOK: Output: default@target@p=2/q=2
 POSTHOOK: Output: default@target@p=2/q=2
+POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(target)t.FieldSchema(name:ROW__ID, type:struct<transactionId:bigint,bucketId:int,rowId:bigint>, comment:), (target)t.FieldSchema(name:p, type:int, comment:null), (target)t.FieldSchema(name:q, type:int, comment:null), ]

http://git-wip-us.apache.org/repos/asf/hive/blob/e3b10c1f/ql/src/test/results/clientpositive/llap/sqlmerge.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/sqlmerge.q.out b/ql/src/test/results/clientpositive/llap/sqlmerge.q.out
index 068b75f..486e812 100644
--- a/ql/src/test/results/clientpositive/llap/sqlmerge.q.out
+++ b/ql/src/test/results/clientpositive/llap/sqlmerge.q.out
@@ -25,24 +25,27 @@ WHEN MATCHED THEN UPDATE SET b = 7
 WHEN NOT MATCHED THEN INSERT VALUES(s.a, s.b)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-3 is a root stage
-  Stage-4 depends on stages: Stage-3
-  Stage-0 depends on stages: Stage-4
-  Stage-5 depends on stages: Stage-0
-  Stage-2 depends on stages: Stage-4
-  Stage-6 depends on stages: Stage-2
-  Stage-1 depends on stages: Stage-4
-  Stage-7 depends on stages: Stage-1
+  Stage-4 is a root stage
+  Stage-5 depends on stages: Stage-4
+  Stage-0 depends on stages: Stage-5
+  Stage-6 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-5
+  Stage-7 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-5
+  Stage-8 depends on stages: Stage-3
+  Stage-1 depends on stages: Stage-5
+  Stage-9 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-3
+  Stage: Stage-4
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
         Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
         Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
-        Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+        Reducer 5 <- Reducer 2 (SIMPLE_EDGE)
+        Reducer 6 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -58,7 +61,7 @@ STAGE PLANS:
                    value expressions: ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
             Execution mode: llap
             LLAP IO: may be used (ACID table)
-        Map 6 
+        Map 7 
             Map Operator Tree:
                 TableScan
                   alias: s
@@ -108,6 +111,25 @@ STAGE PLANS:
                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                       value expressions: _col1 (type: int)
                 Filter Operator
+                  predicate: (_col0 = _col5) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  Select Operator
+                    expressions: _col4 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+                    outputColumnNames: _col4
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    Group By Operator
+                      aggregations: count()
+                      keys: _col4 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+                        Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                        value expressions: _col1 (type: bigint)
+                Filter Operator
                  predicate: _col0 is null (type: boolean)
                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                   Select Operator
@@ -152,6 +174,30 @@ STAGE PLANS:
         Reducer 5 
             Execution mode: llap
             Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Filter Operator
+                  predicate: (_col1 > 1) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  Select Operator
+                    expressions: cardinality_violation(_col0) (type: int)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.merge_tmp_table
+        Reducer 6 
+            Execution mode: llap
+            Reduce Operator Tree:
               Select Operator
                 expressions: VALUE._col0 (type: int), VALUE._col1 (type: int)
                 outputColumnNames: _col0, _col1
@@ -165,7 +211,7 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                       name: default.acidtbl
 
-  Stage: Stage-4
+  Stage: Stage-5
     Dependency Collection
 
   Stage: Stage-0
@@ -178,7 +224,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
               name: default.acidtbl
 
-  Stage: Stage-5
+  Stage: Stage-6
     Stats-Aggr Operator
 
   Stage: Stage-2
@@ -191,7 +237,20 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
               name: default.acidtbl
 
-  Stage: Stage-6
+  Stage: Stage-7
+    Stats-Aggr Operator
+
+  Stage: Stage-3
+    Move Operator
+      tables:
+          replace: false
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.merge_tmp_table
+
+  Stage: Stage-8
     Stats-Aggr Operator
 
   Stage: Stage-1
@@ -204,6 +263,6 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
               name: default.acidtbl
 
-  Stage: Stage-7
+  Stage: Stage-9
     Stats-Aggr Operator
 

http://git-wip-us.apache.org/repos/asf/hive/blob/e3b10c1f/ql/src/test/results/clientpositive/show_functions.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/show_functions.q.out b/ql/src/test/results/clientpositive/show_functions.q.out
index b8daea9..3c9bb4a 100644
--- a/ql/src/test/results/clientpositive/show_functions.q.out
+++ b/ql/src/test/results/clientpositive/show_functions.q.out
@@ -38,6 +38,7 @@ between
 bin
 bloom_filter
 bround
+cardinality_violation
 case
 cbrt
 ceil
@@ -263,6 +264,7 @@ PREHOOK: query: SHOW FUNCTIONS '^c.*'
 PREHOOK: type: SHOWFUNCTIONS
 POSTHOOK: query: SHOW FUNCTIONS '^c.*'
 POSTHOOK: type: SHOWFUNCTIONS
+cardinality_violation
 case
 cbrt
 ceil
