hive git commit: HIVE-13486: Cast the column type for column masking (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2016-04-13 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master 7049f49d9 -> 529580f88


HIVE-13486: Cast the column type for column masking (Pengcheng Xiong, reviewed 
by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/529580f8
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/529580f8
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/529580f8

Branch: refs/heads/master
Commit: 529580f88e6e9c694a705028e08fd3ee59fd260a
Parents: 7049f49
Author: Pengcheng Xiong 
Authored: Wed Apr 13 13:12:50 2016 -0700
Committer: Pengcheng Xiong 
Committed: Wed Apr 13 13:12:50 2016 -0700

--
 .../apache/hadoop/hive/ql/parse/MaskAndFilterInfo.java|  7 ++-
 .../org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java | 10 ++
 .../java/org/apache/hadoop/hive/ql/parse/TableMask.java   |  9 -
 3 files changed, 20 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/529580f8/ql/src/java/org/apache/hadoop/hive/ql/parse/MaskAndFilterInfo.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/MaskAndFilterInfo.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/MaskAndFilterInfo.java
index 1678d2c..f5a12a3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/MaskAndFilterInfo.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/MaskAndFilterInfo.java
@@ -18,13 +18,18 @@
 
 package org.apache.hadoop.hive.ql.parse;
 
+import java.util.List;
+
 public class MaskAndFilterInfo {
+  List<String> colTypes;
   String additionalTabInfo;
   String alias;
   ASTNode astNode;
 
-  public MaskAndFilterInfo(String additionalTabInfo, String alias, ASTNode astNode) {
+  public MaskAndFilterInfo(List<String> colTypes, String additionalTabInfo, String alias,
+  ASTNode astNode) {
 super();
+this.colTypes = colTypes;
 this.additionalTabInfo = additionalTabInfo;
 this.alias = alias;
 this.astNode = astNode;

http://git-wip-us.apache.org/repos/asf/hive/blob/529580f8/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 9b565c5..d3e7040 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -10408,13 +10408,15 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
   throw new SemanticException("Table " + tabIdName + " is not found.");
 }
 
-List<String> columns = new ArrayList<>();
+List<String> colNames = new ArrayList<>();
+List<String> colTypes = new ArrayList<>();
 for (FieldSchema col : table.getAllCols()) {
-  columns.add(col.getName());
+  colNames.add(col.getName());
+  colTypes.add(col.getType());
 }
 
-basicInfos.put(new HivePrivilegeObject(table.getDbName(), table.getTableName(), columns),
-new MaskAndFilterInfo(additionalTabInfo.toString(), alias, astNode));
+basicInfos.put(new HivePrivilegeObject(table.getDbName(), table.getTableName(), colNames),
+new MaskAndFilterInfo(colTypes, additionalTabInfo.toString(), alias, astNode));
   }
  if (astNode.getChildCount() > 0 && !ignoredTokens.contains(astNode.getToken().getType())) {
 for (Node child : astNode.getChildren()) {

http://git-wip-us.apache.org/repos/asf/hive/blob/529580f8/ql/src/java/org/apache/hadoop/hive/ql/parse/TableMask.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TableMask.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/TableMask.java
index f030da2..f3c7262 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TableMask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TableMask.java
@@ -89,6 +89,7 @@ public class TableMask {
 throw new SemanticException("Expect " + privObject.getColumns().size() + " columns in "
 + privObject.getObjectName() + ", but only find " + exprs.size());
   }
+  List<String> colTypes = maskAndFilterInfo.colTypes;
   for (int index = 0; index < exprs.size(); index++) {
 String expr = exprs.get(index);
 if (expr == null) {
@@ -100,7 +101,13 @@ public class TableMask {
 } else {
   firstOne = false;
 }
-sb.append(expr + " AS " + privObject.getColumns().get(index));
+String colName = privObject.getColumns().get(index);
+if (!expr.equals(colName)) {
+  // CAST(expr AS COLTYPE) AS COLNAME
+  sb.append("CAST(" + expr + " AS " + colType
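[Editor's note: a minimal, self-contained sketch of the rewrite the truncated hunk above is building.
A masked column is re-cast to its declared type before being aliased back to the original column
name; class and method names here are illustrative, not Hive's API. Only the
CAST(expr AS COLTYPE) AS COLNAME shape comes from the commit.

import java.util.Arrays;
import java.util.List;

public class MaskCastSketch {
  // Build the replacement select list for a masked table. An unmasked column
  // passes through unchanged; a masked one is wrapped in a CAST so the masking
  // expression cannot silently change the column's declared type.
  static String selectList(List<String> colNames, List<String> colTypes, List<String> exprs) {
    StringBuilder sb = new StringBuilder();
    for (int index = 0; index < exprs.size(); index++) {
      if (index > 0) {
        sb.append(", ");
      }
      String expr = exprs.get(index);
      String colName = colNames.get(index);
      if (expr.equals(colName)) {
        sb.append(expr);
      } else {
        // CAST(expr AS COLTYPE) AS COLNAME
        sb.append("CAST(").append(expr).append(" AS ").append(colTypes.get(index))
            .append(") AS ").append(colName);
      }
    }
    return sb.toString();
  }

  public static void main(String[] args) {
    System.out.println(selectList(
        Arrays.asList("name", "ssn"),
        Arrays.asList("string", "string"),
        Arrays.asList("name", "mask(ssn)")));
    // prints: name, CAST(mask(ssn) AS string) AS ssn
  }
}]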

hive git commit: HIVE-13477: Set HivePrivilegeObjectType to TABLE_OR_VIEW (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2016-04-14 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master a207923ff -> 98699b3b7


HIVE-13477: Set HivePrivilegeObjectType to TABLE_OR_VIEW (Pengcheng Xiong, 
reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/98699b3b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/98699b3b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/98699b3b

Branch: refs/heads/master
Commit: 98699b3b7e961630e4da1404fa1c94f61dfd1a61
Parents: a207923
Author: Pengcheng Xiong 
Authored: Thu Apr 14 13:17:22 2016 -0700
Committer: Pengcheng Xiong 
Committed: Thu Apr 14 13:17:22 2016 -0700

--
 .../hive/ql/security/authorization/plugin/HivePrivilegeObject.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/98699b3b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeObject.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeObject.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeObject.java
index 180006f..41983f1 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeObject.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeObject.java
@@ -151,7 +151,7 @@ public class HivePrivilegeObject implements Comparable<HivePrivilegeObject> {
   }
 
   public HivePrivilegeObject(String dbname, String objectName, List<String> columns) {
-this(null, dbname, objectName, null, columns, null);
+this(HivePrivilegeObjectType.TABLE_OR_VIEW, dbname, objectName, null, columns, null);
   }
 
   public HivePrivilegeObject(HivePrivilegeObjectType type, String dbname, String objectName,
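
[Editor's note: the one-line change above has a visible effect on every caller of the
three-argument constructor; the diff shows it previously passed null as the type. A short
sketch of the difference, assuming the Hive ql jar on the classpath:

import java.util.Arrays;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject;

public class PrivObjectTypeCheck {
  public static void main(String[] args) {
    HivePrivilegeObject obj =
        new HivePrivilegeObject("default", "masked_tab", Arrays.asList("key", "value"));
    // Before HIVE-13477 this printed null; with the patch it prints TABLE_OR_VIEW,
    // so authorizer plugins can dispatch on the object type without a null check.
    System.out.println(obj.getType());
  }
}]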



hive git commit: HIVE-13553: CTE with upperCase alias throws exception (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2016-04-23 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master 749e27a92 -> 287f0451b


HIVE-13553: CTE with upperCase alias throws exception (Pengcheng Xiong, 
reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/287f0451
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/287f0451
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/287f0451

Branch: refs/heads/master
Commit: 287f0451bce6173868d5ddf224e30dff8dde2ca5
Parents: 749e27a
Author: Pengcheng Xiong 
Authored: Sat Apr 23 16:59:37 2016 -0700
Committer: Pengcheng Xiong 
Committed: Sat Apr 23 16:59:37 2016 -0700

--
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |  4 +-
 ql/src/test/queries/clientpositive/cte_6.q  | 10 +++
 ql/src/test/results/clientpositive/cte_6.q.out  | 86 
 3 files changed, 98 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/287f0451/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 005b53f..9af7749 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -674,7 +674,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
 
 ASTNode tableTree = (ASTNode) (tabref.getChild(0));
 
-String tabIdName = getUnescapedName(tableTree);
+String tabIdName = getUnescapedName(tableTree).toLowerCase();
 
 String alias;
 if (aliasIndex != 0) {
@@ -1564,7 +1564,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
   case HiveParser.TOK_ANALYZE:
 // Case of analyze command
 
-String table_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0));
+String table_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0)).toLowerCase();
 
 
 qb.setTabAlias(table_name, table_name);
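
[Editor's note: why lowercasing fixes the bug. CTE definitions are registered under their
lowercased alias, so an uppercase reference such as Q1 missed the map and was treated as a
real table. A minimal illustration of the lookup, in plain Java rather than Hive's actual
data structures:

import java.util.HashMap;
import java.util.Map;

public class CteLookupSketch {
  public static void main(String[] args) {
    // CTE bodies are keyed by lowercased alias at registration time.
    Map<String, String> aliasToCTE = new HashMap<>();
    aliasToCTE.put("q1", "select key from sRc where key = '5'");

    String tabIdName = "Q1"; // as written in "select CPS.key from Q1 CPS"
    System.out.println(aliasToCTE.get(tabIdName));               // null -> "table Q1 is not found"
    System.out.println(aliasToCTE.get(tabIdName.toLowerCase())); // resolves the CTE body
  }
}]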

http://git-wip-us.apache.org/repos/asf/hive/blob/287f0451/ql/src/test/queries/clientpositive/cte_6.q
--
diff --git a/ql/src/test/queries/clientpositive/cte_6.q 
b/ql/src/test/queries/clientpositive/cte_6.q
new file mode 100644
index 0000000..964d1b8
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/cte_6.q
@@ -0,0 +1,10 @@
+explain
+with Q1 as ( select key from sRc where key = '5')
+select CPS.key from Q1 CPS;
+
+-- chaining
+
+explain
+with Q1 as ( select key from q2 where key = '5'),
+Q2 as ( select key from sRc where key = '5')
+select CPS.key from Q1 CPS;

http://git-wip-us.apache.org/repos/asf/hive/blob/287f0451/ql/src/test/results/clientpositive/cte_6.q.out
--
diff --git a/ql/src/test/results/clientpositive/cte_6.q.out 
b/ql/src/test/results/clientpositive/cte_6.q.out
new file mode 100644
index 0000000..8cc433b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/cte_6.q.out
@@ -0,0 +1,86 @@
+PREHOOK: query: explain
+with Q1 as ( select key from sRc where key = '5')
+select CPS.key from Q1 CPS
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+with Q1 as ( select key from sRc where key = '5')
+select CPS.key from Q1 CPS
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+Map Reduce
+  Map Operator Tree:
+  TableScan
+alias: src
+Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+Filter Operator
+  predicate: (key = '5') (type: boolean)
+  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+  Select Operator
+expressions: '5' (type: string)
+outputColumnNames: _col0
+Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
+File Output Operator
+  compressed: false
+  Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
+  table:
+  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+Fetch Operator
+  limit: -1
+  Processor Tree:
+ListSink
+
+PREHOOK: query: -- chaining
+
+explain
+with Q1 as ( select key from q2 where key = '5'),
+Q2 as ( select key from sRc where key =

hive git commit: HIVE-13541: Pass view's ColumnAccessInfo to HiveAuthorizer (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2016-04-26 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master 0ac424f0a -> 154850124


HIVE-13541: Pass view's ColumnAccessInfo to HiveAuthorizer (Pengcheng Xiong, 
reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/15485012
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/15485012
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/15485012

Branch: refs/heads/master
Commit: 15485012424b84e4205ac30c09c33548c81f8d79
Parents: 0ac424f
Author: Pengcheng Xiong 
Authored: Tue Apr 26 10:52:36 2016 -0700
Committer: Pengcheng Xiong 
Committed: Tue Apr 26 10:52:36 2016 -0700

--
 .../TestHiveAuthorizerCheckInvocation.java  | 52 
 .../java/org/apache/hadoop/hive/ql/Driver.java  |  2 +-
 .../ql/optimizer/ColumnPrunerProcFactory.java   |  2 +-
 .../calcite/rules/HiveRelFieldTrimmer.java  |  2 +-
 .../hive/ql/parse/ColumnAccessAnalyzer.java | 31 +++-
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  | 12 +++--
 .../hadoop/hive/ql/parse/TestColumnAccess.java  | 10 ++--
 7 files changed, 86 insertions(+), 25 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/15485012/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/authorization/plugin/TestHiveAuthorizerCheckInvocation.java
--
diff --git 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/authorization/plugin/TestHiveAuthorizerCheckInvocation.java
 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/authorization/plugin/TestHiveAuthorizerCheckInvocation.java
index acf2663..5e601c9 100644
--- 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/authorization/plugin/TestHiveAuthorizerCheckInvocation.java
+++ 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/authorization/plugin/TestHiveAuthorizerCheckInvocation.java
@@ -60,6 +60,8 @@ public class TestHiveAuthorizerCheckInvocation {
   protected static Driver driver;
   private static final String tableName = TestHiveAuthorizerCheckInvocation.class.getSimpleName()
   + "Table";
+  private static final String viewName = TestHiveAuthorizerCheckInvocation.class.getSimpleName()
+  + "View";
   private static final String inDbTableName = tableName + "_in_db";
   private static final String acidTableName = tableName + "_acid";
   private static final String dbName = TestHiveAuthorizerCheckInvocation.class.getSimpleName()
@@ -97,6 +99,7 @@ public class TestHiveAuthorizerCheckInvocation {
 driver = new Driver(conf);
 runCmd("create table " + tableName
 + " (i int, j int, k string) partitioned by (city string, `date` 
string) ");
+runCmd("create view " + viewName + " as select * from " + tableName);
 runCmd("create database " + dbName);
 runCmd("create table " + dbName + "." + inDbTableName + "(i int)");
 // Need a separate table for ACID testing since it has to be bucketed and it has to be Acid
@@ -114,6 +117,7 @@ public class TestHiveAuthorizerCheckInvocation {
 // Drop the tables when we're done.  This makes the test work inside an IDE
 runCmd("drop table if exists " + acidTableName);
 runCmd("drop table if exists " + tableName);
+runCmd("drop table if exists " + viewName);
 runCmd("drop table if exists " + dbName + "." + inDbTableName);
 runCmd("drop database if exists " + dbName );
 driver.close();
@@ -136,6 +140,46 @@ public class TestHiveAuthorizerCheckInvocation {
 getSortedList(tableObj.getColumns()));
   }
 
+  @Test
+  public void testInputSomeColumnsUsedView() throws HiveAuthzPluginException, HiveAccessControlException,
+  CommandNeedRetryException {
+
+reset(mockedAuthorizer);
+int status = driver.compile("select i from " + viewName
++ " where k = 'X' and city = 'Scottsdale-AZ' ");
+assertEquals(0, status);
+
+List<HivePrivilegeObject> inputs = getHivePrivilegeObjectInputs().getLeft();
+checkSingleViewInput(inputs);
+HivePrivilegeObject tableObj = inputs.get(0);
+assertEquals("no of columns used", 3, tableObj.getColumns().size());
+assertEquals("Columns used", Arrays.asList("city", "i", "k"),
+getSortedList(tableObj.getColumns()));
+  }
+
+  @Test
+  public void testInputSomeColumnsUsedJoin() throws HiveAuthzPluginException, HiveAccessControlException,
+  CommandNeedRetryException {
+
+reset(mockedAuthorizer);
+int status = driver.compile("select " + viewName + ".i, " + tableName + 
".city from "
++ viewName + " join " + tableName + " on " + viewName + ".city = " + 
tableName
++ ".city where " + tableName + ".k = 'X'");
+assertEquals(0, status);
+
+List<HivePrivilegeObject> inputs = getHivePrivilegeObjectInputs().getLeft();
+Collections.sort(inputs);
+
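
[Editor's note: on the authorizer side, the payoff of this change is that a view now arrives
as a regular input whose column list is pruned to the columns the query touches. A hedged
sketch of what a plugin can do with it, using only accessors that appear in the diffs above
(getType(), getObjectName(), getColumns()):

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject;

public class ViewColumnAudit {
  // Log each input object with the columns the compiled query actually uses,
  // e.g. "TABLE_OR_VIEW ...View columns=[city, i, k]" for the test above.
  static void logInputs(List<HivePrivilegeObject> inputs) {
    for (HivePrivilegeObject in : inputs) {
      System.out.println(in.getType() + " " + in.getObjectName()
          + " columns=" + in.getColumns());
    }
  }

  public static void main(String[] args) {
    logInputs(Arrays.asList(
        new HivePrivilegeObject("default", "testView", Arrays.asList("city", "i", "k"))));
  }
}]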

hive git commit: HIVE-12663: Support quoted table names/columns when ACID is on (Pengcheng Xiong, reviewed by Eugene Koifman)

2015-12-15 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master c5b2c0ebb -> be98f0133


HIVE-12663: Support quoted table names/columns when ACID is on (Pengcheng 
Xiong, reviewed by Eugene Koifman)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/be98f013
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/be98f013
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/be98f013

Branch: refs/heads/master
Commit: be98f01332fd3ce232631d75bbc35a8252828454
Parents: c5b2c0e
Author: Pengcheng Xiong 
Authored: Tue Dec 15 10:18:27 2015 -0800
Committer: Pengcheng Xiong 
Committed: Tue Dec 15 10:18:27 2015 -0800

--
 .../ql/parse/UpdateDeleteSemanticAnalyzer.java  | 21 --
 ..._after_multiple_inserts_special_characters.q | 25 +++
 ...er_multiple_inserts_special_characters.q.out | 78 
 3 files changed, 117 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/be98f013/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
index 4c69534..5b4365c 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
@@ -35,6 +35,7 @@ import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.HiveUtils;
 import org.apache.hadoop.hive.ql.metadata.InvalidTableException;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
@@ -143,16 +144,20 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
 List<String> bucketingCols = mTable.getBucketCols();
 
 rewrittenQueryStr.append("insert into table ");
-rewrittenQueryStr.append(getDotName(tableName));
+rewrittenQueryStr.append(getDotName(new String[] {
+HiveUtils.unparseIdentifier(tableName[0], this.conf),
+HiveUtils.unparseIdentifier(tableName[1], this.conf) }));
 
 // If the table is partitioned we have to put the partition() clause in
 if (partCols != null && partCols.size() > 0) {
   rewrittenQueryStr.append(" partition (");
   boolean first = true;
   for (FieldSchema fschema : partCols) {
-if (first) first = false;
-else rewrittenQueryStr.append(", ");
-rewrittenQueryStr.append(fschema.getName());
+if (first)
+  first = false;
+else
+  rewrittenQueryStr.append(", ");
+rewrittenQueryStr.append(HiveUtils.unparseIdentifier(fschema.getName(), this.conf));
   }
   rewrittenQueryStr.append(")");
 }
@@ -214,7 +219,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
 rewrittenQueryStr.append(',');
 String name = nonPartCols.get(i).getName();
 ASTNode setCol = setCols.get(name);
-rewrittenQueryStr.append(name);
+rewrittenQueryStr.append(HiveUtils.unparseIdentifier(name, this.conf));
 if (setCol != null) {
   // This is one of the columns we're setting, record it's position so we can come back
   // later and patch it up.
@@ -228,11 +233,13 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
 if (partCols != null) {
   for (FieldSchema fschema : partCols) {
 rewrittenQueryStr.append(", ");
-rewrittenQueryStr.append(fschema.getName());
+rewrittenQueryStr.append(HiveUtils.unparseIdentifier(fschema.getName(), this.conf));
   }
 }
 rewrittenQueryStr.append(" from ");
-rewrittenQueryStr.append(getDotName(tableName));
+rewrittenQueryStr.append(getDotName(new String[] {
+HiveUtils.unparseIdentifier(tableName[0], this.conf),
+HiveUtils.unparseIdentifier(tableName[1], this.conf) }));
 
 ASTNode where = null;
 int whereIndex = deleting() ? 1 : 2;
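
[Editor's note: HiveUtils.unparseIdentifier is what makes the rewritten INSERT text survive
re-parsing when names contain special characters. A stand-in for its behavior under the
default hive.support.quoted.identifiers=column policy, for illustration only:

public class QuoteIdentifierSketch {
  // Wrap an identifier in backticks, doubling any embedded backtick,
  // mirroring what the rewritten query text above relies on.
  static String unparse(String identifier) {
    return "`" + identifier.replace("`", "``") + "`";
  }

  public static void main(String[] args) {
    // A table or column name with special characters now round-trips:
    System.out.println("insert into table " + unparse("default") + "." + unparse("t~1"));
    // prints: insert into table `default`.`t~1`
  }
}]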

http://git-wip-us.apache.org/repos/asf/hive/blob/be98f013/ql/src/test/queries/clientpositive/update_after_multiple_inserts_special_characters.q
--
diff --git 
a/ql/src/test/queries/clientpositive/update_after_multiple_inserts_special_characters.q
 
b/ql/src/test/queries/clientpositive/update_after_multiple_inserts_special_characters.q
new file mode 100644
index 0000000..65d3fb3
--- /dev/null
+++ 
b/ql/src/test/queries/clientpositive/update_after_multiple_inserts_special_characters.q
@@ -0,0 +1,25 @@
+set 

hive git commit: HIVE-12663: Support quoted table names/columns when ACID is on (Pengcheng Xiong, reviewed by Eugene Koifman)

2015-12-16 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/branch-2.0 d8ee05aeb -> fffebe67f


HIVE-12663: Support quoted table names/columns when ACID is on (Pengcheng 
Xiong, reviewed by Eugene Koifman)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/fffebe67
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/fffebe67
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/fffebe67

Branch: refs/heads/branch-2.0
Commit: fffebe67f4de9597b8c212ac93ba693918231a6b
Parents: d8ee05a
Author: Pengcheng Xiong 
Authored: Tue Dec 15 10:18:27 2015 -0800
Committer: Pengcheng Xiong 
Committed: Wed Dec 16 11:17:03 2015 -0800

--
 .../ql/parse/UpdateDeleteSemanticAnalyzer.java  | 21 --
 ..._after_multiple_inserts_special_characters.q | 25 +++
 ...er_multiple_inserts_special_characters.q.out | 78 
 3 files changed, 117 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/fffebe67/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
index 4c69534..5b4365c 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
@@ -35,6 +35,7 @@ import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.HiveUtils;
 import org.apache.hadoop.hive.ql.metadata.InvalidTableException;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
@@ -143,16 +144,20 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
 List<String> bucketingCols = mTable.getBucketCols();
 
 rewrittenQueryStr.append("insert into table ");
-rewrittenQueryStr.append(getDotName(tableName));
+rewrittenQueryStr.append(getDotName(new String[] {
+HiveUtils.unparseIdentifier(tableName[0], this.conf),
+HiveUtils.unparseIdentifier(tableName[1], this.conf) }));
 
 // If the table is partitioned we have to put the partition() clause in
 if (partCols != null && partCols.size() > 0) {
   rewrittenQueryStr.append(" partition (");
   boolean first = true;
   for (FieldSchema fschema : partCols) {
-if (first) first = false;
-else rewrittenQueryStr.append(", ");
-rewrittenQueryStr.append(fschema.getName());
+if (first)
+  first = false;
+else
+  rewrittenQueryStr.append(", ");
+rewrittenQueryStr.append(HiveUtils.unparseIdentifier(fschema.getName(), this.conf));
   }
   rewrittenQueryStr.append(")");
 }
@@ -214,7 +219,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
 rewrittenQueryStr.append(',');
 String name = nonPartCols.get(i).getName();
 ASTNode setCol = setCols.get(name);
-rewrittenQueryStr.append(name);
+rewrittenQueryStr.append(HiveUtils.unparseIdentifier(name, this.conf));
 if (setCol != null) {
   // This is one of the columns we're setting, record it's position so we can come back
   // later and patch it up.
@@ -228,11 +233,13 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
 if (partCols != null) {
   for (FieldSchema fschema : partCols) {
 rewrittenQueryStr.append(", ");
-rewrittenQueryStr.append(fschema.getName());
+rewrittenQueryStr.append(HiveUtils.unparseIdentifier(fschema.getName(), this.conf));
   }
 }
 rewrittenQueryStr.append(" from ");
-rewrittenQueryStr.append(getDotName(tableName));
+rewrittenQueryStr.append(getDotName(new String[] {
+HiveUtils.unparseIdentifier(tableName[0], this.conf),
+HiveUtils.unparseIdentifier(tableName[1], this.conf) }));
 
 ASTNode where = null;
 int whereIndex = deleting() ? 1 : 2;

http://git-wip-us.apache.org/repos/asf/hive/blob/fffebe67/ql/src/test/queries/clientpositive/update_after_multiple_inserts_special_characters.q
--
diff --git 
a/ql/src/test/queries/clientpositive/update_after_multiple_inserts_special_characters.q
 
b/ql/src/test/queries/clientpositive/update_after_multiple_inserts_special_characters.q
new file mode 100644
index 0000000..65d3fb3
--- /dev/null
+++ 
b/ql/src/test/queries/clientpositive/update_after_multiple_inserts_special_characters.q
@@ -0,0 +1,25 

hive git commit: HIVE-11775: Implement limit push down through union all in CBO (Pengcheng Xiong, reviewed by Laljo John Pullokkaran)

2015-12-18 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master f1ecce036 -> 71536a2f8


HIVE-11775: Implement limit push down through union all in CBO (Pengcheng 
Xiong, reviewed by Laljo John Pullokkaran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/71536a2f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/71536a2f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/71536a2f

Branch: refs/heads/master
Commit: 71536a2f8295f61602e776e4e5773c7007a46b69
Parents: f1ecce0
Author: Pengcheng Xiong 
Authored: Fri Dec 18 23:44:40 2015 -0800
Committer: Pengcheng Xiong 
Committed: Fri Dec 18 23:44:40 2015 -0800

--
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   20 +-
 .../calcite/rules/HiveSortUnionReduceRule.java  |  109 ++
 .../hadoop/hive/ql/parse/CalcitePlanner.java|   10 +-
 .../clientpositive/cbo_SortUnionTransposeRule.q |  100 ++
 .../clientpositive/limit_join_transpose.q   |   16 +-
 .../clientpositive/tez_dynpart_hashjoin_3.q |6 +-
 .../cbo_SortUnionTransposeRule.q.out| 1196 ++
 7 files changed, 1432 insertions(+), 25 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/71536a2f/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 60ac0c0..9e8e2f5 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1312,16 +1312,16 @@ public class HiveConf extends Configuration {
 "we are increasing the number of files possibly by a big margin. So, 
we merge aggressively."),
 HIVEOPTCORRELATION("hive.optimize.correlation", false, "exploit 
intra-query correlations."),
 
-HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE("hive.optimize.limitjointranspose", false,
-"Whether to push a limit through left/right outer join. If the value is true and the size of the outer\n" +
-"input is reduced enough (as specified in hive.optimize.limitjointranspose.reduction), the limit is pushed\n" +
-"to the outer input; to remain semantically correct, the limit is kept on top of the join too."),
-HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE_REDUCTION_PERCENTAGE("hive.optimize.limitjointranspose.reductionpercentage", 1.0f,
-"When hive.optimize.limitjointranspose is true, this variable specifies the minimal reduction of the\n" +
-"size of the outer input of the join that we should get in order to apply the rule."),
-HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE_REDUCTION_TUPLES("hive.optimize.limitjointranspose.reductiontuples", (long) 0,
-"When hive.optimize.limitjointranspose is true, this variable specifies the minimal reduction in the\n" +
-"number of tuples of the outer input of the join that you should get in order to apply the rule."),
+HIVE_OPTIMIZE_LIMIT_TRANSPOSE("hive.optimize.limittranspose", false,
+"Whether to push a limit through left/right outer join or union. If the value is true and the size of the outer\n" +
+"input is reduced enough (as specified in hive.optimize.limittranspose.reduction), the limit is pushed\n" +
+"to the outer input or union; to remain semantically correct, the limit is kept on top of the join or the union too."),
+HIVE_OPTIMIZE_LIMIT_TRANSPOSE_REDUCTION_PERCENTAGE("hive.optimize.limittranspose.reductionpercentage", 1.0f,
+"When hive.optimize.limittranspose is true, this variable specifies the minimal reduction of the\n" +
+"size of the outer input of the join or input of the union that we should get in order to apply the rule."),
+HIVE_OPTIMIZE_LIMIT_TRANSPOSE_REDUCTION_TUPLES("hive.optimize.limittranspose.reductiontuples", (long) 0,
+"When hive.optimize.limittranspose is true, this variable specifies the minimal reduction in the\n" +
+"number of tuples of the outer input of the join or the input of the union that you should get in order to apply the rule."),

 HIVE_OPTIMIZE_SKEWJOIN_COMPILETIME("hive.optimize.skewjoin.compiletime", false,
 "Whether to create a separate plan for skewed keys for the tables in the join.\n" +
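
[Editor's note: a sketch of enabling the renamed knobs programmatically. The ConfVars names
are taken from the hunk above; the threshold value is only an example, and the exact
semantics of the reduction settings are as described in the config strings above.

import org.apache.hadoop.hive.conf.HiveConf;

public class LimitTransposeConfig {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    conf.setBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_TRANSPOSE, true);
    // Only apply the rule when pushing the limit shrinks the join/union input
    // enough, per hive.optimize.limittranspose.reductionpercentage.
    conf.setFloatVar(HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_TRANSPOSE_REDUCTION_PERCENTAGE, 0.7f);
    System.out.println(conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_TRANSPOSE));
  }
}]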

http://git-wip-us.apache.org/repos/asf/hive/blob/71536a2f/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortUnionReduceRule.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortUnionReduceRule.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortUnionReduceRule.java
new file mode 100644
index 0000000..0ec8bf1
--- /dev/null
+

hive git commit: HIVE-11775: Implement limit push down through union all in CBO (Pengcheng Xiong, reviewed by Laljo John Pullokkaran)

2015-12-18 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/branch-2.0 e4e91172b -> 422d58c25


HIVE-11775: Implement limit push down through union all in CBO (Pengcheng 
Xiong, reviewed by Laljo John Pullokkaran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/422d58c2
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/422d58c2
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/422d58c2

Branch: refs/heads/branch-2.0
Commit: 422d58c2538ba318d34d6e6b41460cdd16369d8b
Parents: e4e9117
Author: Pengcheng Xiong 
Authored: Fri Dec 18 23:44:40 2015 -0800
Committer: Pengcheng Xiong 
Committed: Fri Dec 18 23:45:41 2015 -0800

--
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   20 +-
 .../calcite/rules/HiveSortUnionReduceRule.java  |  109 ++
 .../hadoop/hive/ql/parse/CalcitePlanner.java|   10 +-
 .../clientpositive/cbo_SortUnionTransposeRule.q |  100 ++
 .../clientpositive/limit_join_transpose.q   |   16 +-
 .../clientpositive/tez_dynpart_hashjoin_3.q |6 +-
 .../cbo_SortUnionTransposeRule.q.out| 1196 ++
 7 files changed, 1432 insertions(+), 25 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/422d58c2/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 36e281a..5f1772b 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1307,16 +1307,16 @@ public class HiveConf extends Configuration {
 "we are increasing the number of files possibly by a big margin. So, 
we merge aggressively."),
 HIVEOPTCORRELATION("hive.optimize.correlation", false, "exploit 
intra-query correlations."),
 
-HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE("hive.optimize.limitjointranspose", false,
-"Whether to push a limit through left/right outer join. If the value is true and the size of the outer\n" +
-"input is reduced enough (as specified in hive.optimize.limitjointranspose.reduction), the limit is pushed\n" +
-"to the outer input; to remain semantically correct, the limit is kept on top of the join too."),
-HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE_REDUCTION_PERCENTAGE("hive.optimize.limitjointranspose.reductionpercentage", 1.0f,
-"When hive.optimize.limitjointranspose is true, this variable specifies the minimal reduction of the\n" +
-"size of the outer input of the join that we should get in order to apply the rule."),
-HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE_REDUCTION_TUPLES("hive.optimize.limitjointranspose.reductiontuples", (long) 0,
-"When hive.optimize.limitjointranspose is true, this variable specifies the minimal reduction in the\n" +
-"number of tuples of the outer input of the join that you should get in order to apply the rule."),
+HIVE_OPTIMIZE_LIMIT_TRANSPOSE("hive.optimize.limittranspose", false,
+"Whether to push a limit through left/right outer join or union. If the value is true and the size of the outer\n" +
+"input is reduced enough (as specified in hive.optimize.limittranspose.reduction), the limit is pushed\n" +
+"to the outer input or union; to remain semantically correct, the limit is kept on top of the join or the union too."),
+HIVE_OPTIMIZE_LIMIT_TRANSPOSE_REDUCTION_PERCENTAGE("hive.optimize.limittranspose.reductionpercentage", 1.0f,
+"When hive.optimize.limittranspose is true, this variable specifies the minimal reduction of the\n" +
+"size of the outer input of the join or input of the union that we should get in order to apply the rule."),
+HIVE_OPTIMIZE_LIMIT_TRANSPOSE_REDUCTION_TUPLES("hive.optimize.limittranspose.reductiontuples", (long) 0,
+"When hive.optimize.limittranspose is true, this variable specifies the minimal reduction in the\n" +
+"number of tuples of the outer input of the join or the input of the union that you should get in order to apply the rule."),

 HIVE_OPTIMIZE_SKEWJOIN_COMPILETIME("hive.optimize.skewjoin.compiletime", false,
 "Whether to create a separate plan for skewed keys for the tables in the join.\n" +

http://git-wip-us.apache.org/repos/asf/hive/blob/422d58c2/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortUnionReduceRule.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortUnionReduceRule.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortUnionReduceRule.java
new file mode 100644
index 0000000..0ec8bf1
--- /de

[5/9] hive git commit: HIVE-11927: Implement/Enable constant related optimization rules in Calcite: enable HiveReduceExpressionsRule to fold constants (Pengcheng Xiong, reviewed by Laljo John Pullokka

2015-12-22 Thread pxiong
http://git-wip-us.apache.org/repos/asf/hive/blob/b340ecb5/ql/src/test/results/clientpositive/perf/query64.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/query64.q.out 
b/ql/src/test/results/clientpositive/perf/query64.q.out
index a63..68730e7 100644
--- a/ql/src/test/results/clientpositive/perf/query64.q.out
+++ b/ql/src/test/results/clientpositive/perf/query64.q.out
@@ -51,61 +51,61 @@ Stage-0
   limit:-1
   Stage-1
  Reducer 20
- File Output Operator [FS_253]
+ File Output Operator [FS_254]
 compressed:false
 Statistics:Num rows: 122532649 Data size: 105380558466 Basic 
stats: COMPLETE Column stats: NONE
 table:{"input 
format:":"org.apache.hadoop.mapred.TextInputFormat","output 
format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}
-Select Operator [SEL_252]
+Select Operator [SEL_253]
 |  
outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20"]
 |  Statistics:Num rows: 122532649 Data size: 105380558466 Basic 
stats: COMPLETE Column stats: NONE
 |<-Reducer 19 [SIMPLE_EDGE]
-   Reduce Output Operator [RS_251]
+   Reduce Output Operator [RS_252]
   key expressions:_col0 (type: string), _col1 (type: string), 
_col20 (type: bigint)
   sort order:+++
   Statistics:Num rows: 122532649 Data size: 105380558466 Basic 
stats: COMPLETE Column stats: NONE
-  value expressions:_col2 (type: string), _col3 (type: 
string), _col4 (type: string), _col5 (type: string), _col6 (type: string), 
_col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: 
string), _col11 (type: int), _col12 (type: bigint), _col13 (type: 
decimal(17,2)), _col14 (type: decimal(17,2)), _col15 (type: decimal(17,2)), 
_col16 (type: decimal(17,2)), _col17 (type: decimal(17,2)), _col18 (type: 
decimal(17,2)), _col19 (type: int)
-  Select Operator [SEL_250]
- 
outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20"]
+  value expressions:_col2 (type: string), _col3 (type: 
string), _col4 (type: string), _col5 (type: string), _col6 (type: string), 
_col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: 
string), _col12 (type: bigint), _col13 (type: decimal(17,2)), _col14 (type: 
decimal(17,2)), _col15 (type: decimal(17,2)), _col16 (type: decimal(17,2)), 
_col17 (type: decimal(17,2)), _col18 (type: decimal(17,2)), _col19 (type: int)
+  Select Operator [SEL_251]
+ 
outputColumnNames:["_col0","_col1","_col10","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col2","_col20","_col3","_col4","_col5","_col6","_col7","_col8","_col9"]
  Statistics:Num rows: 122532649 Data size: 105380558466 
Basic stats: COMPLETE Column stats: NONE
- Filter Operator [FIL_249]
+ Filter Operator [FIL_250]
 predicate:(_col34 <= _col15) (type: boolean)
 Statistics:Num rows: 122532649 Data size: 105380558466 
Basic stats: COMPLETE Column stats: NONE
-Merge Join Operator [MERGEJOIN_714]
+Merge Join Operator [MERGEJOIN_715]
 |  condition map:[{"":"Inner Join 0 to 1"}]
 |  keys:{"0":"_col1 (type: int), _col2 (type: string), 
_col3 (type: string)","1":"_col1 (type: int), _col2 (type: string), _col3 
(type: string)"}
-|  
outputColumnNames:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15","_col16","_col17","_col18","_col31","_col34","_col35","_col36","_col37"]
+|  
outputColumnNames:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col15","_col16","_col17","_col18","_col31","_col34","_col35","_col36","_col37"]
 |  Statistics:Num rows: 367597947 Data size: 
316141675400 Basic stats: COMPLETE Column stats: NONE
 |<-Reducer 18 [SIMPLE_EDGE]
-|  Reduce Output Operator [RS_246]
+|  Reduce Output Operator [RS_247]
 | key expressions:_col1 (type: int), _col2 (type: 
string), _col3 (type: string)
 | Map-reduce partition columns:_col1 (type: int), 
_col2 (type: string), _col3 (type: string)
 

[7/9] hive git commit: HIVE-11927: Implement/Enable constant related optimization rules in Calcite: enable HiveReduceExpressionsRule to fold constants (Pengcheng Xiong, reviewed by Laljo John Pullokka

2015-12-22 Thread pxiong
http://git-wip-us.apache.org/repos/asf/hive/blob/b340ecb5/ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out 
b/ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out
index b322ef1..7c5be6d 100644
--- a/ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out
@@ -756,23 +756,23 @@ STAGE PLANS:
 alias: tbl_pred
 Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE 
Column stats: NONE
 Filter Operator
-  predicate: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean)
-  Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE 
Column stats: NONE
+  predicate: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (t > 0)) and si BETWEEN 300 AND 400) and (not (s like '%car%'))) (type: boolean)
+  Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE 
Column stats: NONE
   Select Operator
 expressions: t (type: tinyint), si (type: smallint), d (type: 
double), s (type: string)
 outputColumnNames: _col0, _col1, _col2, _col3
-Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE 
Column stats: NONE
+Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE 
Column stats: NONE
 Reduce Output Operator
   key expressions: _col3 (type: string)
   sort order: -
-  Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE 
Column stats: NONE
+  Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE 
Column stats: NONE
   TopN Hash Memory Usage: 0.1
   value expressions: _col0 (type: tinyint), _col1 (type: 
smallint), _col2 (type: double)
   Reduce Operator Tree:
 Select Operator
   expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: 
smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string)
   outputColumnNames: _col0, _col1, _col2, _col3
-  Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column 
stats: NONE
+  Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column 
stats: NONE
   Limit
 Number of rows: 3
 Statistics: Num rows: 3 Data size: 33 Basic stats: COMPLETE Column 
stats: NONE
@@ -822,26 +822,26 @@ STAGE PLANS:
   Map Operator Tree:
   TableScan
 alias: tbl_pred
-filterExpr: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean)
+filterExpr: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (t > 0)) and si BETWEEN 300 AND 400) and (not (s like '%car%'))) (type: boolean)
 Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE 
Column stats: NONE
 Filter Operator
-  predicate: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean)
-  Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+  predicate: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (t > 0)) and si BETWEEN 300 AND 400) and (not (s like '%car%'))) (type: boolean)
+  Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE 
Column stats: NONE
   Select Operator
 expressions: t (type: tinyint), si (type: smallint), d (type: 
double), s (type: string)
 outputColumnNames: _col0, _col1, _col2, _col3
-Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE 
Column stats: NONE
+Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE 
Column stats: NONE
 Reduce Output Operator
   key expressions: _col3 (type: string)
   sort order: -
-  Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE 
Column stats: NONE
+  Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE 
Column stats: NONE
   TopN Hash Memory Usage: 0.1
   value expressions: _col0 (type: tinyint), _col1 (type: 
smallint), _col2 (type: double)
   Reduce Operator Tree:
 Select Operator
   expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: 
smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string)
   outputColumnNames: _col0, _col1, _col2, _col3
-  Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE 

[3/9] hive git commit: HIVE-11927: Implement/Enable constant related optimization rules in Calcite: enable HiveReduceExpressionsRule to fold constants (Pengcheng Xiong, reviewed by Laljo John Pullokka

2015-12-22 Thread pxiong
http://git-wip-us.apache.org/repos/asf/hive/blob/b340ecb5/ql/src/test/results/clientpositive/perf/query75.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/query75.q.out 
b/ql/src/test/results/clientpositive/perf/query75.q.out
index d54000b..f3f9827 100644
--- a/ql/src/test/results/clientpositive/perf/query75.q.out
+++ b/ql/src/test/results/clientpositive/perf/query75.q.out
@@ -48,9 +48,9 @@ Stage-0
  key expressions:_col8 (type: bigint)
  sort order:+
  Statistics:Num rows: 169103 Data size: 242878993 Basic 
stats: COMPLETE Column stats: NONE
- value expressions:_col0 (type: int), _col1 (type: int), 
_col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), 
_col6 (type: bigint), _col7 (type: bigint), _col9 (type: double)
+ value expressions:_col0 (type: int), _col2 (type: int), 
_col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: bigint), 
_col7 (type: bigint), _col9 (type: double)
  Select Operator [SEL_146]
-
outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"]
+
outputColumnNames:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"]
 Statistics:Num rows: 169103 Data size: 242878993 Basic 
stats: COMPLETE Column stats: NONE
 Filter Operator [FIL_145]
predicate:((CAST( _col5 AS decimal(17,2)) / CAST( 
_col12 AS decimal(17,2))) < 0.9) (type: boolean)
@@ -58,7 +58,7 @@ Stage-0
Merge Join Operator [MERGEJOIN_253]
|  condition map:[{"":"Inner Join 0 to 1"}]
|  keys:{"0":"_col1 (type: int), _col2 (type: int), 
_col3 (type: int), _col4 (type: int)","1":"_col1 (type: int), _col2 (type: 
int), _col3 (type: int), _col4 (type: int)"}
-   |  
outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col12","_col13"]
+   |  
outputColumnNames:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col12","_col13"]
|  Statistics:Num rows: 507310 Data size: 728638416 
Basic stats: COMPLETE Column stats: NONE
|<-Reducer 31 [SIMPLE_EDGE]
|  Reduce Output Operator [RS_143]
@@ -394,326 +394,335 @@ Stage-0
  Map-reduce partition columns:_col1 (type: 
int), _col2 (type: int), _col3 (type: int), _col4 (type: int)
  sort order:
  Statistics:Num rows: 461191 Data size: 
662398546 Basic stats: COMPLETE Column stats: NONE
- value expressions:_col0 (type: int), _col5 
(type: bigint), _col6 (type: double)
- Group By Operator [GBY_69]
- |  
aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"]
- |  keys:KEY._col0 (type: int), KEY._col1 
(type: int), KEY._col2 (type: int), KEY._col3 (type: int), KEY._col4 (type: int)
- |  
outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
- |  Statistics:Num rows: 461191 Data size: 
662398546 Basic stats: COMPLETE Column stats: NONE
- |<-Union 5 [SIMPLE_EDGE]
-|<-Reducer 15 [CONTAINS]
-|  Reduce Output Operator [RS_68]
-| key expressions:_col0 (type: int), 
_col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int)
-| Map-reduce partition columns:_col0 
(type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 
(type: int)
-| sort order:+
-| Statistics:Num rows: 922383 Data 
size: 1324798530 Basic stats: COMPLETE Column stats: NONE
-| value expressions:_col5 (type: 
bigint), _col6 (type: double)
-| Group By Operator [GBY_67]
-|
aggregations:["sum(_col5)","sum(_col6)"]
-|keys:_col0 (type: int), _col1 
(type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int)
-|
outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
-|Statistics:Num rows: 922383 Data 
size: 1324798530 Basic stats: COMPLETE Column stats: NONE
-| 

[2/9] hive git commit: HIVE-11927: Implement/Enable constant related optimization rules in Calcite: enable HiveReduceExpressionsRule to fold constants (Pengcheng Xiong, reviewed by Laljo John Pullokka

2015-12-22 Thread pxiong
http://git-wip-us.apache.org/repos/asf/hive/blob/b340ecb5/ql/src/test/results/clientpositive/quotedid_partition.q.out
--
diff --git a/ql/src/test/results/clientpositive/quotedid_partition.q.out 
b/ql/src/test/results/clientpositive/quotedid_partition.q.out
index d34a005..e40d0d0 100644
--- a/ql/src/test/results/clientpositive/quotedid_partition.q.out
+++ b/ql/src/test/results/clientpositive/quotedid_partition.q.out
@@ -46,11 +46,11 @@ STAGE PLANS:
   predicate: (x+1 = '10') (type: boolean)
   Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
   Select Operator
-expressions: '10' (type: string), y&y (type: string), 'a' 
(type: string)
-outputColumnNames: x+1, y&y, !@#$%^&*()_q
+expressions: y&y (type: string)
+outputColumnNames: _col1
 Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
 Group By Operator
-  keys: x+1 (type: string), y&y (type: string), !@#$%^&*()_q 
(type: string)
+  keys: '10' (type: string), _col1 (type: string), 'a' (type: 
string)
   mode: hash
   outputColumnNames: _col0, _col1, _col2
   Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
@@ -65,13 +65,17 @@ STAGE PLANS:
   mode: mergepartial
   outputColumnNames: _col0, _col1, _col2
   Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE 
Column stats: NONE
-  File Output Operator
-compressed: false
+  Select Operator
+expressions: '10' (type: string), _col1 (type: string), 'a' (type: 
string)
+outputColumnNames: _col0, _col1, _col2
 Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE 
Column stats: NONE
-table:
-input format: org.apache.hadoop.mapred.TextInputFormat
-output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+File Output Operator
+  compressed: false
+  Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE 
Column stats: NONE
+  table:
+  input format: org.apache.hadoop.mapred.TextInputFormat
+  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
 Fetch Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/b340ecb5/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out 
b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
index b5e7846..57a89d6 100644
--- a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
+++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
@@ -1567,7 +1567,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcbucket_mapjoin
 POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08
 POSTHOOK: Output: default@tab@ds=2008-04-08
-POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).key SIMPLE 
[(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, 
comment:null), ]
+POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).key SIMPLE []
 POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).value SIMPLE 
[(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, 
comment:null), ]
 PREHOOK: query: explain
 select count(*)
@@ -1688,7 +1688,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcbucket_mapjoin
 POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08
 POSTHOOK: Output: default@tab@ds=2008-04-08
-POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).key SIMPLE 
[(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, 
comment:null), ]
+POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).key SIMPLE []
 POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).value SIMPLE 
[(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, 
comment:null), ]
 PREHOOK: query: explain
 select count(*)

http://git-wip-us.apache.org/repos/asf/hive/blob/b340ecb5/ql/src/test/results/clientpositive/spark/bucketizedhiveinputformat.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/spark/bucketizedhiveinputformat.q.out 
b/ql/src/test/results/clientpositive/spark/bucketizedhiveinputformat.q.out
index 98c5802..c8fc4d3 100644
--- a/ql/src/test/results/clientpositive/spark/bucketizedhiveinputformat.q.out
+++ b/ql/src/test/results/clientpositive/spark/bucketizedhiveinputform

[9/9] hive git commit: HIVE-11927: Implement/Enable constant related optimization rules in Calcite: enable HiveReduceExpressionsRule to fold constants (Pengcheng Xiong, reviewed by Laljo John Pullokka

2015-12-22 Thread pxiong
HIVE-11927: Implement/Enable constant related optimization rules in Calcite: 
enable HiveReduceExpressionsRule to fold constants (Pengcheng Xiong, reviewed 
by Laljo John Pullokkaran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b340ecb5
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b340ecb5
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b340ecb5

Branch: refs/heads/master
Commit: b340ecb5e163277e86def59b454b8c041ece39d5
Parents: af96223
Author: Pengcheng Xiong 
Authored: Tue Dec 22 21:06:14 2015 -0800
Committer: Pengcheng Xiong 
Committed: Tue Dec 22 21:06:14 2015 -0800

--
 .../calcite/CalciteSemanticException.java   |   2 +-
 .../optimizer/calcite/HiveRexExecutorImpl.java  |  80 ++
 .../rules/HiveReduceExpressionsRule.java| 905 +++
 .../calcite/translator/ASTBuilder.java  |  30 +-
 .../calcite/translator/RexNodeConverter.java|  48 +-
 .../hadoop/hive/ql/parse/CalcitePlanner.java|  84 +-
 ql/src/test/queries/clientpositive/cbo_const.q  |  52 ++
 .../queries/clientpositive/constantfolding.q|  88 ++
 .../clientpositive/annotate_stats_select.q.out  |  58 +-
 .../bucketizedhiveinputformat.q.out |   2 +
 ql/src/test/results/clientpositive/cast1.q.out  |  10 +-
 .../test/results/clientpositive/cbo_const.q.out | 334 +++
 .../cbo_rp_cross_product_check_2.q.out  |   4 +-
 .../clientpositive/cbo_rp_lineage2.q.out|   2 +-
 .../clientpositive/constantfolding.q.out| 305 +++
 .../clientpositive/cross_product_check_1.q.out  |   4 +-
 .../clientpositive/cross_product_check_2.q.out  |   4 +-
 .../clientpositive/dynamic_rdd_cache.q.out  |  46 +-
 .../dynpart_sort_optimization2.q.out|  45 +-
 .../results/clientpositive/groupby_ppd.q.out|  18 +-
 .../clientpositive/groupby_sort_1_23.q.out  |  12 +-
 .../clientpositive/groupby_sort_skew_1_23.q.out |  12 +-
 .../results/clientpositive/input_part1.q.out|   4 +-
 .../results/clientpositive/input_part5.q.out|   2 +-
 .../results/clientpositive/input_part6.q.out|   2 +-
 .../test/results/clientpositive/lineage2.q.out  |   2 +-
 .../test/results/clientpositive/lineage3.q.out  |  10 +-
 .../list_bucket_query_oneskew_2.q.out   |  50 +-
 .../llap/bucket_map_join_tez1.q.out |   4 +-
 .../llap/vector_join_part_col_char.q.out|   2 +-
 .../clientpositive/orc_predicate_pushdown.q.out |  22 +-
 .../parquet_predicate_pushdown.q.out|  22 +-
 .../clientpositive/partition_multilevels.q.out  |  44 +-
 .../results/clientpositive/perf/query31.q.out   | 338 +++
 .../results/clientpositive/perf/query39.q.out   |  58 +-
 .../results/clientpositive/perf/query42.q.out   | 171 ++--
 .../results/clientpositive/perf/query52.q.out   | 163 ++--
 .../results/clientpositive/perf/query64.q.out   | 362 
 .../results/clientpositive/perf/query66.q.out   | 627 ++---
 .../results/clientpositive/perf/query75.q.out   | 643 ++---
 .../results/clientpositive/pointlookup2.q.out   |   6 +-
 .../results/clientpositive/quotedid_basic.q.out |  62 +-
 .../clientpositive/quotedid_partition.q.out |  22 +-
 .../spark/bucket_map_join_tez1.q.out|   4 +-
 .../spark/bucketizedhiveinputformat.q.out   |   2 +
 .../spark/cross_product_check_1.q.out   |   4 +-
 .../spark/cross_product_check_2.q.out   |   4 +-
 .../spark/dynamic_rdd_cache.q.out   |  46 +-
 .../spark/groupby_sort_1_23.q.out   |  12 +-
 .../spark/groupby_sort_skew_1_23.q.out  |  12 +-
 .../clientpositive/spark/union_remove_25.q.out  |  16 +-
 .../clientpositive/spark/union_view.q.out   |  60 +-
 .../results/clientpositive/subquery_notin.q.out |  18 +-
 .../subquery_notin_having.q.java1.7.out |  10 +-
 .../subquery_unqualcolumnrefs.q.out |   2 +-
 .../tez/bucket_map_join_tez1.q.out  |   4 +-
 .../tez/cross_product_check_1.q.out |   4 +-
 .../tez/cross_product_check_2.q.out |   4 +-
 .../tez/dynpart_sort_optimization2.q.out|  34 +-
 .../clientpositive/tez/explainuser_1.q.out  | 260 +++---
 .../tez/vector_decimal_round.q.out  |   6 +-
 ql/src/test/results/clientpositive/udf1.q.out   |  40 +-
 .../results/clientpositive/udf_10_trims.q.out   |   2 +-
 .../clientpositive/udf_folder_constants.q.out   |   8 +-
 .../clientpositive/union_remove_25.q.out|  20 +-
 .../results/clientpositive/union_view.q.out |  60 +-
 .../clientpositive/vector_decimal_round.q.out   |   6 +-
 67 files changed, 3660 insertions(+), 1739 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/b340ecb5/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java
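
[Editor's note: the q.out churn in this commit is the planner evaluating constant
subexpressions at compile time; note the plans above projecting the literal '5' instead of
re-reading the column, or folding a grouped constant to 3. A schematic of the idea only,
not Calcite's API: per the diffstat, HiveReduceExpressionsRule delegates the actual
evaluation to the new HiveRexExecutorImpl. Requires Java 16+ for records and patterns.

public class ConstantFoldSketch {
  interface Expr { Expr fold(); }
  record Lit(int v) implements Expr { public Expr fold() { return this; } }
  record Var(String name) implements Expr { public Expr fold() { return this; } }
  record Add(Expr l, Expr r) implements Expr {
    public Expr fold() {
      Expr lf = l.fold(), rf = r.fold();
      if (lf instanceof Lit a && rf instanceof Lit b) {
        return new Lit(a.v() + b.v()); // both sides constant: evaluate now
      }
      return new Add(lf, rf);
    }
  }

  public static void main(String[] args) {
    // (1 + 2) + x folds to 3 + x at plan time.
    System.out.println(new Add(new Add(new Lit(1), new Lit(2)), new Var("x")).fold());
    // prints: Add[l=Lit[v=3], r=Var[name=x]]
  }
}]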

[6/9] hive git commit: HIVE-11927: Implement/Enable constant related optimization rules in Calcite: enable HiveReduceExpressionsRule to fold constants (Pengcheng Xiong, reviewed by Laljo John Pullokka

2015-12-22 Thread pxiong
http://git-wip-us.apache.org/repos/asf/hive/blob/b340ecb5/ql/src/test/results/clientpositive/perf/query39.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/query39.q.out 
b/ql/src/test/results/clientpositive/perf/query39.q.out
index a18cdaf..f4ad98c 100644
--- a/ql/src/test/results/clientpositive/perf/query39.q.out
+++ b/ql/src/test/results/clientpositive/perf/query39.q.out
@@ -30,17 +30,17 @@ Stage-0
 |  Statistics:Num rows: 112735 Data size: 161919824 Basic stats: 
COMPLETE Column stats: NONE
 |<-Reducer 6 [SIMPLE_EDGE]
Reduce Output Operator [RS_60]
-  key expressions:_col0 (type: int), _col1 (type: int), _col2 
(type: int), _col3 (type: double), _col4 (type: double), _col7 (type: int), 
_col8 (type: double), _col9 (type: double)
+  key expressions:_col0 (type: int), _col1 (type: int), 3 
(type: int), _col3 (type: double), _col4 (type: double), _col7 (type: int), 
_col8 (type: double), _col9 (type: double)
   sort order:
   Statistics:Num rows: 112735 Data size: 161919824 Basic 
stats: COMPLETE Column stats: NONE
   value expressions:_col5 (type: int), _col6 (type: int)
   Select Operator [SEL_59]
- 
outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"]
+ 
outputColumnNames:["_col0","_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9"]
  Statistics:Num rows: 112735 Data size: 161919824 Basic 
stats: COMPLETE Column stats: NONE
- Merge Join Operator [MERGEJOIN_103]
+ Merge Join Operator [MERGEJOIN_104]
  |  condition map:[{"":"Inner Join 0 to 1"}]
- |  keys:{"0":"_col2 (type: int), _col1 (type: 
int)","1":"_col2 (type: int), _col1 (type: int)"}
- |  
outputColumnNames:["_col1","_col2","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11"]
+ |  keys:{"0":"_col1 (type: int), _col0 (type: 
int)","1":"_col2 (type: int), _col1 (type: int)"}
+ |  
outputColumnNames:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col8","_col9"]
  |  Statistics:Num rows: 112735 Data size: 161919824 Basic 
stats: COMPLETE Column stats: NONE
  |<-Reducer 15 [SIMPLE_EDGE]
  |  Reduce Output Operator [RS_57]
@@ -78,7 +78,7 @@ Stage-0
  |   Select Operator [SEL_49]
  |  
outputColumnNames:["_col4","_col5","_col6","_col9","_col3"]
  |  Statistics:Num rows: 614922 
Data size: 883199024 Basic stats: COMPLETE Column stats: NONE
- |  Merge Join Operator 
[MERGEJOIN_102]
+ |  Merge Join Operator 
[MERGEJOIN_103]
  |  |  condition map:[{"":"Inner 
Join 0 to 1"}]
  |  |  keys:{"0":"_col0 (type: 
int)","1":"_col0 (type: int)"}
  |  |  
outputColumnNames:["_col3","_col4","_col5","_col6"]
@@ -105,7 +105,7 @@ Stage-0
  |sort order:+
  |Statistics:Num rows: 
559020 Data size: 802908187 Basic stats: COMPLETE Column stats: NONE
  |value expressions:_col3 
(type: int), _col4 (type: int), _col5 (type: int), _col6 (type: string)
- |Merge Join Operator 
[MERGEJOIN_101]
+ |Merge Join Operator 
[MERGEJOIN_102]
  ||  condition 
map:[{"":"Inner Join 0 to 1"}]
  ||  keys:{"0":"_col2 
(type: int)","1":"_col0 (type: int)"}
  ||  
outputColumnNames:["_col0","_col3","_col4","_col5","_col6"]
@@ -133,7 +133,7 @@ Stage-0
  |  sort order:+
  |  Statistics:Num 
rows: 508200 Data size: 729916518 Basic stats: COMPLETE Column stats: NONE
  |  value 
expressions:_col0 (type: int), _col3 (type: int), _col4 (type: int)
- |  Merge Join 
Operator [MERGEJOIN_100]
+ |  Merge Join 
Operator [MERGEJOIN_101]
  |  |  condition 
map:[{"":"Inner Join

[1/9] hive git commit: HIVE-11927: Implement/Enable constant related optimization rules in Calcite: enable HiveReduceExpressionsRule to fold constants (Pengcheng Xiong, reviewed by Laljo John Pullokkaran)

2015-12-22 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master af962230d -> b340ecb5e


http://git-wip-us.apache.org/repos/asf/hive/blob/b340ecb5/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
--
diff --git a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out 
b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
index 78c68c3..8c78fd9 100644
--- a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
@@ -2978,61 +2978,61 @@ Stage-0
   limit:-1
   Stage-1
  Reducer 2
- File Output Operator [FS_15]
+ File Output Operator [FS_14]
 compressed:false
 Statistics:Num rows: 1 Data size: 178 Basic stats: COMPLETE Column 
stats: COMPLETE
 table:{"input 
format:":"org.apache.hadoop.mapred.TextInputFormat","output 
format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}
-Select Operator [SEL_14]
+Select Operator [SEL_13]
outputColumnNames:["_col0","_col1"]
Statistics:Num rows: 1 Data size: 178 Basic stats: COMPLETE 
Column stats: COMPLETE
-   Filter Operator [FIL_13]
+   Filter Operator [FIL_12]
   predicate:_col3 is null (type: boolean)
   Statistics:Num rows: 1 Data size: 269 Basic stats: COMPLETE 
Column stats: COMPLETE
-  Merge Join Operator [MERGEJOIN_18]
+  Merge Join Operator [MERGEJOIN_17]
   |  condition map:[{"":"Left Outer Join0 to 1"}]
   |  keys:{"0":"_col1 (type: string)","1":"_col1 (type: 
string)"}
   |  outputColumnNames:["_col0","_col1","_col3"]
   |  Statistics:Num rows: 193 Data size: 51917 Basic stats: 
COMPLETE Column stats: COMPLETE
   |<-Map 1 [SIMPLE_EDGE]
-  |  Reduce Output Operator [RS_10]
+  |  Reduce Output Operator [RS_9]
   | key expressions:_col1 (type: string)
   | Map-reduce partition columns:_col1 (type: string)
   | sort order:+
   | Statistics:Num rows: 500 Data size: 89000 Basic stats: 
COMPLETE Column stats: COMPLETE
   | value expressions:_col0 (type: string)
-  | Select Operator [SEL_2]
+  | Select Operator [SEL_1]
   |outputColumnNames:["_col0","_col1"]
   |Statistics:Num rows: 500 Data size: 89000 Basic 
stats: COMPLETE Column stats: COMPLETE
   |TableScan [TS_0]
   |   alias:b
   |   Statistics:Num rows: 500 Data size: 89000 Basic 
stats: COMPLETE Column stats: COMPLETE
   |<-Reducer 4 [SIMPLE_EDGE]
- Reduce Output Operator [RS_11]
+ Reduce Output Operator [RS_10]
 key expressions:_col1 (type: string)
 Map-reduce partition columns:_col1 (type: string)
 sort order:+
 Statistics:Num rows: 83 Data size: 14774 Basic stats: 
COMPLETE Column stats: COMPLETE
-Select Operator [SEL_9]
+Select Operator [SEL_8]
outputColumnNames:["_col1"]
Statistics:Num rows: 83 Data size: 14774 Basic 
stats: COMPLETE Column stats: COMPLETE
-   Group By Operator [GBY_8]
+   Group By Operator [GBY_7]
|  keys:KEY._col0 (type: string), KEY._col1 (type: 
string)
|  outputColumnNames:["_col0","_col1"]
|  Statistics:Num rows: 83 Data size: 14774 Basic 
stats: COMPLETE Column stats: COMPLETE
|<-Map 3 [SIMPLE_EDGE]
-  Reduce Output Operator [RS_7]
+  Reduce Output Operator [RS_6]
  key expressions:_col0 (type: string), _col1 
(type: string)
  Map-reduce partition columns:_col0 (type: 
string), _col1 (type: string)
  sort order:++
  Statistics:Num rows: 83 Data size: 14774 
Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator [GBY_6]
+ Group By Operator [GBY_5]
 keys:key (type: string), value (type: 
string)
 outputColumnNames:["_col0","_col1"]
 Statistics:Num rows: 83 Data size: 14774 
Basic stats: COMPLETE Column stats: COMPLETE
-  

[8/9] hive git commit: HIVE-11927: Implement/Enable constant related optimization rules in Calcite: enable HiveReduceExpressionsRule to fold constants (Pengcheng Xiong, reviewed by Laljo John Pullokkaran)

2015-12-22 Thread pxiong
http://git-wip-us.apache.org/repos/asf/hive/blob/b340ecb5/ql/src/test/results/clientpositive/cbo_const.q.out
--
diff --git a/ql/src/test/results/clientpositive/cbo_const.q.out 
b/ql/src/test/results/clientpositive/cbo_const.q.out
new file mode 100644
index 0000000..adc5232
--- /dev/null
+++ b/ql/src/test/results/clientpositive/cbo_const.q.out
@@ -0,0 +1,334 @@
+PREHOOK: query: select
+  interval_day_time('2 1:2:3'),
+  interval_day_time(cast('2 1:2:3' as string)),
+  interval_day_time(cast('2 1:2:3' as varchar(10))),
+  interval_day_time(cast('2 1:2:3' as char(10))),
+  interval_day_time('2 1:2:3') = interval '2 1:2:3' day to second
+from src limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+ A masked pattern was here 
+POSTHOOK: query: select
+  interval_day_time('2 1:2:3'),
+  interval_day_time(cast('2 1:2:3' as string)),
+  interval_day_time(cast('2 1:2:3' as varchar(10))),
+  interval_day_time(cast('2 1:2:3' as char(10))),
+  interval_day_time('2 1:2:3') = interval '2 1:2:3' day to second
+from src limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+ A masked pattern was here 
+2 01:02:03.000000000	2 01:02:03.000000000	2 01:02:03.000000000	2 01:02:03.000000000	true
+PREHOOK: query: select count(*) from srcpart join (select ds as ds, ds as 
`date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = 
'2008-04-08'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+ A masked pattern was here 
+POSTHOOK: query: select count(*) from srcpart join (select ds as ds, ds as 
`date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = 
'2008-04-08'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+ A masked pattern was here 
+1000
+PREHOOK: query: drop view t1
+PREHOOK: type: DROPVIEW
+POSTHOOK: query: drop view t1
+POSTHOOK: type: DROPVIEW
+PREHOOK: query: create table t1_new (key string, value string) partitioned by 
(ds string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1_new
+POSTHOOK: query: create table t1_new (key string, value string) partitioned by 
(ds string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1_new
+PREHOOK: query: insert overwrite table t1_new partition (ds = '2011-10-15')
+select 'key1', 'value1' from src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@t1_new@ds=2011-10-15
+POSTHOOK: query: insert overwrite table t1_new partition (ds = '2011-10-15')
+select 'key1', 'value1' from src tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@t1_new@ds=2011-10-15
+POSTHOOK: Lineage: t1_new PARTITION(ds=2011-10-15).key SIMPLE []
+POSTHOOK: Lineage: t1_new PARTITION(ds=2011-10-15).value SIMPLE []
+PREHOOK: query: insert overwrite table t1_new partition (ds = '2011-10-16')
+select 'key2', 'value2' from src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@t1_new@ds=2011-10-16
+POSTHOOK: query: insert overwrite table t1_new partition (ds = '2011-10-16')
+select 'key2', 'value2' from src tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@t1_new@ds=2011-10-16
+POSTHOOK: Lineage: t1_new PARTITION(ds=2011-10-16).key SIMPLE []
+POSTHOOK: Lineage: t1_new PARTITION(ds=2011-10-16).value SIMPLE []
+PREHOOK: query: create view t1 partitioned on (ds) as
+select * from
+(
+select key, value, ds from t1_new
+union all
+select key, value, ds from t1_new
+)subq
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@t1_new
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create view t1 partitioned on (ds) as
+select * from
+(
+select key, value, ds from t1_new
+union all
+select key, value, ds from t1_new
+)subq
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@t1_new
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: select * from t1 where ds = '2011-10-15'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t1_new
+PREHOOK: Input: default@t1_new@ds=2011-10-15
+ A masked pattern was here 
+POSTHOOK: query: select * from t1 where ds = '2011-10-15'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t1_new
+POSTHOOK: Input: default@t1_new@ds=2011-10-15
+ A masked pattern was here 
+key1   value1  2011-10-15
+key1

[4/9] hive git commit: HIVE-11927: Implement/Enable constant related optimization rules in Calcite: enable HiveReduceExpressionsRule to fold constants (Pengcheng Xiong, reviewed by Laljo John Pullokkaran)

2015-12-22 Thread pxiong
http://git-wip-us.apache.org/repos/asf/hive/blob/b340ecb5/ql/src/test/results/clientpositive/perf/query66.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/query66.q.out 
b/ql/src/test/results/clientpositive/perf/query66.q.out
index 22eaf61..b2e6bf7 100644
--- a/ql/src/test/results/clientpositive/perf/query66.q.out
+++ b/ql/src/test/results/clientpositive/perf/query66.q.out
@@ -472,325 +472,328 @@ Stage-0
  key expressions:_col0 (type: string)
  sort order:+
  Statistics:Num rows: 26136 Data size: 12310056 Basic 
stats: COMPLETE Column stats: NONE
- value expressions:_col1 (type: int), _col2 (type: 
string), _col3 (type: string), _col4 (type: string), _col5 (type: string), 
_col6 (type: string), _col7 (type: int), _col8 (type: decimal(38,2)), _col9 
(type: decimal(38,2)), _col10 (type: decimal(38,2)), _col11 (type: 
decimal(38,2)), _col12 (type: decimal(38,2)), _col13 (type: decimal(38,2)), 
_col14 (type: decimal(38,2)), _col15 (type: decimal(38,2)), _col16 (type: 
decimal(38,2)), _col17 (type: decimal(38,2)), _col18 (type: decimal(38,2)), 
_col19 (type: decimal(38,2)), _col20 (type: decimal(38,12)), _col21 (type: 
decimal(38,12)), _col22 (type: decimal(38,12)), _col23 (type: decimal(38,12)), 
_col24 (type: decimal(38,12)), _col25 (type: decimal(38,12)), _col26 (type: 
decimal(38,12)), _col27 (type: decimal(38,12)), _col28 (type: decimal(38,12)), 
_col29 (type: decimal(38,12)), _col30 (type: decimal(38,12)), _col31 (type: 
decimal(38,12)), _col32 (type: decimal(38,2)), _col33 (type: decimal(38,2)), 
_col34 (type: decima
 l(38,2)), _col35 (type: decimal(38,2)), _col36 (type: decimal(38,2)), _col37 
(type: decimal(38,2)), _col38 (type: decimal(38,2)), _col39 (type: 
decimal(38,2)), _col40 (type: decimal(38,2)), _col41 (type: decimal(38,2)), 
_col42 (type: decimal(38,2)), _col43 (type: decimal(38,2))
- Group By Operator [GBY_71]
- |  
aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","sum(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)","sum(VALUE._col12)","sum(VALUE._col13)","sum(VALUE._col14)","sum(VALUE._col15)","sum(VALUE._col16)","sum(VALUE._col17)","sum(VALUE._col18)","sum(VALUE._col19)","sum(VALUE._col20)","sum(VALUE._col21)","sum(VALUE._col22)","sum(VALUE._col23)","sum(VALUE._col24)","sum(VALUE._col25)","sum(VALUE._col26)","sum(VALUE._col27)","sum(VALUE._col28)","sum(VALUE._col29)","sum(VALUE._col30)","sum(VALUE._col31)","sum(VALUE._col32)","sum(VALUE._col33)","sum(VALUE._col34)","sum(VALUE._col35)"]
- |  keys:KEY._col0 (type: string), KEY._col1 (type: int), 
KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), 
KEY._col5 (type: string), KEY._col6 (type: string), KEY._col7 (type: int)
- |  
outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41","_col42","_col43"]
- |  Statistics:Num rows: 26136 Data size: 12310056 Basic 
stats: COMPLETE Column stats: NONE
- |<-Union 7 [SIMPLE_EDGE]
-|<-Reducer 19 [CONTAINS]
-|  Reduce Output Operator [RS_70]
-| key expressions:_col0 (type: string), _col1 
(type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), 
_col5 (type: string), _col6 (type: string), _col7 (type: int)
-| Map-reduce partition columns:_col0 (type: 
string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 
(type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int)
-| sort order:
-| Statistics:Num rows: 52272 Data size: 24620112 
Basic stats: COMPLETE Column stats: NONE
-| value expressions:_col8 (type: decimal(38,2)), 
_col9 (type: decimal(38,2)), _col10 (type: decimal(38,2)), _col11 (type: 
decimal(38,2)), _col12 (type: decimal(38,2)), _col13 (type: decimal(38,2)), 
_col14 (type: decimal(38,2)), _col15 (type: decimal(38,2)), _col16 (type: 
decimal(38,2)), _col17 (type: decimal(38,2)), _col18 (type: decimal(38,2)), 
_col19 (type: decimal(38,2)), _col20 (type: decimal(38,12)), _col21 (type: 
decimal(38,12)), _col22 (type: decimal(38,12)), _col23 (type: decimal(38,12)), 
_col24 (type: decimal(38,12)), _col25 (type: decimal(38,12)), _col26 (type: 
decimal(38,12)), _col27 (type: decimal(3

hive git commit: HIVE-12751: Fix NVL explain syntax (Gopal V via Pengcheng Xiong)

2015-12-27 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master 1753630fe -> da0070b2c


HIVE-12751: Fix NVL explain syntax (Gopal V via Pengcheng Xiong)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/da0070b2
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/da0070b2
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/da0070b2

Branch: refs/heads/master
Commit: da0070b2c15d6590ae10d6e37633a59544b09746
Parents: 1753630
Author: Pengcheng Xiong 
Authored: Sun Dec 27 10:55:04 2015 -0800
Committer: Pengcheng Xiong 
Committed: Sun Dec 27 10:55:04 2015 -0800

--
 .../org/apache/hadoop/hive/ql/udf/generic/GenericUDFNvl.java   | 6 +++---
 ql/src/test/results/clientpositive/tez/vector_nvl.q.out| 2 +-
 ql/src/test/results/clientpositive/vector_nvl.q.out| 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/da0070b2/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFNvl.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFNvl.java 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFNvl.java
index 0a16da8..87fd461 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFNvl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFNvl.java
@@ -64,11 +64,11 @@ public class GenericUDFNvl extends GenericUDF{
   @Override
   public String getDisplayString(String[] children) {
 StringBuilder sb = new StringBuilder();
-sb.append("if ");
+sb.append("NVL(");
 sb.append(children[0]);
-sb.append(" is null ");
-sb.append("returns");
+sb.append(',');
 sb.append(children[1]);
+sb.append(')');
 return sb.toString() ;
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/da0070b2/ql/src/test/results/clientpositive/tez/vector_nvl.q.out
--
diff --git a/ql/src/test/results/clientpositive/tez/vector_nvl.q.out 
b/ql/src/test/results/clientpositive/tez/vector_nvl.q.out
index b3f83ce..b926ab4b 100644
--- a/ql/src/test/results/clientpositive/tez/vector_nvl.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_nvl.q.out
@@ -70,7 +70,7 @@ STAGE PLANS:
 TableScan
   alias: alltypesorc
   Select Operator
-expressions: cfloat (type: float), if cfloat is null returns1 
(type: float)
+expressions: cfloat (type: float), NVL(cfloat,1) (type: float)
 outputColumnNames: _col0, _col1
 Limit
   Number of rows: 10

http://git-wip-us.apache.org/repos/asf/hive/blob/da0070b2/ql/src/test/results/clientpositive/vector_nvl.q.out
--
diff --git a/ql/src/test/results/clientpositive/vector_nvl.q.out 
b/ql/src/test/results/clientpositive/vector_nvl.q.out
index 88836c0..88e8dca 100644
--- a/ql/src/test/results/clientpositive/vector_nvl.q.out
+++ b/ql/src/test/results/clientpositive/vector_nvl.q.out
@@ -88,7 +88,7 @@ STAGE PLANS:
 alias: alltypesorc
 Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
 Select Operator
-  expressions: cfloat (type: float), if cfloat is null returns1 
(type: float)
+  expressions: cfloat (type: float), NVL(cfloat,1) (type: float)
   outputColumnNames: _col0, _col1
   Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
   Limit



hive git commit: HIVE-12742: NULL table comparison within CASE does not work as previous hive versions (Pengcheng Xiong, reviewed by Gopal V)

2015-12-27 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master da0070b2c -> 054be7c25


HIVE-12742: NULL table comparison within CASE does not work as previous hive 
versions (Pengcheng Xiong, reviewed by Gopal V)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/054be7c2
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/054be7c2
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/054be7c2

Branch: refs/heads/master
Commit: 054be7c25e78d424878c0dc708b131171b20fb9c
Parents: da0070b
Author: Pengcheng Xiong 
Authored: Sun Dec 27 11:05:27 2015 -0800
Committer: Pengcheng Xiong 
Committed: Sun Dec 27 11:05:27 2015 -0800

--
 .../optimizer/ConstantPropagateProcFactory.java |  31 +-
 .../queries/clientpositive/constantPropWhen.q   |  43 +++
 .../clientpositive/constantPropWhen.q.out   | 382 +++
 .../test/results/clientpositive/fold_case.q.out |  16 +-
 .../clientpositive/fold_eq_with_case_when.q.out |   8 +-
 .../test/results/clientpositive/fold_when.q.out |  20 +-
 6 files changed, 474 insertions(+), 26 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/054be7c2/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
index d182d80..ea200db 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
@@ -69,6 +69,7 @@ import 
org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFNvl;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan;
@@ -726,8 +727,18 @@ public final class ConstantPropagateProcFactory {
 } else if(thenVal.equals(elseVal)){
   return thenExpr;
 } else if (thenVal instanceof Boolean && elseVal instanceof Boolean) {
-  return Boolean.TRUE.equals(thenVal) ? whenExpr :
-ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPNot(), 
newExprs.subList(0, 1));
+  List<ExprNodeDesc> children = new ArrayList<>();
+  children.add(whenExpr);
+  children.add(new ExprNodeConstantDesc(false));
+  ExprNodeGenericFuncDesc func = 
ExprNodeGenericFuncDesc.newInstance(new GenericUDFNvl(),
+  children);
+  if (Boolean.TRUE.equals(thenVal)) {
+return func;
+  } else {
+List<ExprNodeDesc> exprs = new ArrayList<>();
+exprs.add(func);
+return ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPNot(), 
exprs);
+  }
 } else {
   return null;
 }
@@ -767,8 +778,20 @@ public final class ConstantPropagateProcFactory {
 } else if(thenVal.equals(elseVal)){
   return thenExpr;
 } else if (thenVal instanceof Boolean && elseVal instanceof Boolean) {
-  return Boolean.TRUE.equals(thenVal) ? 
ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPEqual(), 
newExprs.subList(0, 2)) :
-ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPNotEqual(), 
newExprs.subList(0, 2));
+  ExprNodeGenericFuncDesc equal = ExprNodeGenericFuncDesc.newInstance(
+  new GenericUDFOPEqual(), newExprs.subList(0, 2));
+  List<ExprNodeDesc> children = new ArrayList<>();
+  children.add(equal);
+  children.add(new ExprNodeConstantDesc(false));
+  ExprNodeGenericFuncDesc func = 
ExprNodeGenericFuncDesc.newInstance(new GenericUDFNvl(),
+  children);
+  if (Boolean.TRUE.equals(thenVal)) {
+return func;
+  } else {
+List<ExprNodeDesc> exprs = new ArrayList<>();
+exprs.add(func);
+return ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPNot(), 
exprs);
+  }
 } else {
   return null;
 }
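
The NVL(..., false) wrapper is what preserves NULL semantics when the CASE is folded away; a sketch of the behavior being fixed (hypothetical table t with a nullable int column a):

select case when a = 1 then true else false end from t;
-- when a is NULL, (a = 1) evaluates to NULL, yet the CASE still returns false;
-- folding the CASE to the bare comparison (a = 1) would change results on NULLs,
-- so constant propagation now folds it to NVL(a = 1, false) instead.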

http://git-wip-us.apache.org/repos/asf/hive/blob/054be7c2/ql/src/test/queries/clientpositive/constantPropWhen.q
--
diff --git a/ql/src/test/queries/clientpositive/constantPropWhen.q 
b/ql/src/test/queries/clientpositive/constantPropWhen.q
new file mode 100644
index 0000000..c1d4885
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/constantPropWhen

[2/2] hive git commit: HIVE-12742: NULL table comparison within CASE does not work as previous hive versions (Pengcheng Xiong, reviewed by Gopal V)

2015-12-27 Thread pxiong
HIVE-12742: NULL table comparison within CASE does not work as previous hive 
versions (Pengcheng Xiong, reviewed by Gopal V)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1fbf12d4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1fbf12d4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1fbf12d4

Branch: refs/heads/branch-2.0
Commit: 1fbf12d4528da3a051f2a0d11c5a76a61db399e4
Parents: 28842f9
Author: Pengcheng Xiong 
Authored: Sun Dec 27 11:05:27 2015 -0800
Committer: Pengcheng Xiong 
Committed: Sun Dec 27 11:30:15 2015 -0800

--
 .../optimizer/ConstantPropagateProcFactory.java |  31 +-
 .../queries/clientpositive/constantPropWhen.q   |  43 +++
 .../clientpositive/constantPropWhen.q.out   | 382 +++
 .../test/results/clientpositive/fold_case.q.out |  16 +-
 .../clientpositive/fold_eq_with_case_when.q.out |   8 +-
 .../test/results/clientpositive/fold_when.q.out |  20 +-
 6 files changed, 474 insertions(+), 26 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/1fbf12d4/ql/src/test/queries/clientpositive/constantPropWhen.q
--
diff --git a/ql/src/test/queries/clientpositive/constantPropWhen.q 
b/ql/src/test/queries/clientpositive/constantPropWhen.q
new file mode 100644
index 0000000..c1d4885
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/constantPropWhen.q
@@ -0,0 +1,43 @@
+set hive.mapred.mode=nonstrict;
+
+drop table test_1; 

[1/2] hive git commit: HIVE-12751: Fix NVL explain syntax (Gopal V via Pengcheng Xiong)

2015-12-27 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/branch-2.0 55c629691 -> 1fbf12d45


HIVE-12751: Fix NVL explain syntax (Gopal V via Pengcheng Xiong)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/28842f9b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/28842f9b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/28842f9b

Branch: refs/heads/branch-2.0
Commit: 28842f9b9f4e18f0b881778409b69b70c81d0b62
Parents: 55c6296
Author: Pengcheng Xiong 
Authored: Sun Dec 27 10:55:04 2015 -0800
Committer: Pengcheng Xiong 
Committed: Sun Dec 27 11:30:04 2015 -0800

--
 .../org/apache/hadoop/hive/ql/udf/generic/GenericUDFNvl.java   | 6 +++---
 ql/src/test/results/clientpositive/tez/vector_nvl.q.out| 2 +-
 ql/src/test/results/clientpositive/vector_nvl.q.out| 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)
--




[1/2] hive git commit: HIVE-12744: GROUPING__ID failed to be recognized in multiple insert (Pengcheng Xiong via Ashutosh Chauhan)

2015-12-29 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master 9f025baa9 -> 7b7d288d6


HIVE-12744: GROUPING__ID failed to be recognized in multiple insert (Pengcheng 
Xiong via Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b9d13a7e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b9d13a7e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b9d13a7e

Branch: refs/heads/master
Commit: b9d13a7e0f344ff446265d04bf953e2c7ea12991
Parents: 9f025ba
Author: Pengcheng Xiong 
Authored: Tue Dec 29 10:28:00 2015 -0800
Committer: Pengcheng Xiong 
Committed: Tue Dec 29 10:28:00 2015 -0800

--
 .../org/apache/hadoop/hive/ql/ErrorMsg.java |   3 +
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |   8 +
 .../clientnegative/groupby_cube_multi_gby.q |  10 ++
 .../clientpositive/groupby_cube_multi_gby.q |  12 ++
 .../clientnegative/groupby_cube_multi_gby.q.out |  17 +++
 .../clientpositive/groupby_cube_multi_gby.q.out | 146 +++
 6 files changed, 196 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/b9d13a7e/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java 
b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index 7585bad..6a62592 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -433,6 +433,9 @@ public enum ErrorMsg {
   CANNOT_CHANGE_COLUMN_TYPE(10312, "Changing from type {0} to {1} is not 
supported for column {2}. SerDe may be incompatible", true),
   REPLACE_CANNOT_DROP_COLUMNS(10313, "Replacing columns cannot drop columns 
for table {0}. SerDe may be incompatible", true),
   REPLACE_UNSUPPORTED_TYPE_CONVERSION(10314, "Replacing columns with 
unsupported type conversion (from {0} to {1}) for column {2}. SerDe may be 
incompatible", true),
+  HIVE_GROUPING_SETS_AGGR_NOMAPAGGR_MULTIGBY(10315,
+  "Grouping sets aggregations (with rollups or cubes) are not allowed when 
" +
+  "HIVEMULTIGROUPBYSINGLEREDUCER is turned on. Set 
hive.multigroupby.singlereducer=false if you want to use grouping sets"),
  //== 20000 range starts here //
  SCRIPT_INIT_ERROR(20000, "Unable to initialize custom script."),
   SCRIPT_IO_ERROR(20001, "An error occurred while reading or writing to your 
custom script. "

http://git-wip-us.apache.org/repos/asf/hive/blob/b9d13a7e/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 403eda2..ab9271f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -5374,6 +5374,14 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
 List<ExprNodeDesc.ExprNodeDescEqualityWrapper> whereExpressions =
 new ArrayList<ExprNodeDesc.ExprNodeDescEqualityWrapper>();
 for (String dest : dests) {
+  ObjectPair<List<ASTNode>, List<Integer>> grpByExprsGroupingSets =
+  getGroupByGroupingSetsForClause(parseInfo, dest);
+
+  List<Integer> groupingSets = grpByExprsGroupingSets.getSecond();
+  if (!groupingSets.isEmpty()) {
+throw new 
SemanticException(ErrorMsg.HIVE_GROUPING_SETS_AGGR_NOMAPAGGR_MULTIGBY.getMsg());
+  }
+  
   ASTNode whereExpr = parseInfo.getWhrForClause(dest);
 
   if (whereExpr != null) {

http://git-wip-us.apache.org/repos/asf/hive/blob/b9d13a7e/ql/src/test/queries/clientnegative/groupby_cube_multi_gby.q
--
diff --git a/ql/src/test/queries/clientnegative/groupby_cube_multi_gby.q 
b/ql/src/test/queries/clientnegative/groupby_cube_multi_gby.q
new file mode 100644
index 0000000..cddbe1a
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/groupby_cube_multi_gby.q
@@ -0,0 +1,10 @@
+create table t1 like src;
+create table t2 like src;
+
+explain from src
+insert into table t1 select
+key, GROUPING__ID
+group by key, value with cube
+insert into table t2 select
+key, value
+group by key, value grouping sets ((key), (key, value));
\ No newline at end of file
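
As the new error message suggests, the same multi-insert remains expressible once single-reducer multi group by is disabled; a sketch of the workaround (t1 and t2 as created above):

set hive.multigroupby.singlereducer=false;
explain from src
insert into table t1 select key, GROUPING__ID group by key, value with cube
insert into table t2 select key, value group by key, value grouping sets ((key), (key, value));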

http://git-wip-us.apache.org/repos/asf/hive/blob/b9d13a7e/ql/src/test/queries/clientpositive/groupby_cube_multi_gby.q
--
diff --git a/ql/src/test/queries/clientpositive/groupby_cube_multi_gby.q 
b/ql/src/test/queries/clientpositive/groupby_cube_multi_gby.q
new file mode 100644
index 0000000..80022bb
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/groupby_cube_multi_gby.q
@@ -0,0 +1,12 @@
+set hive.

[2/2] hive git commit: HIVE-12752: Change the schema version to 2.1.0 (Shinichi Yamashita, reviewed by Prasad Mujumdar, Pengcheng Xiong)

2015-12-29 Thread pxiong
HIVE-12752: Change the schema version to 2.1.0 (Shinichi Yamashita, reviewed by 
Prasad Mujumdar, Pengcheng Xiong)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7b7d288d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7b7d288d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7b7d288d

Branch: refs/heads/master
Commit: 7b7d288d6e67fc28672764c3a20bad4bf10ba9f5
Parents: b9d13a7
Author: Pengcheng Xiong 
Authored: Tue Dec 29 10:32:24 2015 -0800
Committer: Pengcheng Xiong 
Committed: Tue Dec 29 10:32:24 2015 -0800

--
 metastore/scripts/upgrade/derby/hive-schema-2.1.0.derby.sql| 2 +-
 metastore/scripts/upgrade/derby/upgrade-2.0.0-to-2.1.0.derby.sql   | 2 +-
 metastore/scripts/upgrade/mssql/hive-schema-2.1.0.mssql.sql| 2 +-
 metastore/scripts/upgrade/mssql/upgrade-2.0.0-to-2.1.0.mssql.sql   | 2 +-
 metastore/scripts/upgrade/mysql/hive-schema-2.1.0.mysql.sql| 2 +-
 metastore/scripts/upgrade/mysql/upgrade-2.0.0-to-2.1.0.mysql.sql   | 2 +-
 metastore/scripts/upgrade/oracle/hive-schema-2.1.0.oracle.sql  | 2 +-
 metastore/scripts/upgrade/oracle/upgrade-2.0.0-to-2.1.0.oracle.sql | 2 +-
 metastore/scripts/upgrade/postgres/hive-schema-2.1.0.postgres.sql  | 2 +-
 .../scripts/upgrade/postgres/upgrade-2.0.0-to-2.1.0.postgres.sql   | 2 +-
 pom.xml| 2 +-
 11 files changed, 11 insertions(+), 11 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/7b7d288d/metastore/scripts/upgrade/derby/hive-schema-2.1.0.derby.sql
--
diff --git a/metastore/scripts/upgrade/derby/hive-schema-2.1.0.derby.sql 
b/metastore/scripts/upgrade/derby/hive-schema-2.1.0.derby.sql
index f08de64..8083199 100644
--- a/metastore/scripts/upgrade/derby/hive-schema-2.1.0.derby.sql
+++ b/metastore/scripts/upgrade/derby/hive-schema-2.1.0.derby.sql
@@ -330,5 +330,5 @@ RUN 'hive-txn-schema-0.13.0.derby.sql';
 -- -
 -- Record schema version. Should be the last step in the init script
 -- -
-INSERT INTO "APP"."VERSION" (VER_ID, SCHEMA_VERSION, VERSION_COMMENT) VALUES 
(1, '2.0.0', 'Hive release version 2.0.0');
+INSERT INTO "APP"."VERSION" (VER_ID, SCHEMA_VERSION, VERSION_COMMENT) VALUES 
(1, '2.1.0', 'Hive release version 2.1.0');
 

http://git-wip-us.apache.org/repos/asf/hive/blob/7b7d288d/metastore/scripts/upgrade/derby/upgrade-2.0.0-to-2.1.0.derby.sql
--
diff --git a/metastore/scripts/upgrade/derby/upgrade-2.0.0-to-2.1.0.derby.sql 
b/metastore/scripts/upgrade/derby/upgrade-2.0.0-to-2.1.0.derby.sql
index a389d7c..30de00b 100644
--- a/metastore/scripts/upgrade/derby/upgrade-2.0.0-to-2.1.0.derby.sql
+++ b/metastore/scripts/upgrade/derby/upgrade-2.0.0-to-2.1.0.derby.sql
@@ -1,3 +1,3 @@
 -- Upgrade MetaStore schema from 2.0.0 to 2.1.0
 
-UPDATE "APP".VERSION SET SCHEMA_VERSION='2.1.0', VERSION_COMMENT='Hive release 
version 2.0.0' where VER_ID=1;
+UPDATE "APP".VERSION SET SCHEMA_VERSION='2.1.0', VERSION_COMMENT='Hive release 
version 2.1.0' where VER_ID=1;
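
After an upgrade script runs, the recorded version can be checked directly; a sketch for the Derby metastore, querying the same VERSION table the scripts above write:

SELECT SCHEMA_VERSION, VERSION_COMMENT FROM "APP".VERSION WHERE VER_ID = 1;
-- expected after the upgrade: 2.1.0, 'Hive release version 2.1.0'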

http://git-wip-us.apache.org/repos/asf/hive/blob/7b7d288d/metastore/scripts/upgrade/mssql/hive-schema-2.1.0.mssql.sql
--
diff --git a/metastore/scripts/upgrade/mssql/hive-schema-2.1.0.mssql.sql 
b/metastore/scripts/upgrade/mssql/hive-schema-2.1.0.mssql.sql
index 1ec8632..731cc25 100644
--- a/metastore/scripts/upgrade/mssql/hive-schema-2.1.0.mssql.sql
+++ b/metastore/scripts/upgrade/mssql/hive-schema-2.1.0.mssql.sql
@@ -944,4 +944,4 @@ ALTER TABLE TXN_COMPONENTS  WITH CHECK ADD FOREIGN 
KEY(TC_TXNID) REFERENCES TXNS
 -- -
 -- Record schema version. Should be the last step in the init script
 -- -
-INSERT INTO VERSION (VER_ID, SCHEMA_VERSION, VERSION_COMMENT) VALUES (1, 
'2.0.0', 'Hive release version 2.0.0');
+INSERT INTO VERSION (VER_ID, SCHEMA_VERSION, VERSION_COMMENT) VALUES (1, 
'2.1.0', 'Hive release version 2.1.0');

http://git-wip-us.apache.org/repos/asf/hive/blob/7b7d288d/metastore/scripts/upgrade/mssql/upgrade-2.0.0-to-2.1.0.mssql.sql
--
diff --git a/metastore/scripts/upgrade/mssql/upgrade-2.0.0-to-2.1.0.mssql.sql 
b/metastore/scripts/upgrade/mssql/upgrade-2.0.0-to-2.1.0.mssql.sql
index 03ff816..0b806eb 100644
--- a/metastore/scripts/upgrade/mssql/upgrade-2.0.0-to-2.1.0.mssql.sql
+++ b/metastore/scripts/upgrade/mssql/upgrade-2.0.0-to-2.1.0.mssql.sql
@@ -1,4 +1,4 @@
 SELECT 

hive git commit: HIVE-12744: GROUPING__ID failed to be recognized in multiple insert (Pengcheng Xiong via Ashutosh Chauhan)

2015-12-29 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/branch-2.0 06902462f -> 7cbbfa777


HIVE-12744: GROUPING__ID failed to be recognized in multiple insert (Pengcheng 
Xiong via Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7cbbfa77
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7cbbfa77
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7cbbfa77

Branch: refs/heads/branch-2.0
Commit: 7cbbfa77754a7cb19af3a8e1c9425ce94cf64938
Parents: 0690246
Author: Pengcheng Xiong 
Authored: Tue Dec 29 10:28:00 2015 -0800
Committer: Pengcheng Xiong 
Committed: Tue Dec 29 10:46:25 2015 -0800

--
 .../org/apache/hadoop/hive/ql/ErrorMsg.java |   3 +
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |   8 +
 .../clientnegative/groupby_cube_multi_gby.q |  10 ++
 .../clientpositive/groupby_cube_multi_gby.q |  12 ++
 .../clientnegative/groupby_cube_multi_gby.q.out |  17 +++
 .../clientpositive/groupby_cube_multi_gby.q.out | 146 +++
 6 files changed, 196 insertions(+)
--



hive git commit: HIVE-12782: update the golden files for some tests that fail (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2016-01-05 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master e5c3cf1de -> af0522754


HIVE-12782: update the golden files for some tests that fail (Pengcheng Xiong, 
reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/af052275
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/af052275
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/af052275

Branch: refs/heads/master
Commit: af0522754525b32c78c6bbc74e1e0a2875249d7e
Parents: e5c3cf1
Author: Pengcheng Xiong 
Authored: Tue Jan 5 13:06:33 2016 -0800
Committer: Pengcheng Xiong 
Committed: Tue Jan 5 13:06:33 2016 -0800

--
 .../encryption_insert_partition_dynamic.q   |  12 -
 ql/src/test/queries/clientpositive/order2.q |   5 -
 ...lumnstats_partlvl_multiple_part_clause.q.out |   2 +-
 .../encryption_insert_partition_dynamic.q.out   | 752 +--
 ql/src/test/results/clientpositive/order2.q.out |  56 --
 5 files changed, 14 insertions(+), 813 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/af052275/ql/src/test/queries/clientpositive/encryption_insert_partition_dynamic.q
--
diff --git 
a/ql/src/test/queries/clientpositive/encryption_insert_partition_dynamic.q 
b/ql/src/test/queries/clientpositive/encryption_insert_partition_dynamic.q
index 6f1f621..11df6f7 100644
--- a/ql/src/test/queries/clientpositive/encryption_insert_partition_dynamic.q
+++ b/ql/src/test/queries/clientpositive/encryption_insert_partition_dynamic.q
@@ -20,10 +20,6 @@ create table unencryptedTable(value string)
 partitioned by (key string) clustered by (value) into 2 buckets stored as 
orc TBLPROPERTIES ('transactional'='true');
 
 -- insert encrypted table from values
-explain extended insert into table encryptedTable partition (key) values
-('val_501', '501'),
-('val_502', '502');
-
 insert into table encryptedTable partition (key) values
 ('val_501', '501'),
 ('val_502', '502');
@@ -31,10 +27,6 @@ insert into table encryptedTable partition (key) values
 select * from encryptedTable order by key;
 
 -- insert encrypted table from unencrypted source
-explain extended from src
-insert into table encryptedTable partition (key)
-select value, key limit 2;
-
 from src
 insert into table encryptedTable partition (key)
 select value, key limit 2;
@@ -42,10 +34,6 @@ insert into table encryptedTable partition (key)
 select * from encryptedTable order by key;
 
 -- insert unencrypted table from encrypted source
-explain extended from encryptedTable
-insert into table unencryptedTable partition (key)
-select value, key;
-
 from encryptedTable
 insert into table unencryptedTable partition (key)
 select value, key;

http://git-wip-us.apache.org/repos/asf/hive/blob/af052275/ql/src/test/queries/clientpositive/order2.q
--
diff --git a/ql/src/test/queries/clientpositive/order2.q 
b/ql/src/test/queries/clientpositive/order2.q
index 86617f4..504589a 100644
--- a/ql/src/test/queries/clientpositive/order2.q
+++ b/ql/src/test/queries/clientpositive/order2.q
@@ -1,11 +1,6 @@
 set hive.mapred.mode=nonstrict;
 set hive.optimize.ppd=true;
 
-EXPLAIN
-SELECT subq.key, subq.value FROM 
-(SELECT x.* FROM SRC x ORDER BY key limit 10) subq
-where subq.key < 10;
-
 SELECT subq.key, subq.value FROM 
 (SELECT x.* FROM SRC x ORDER BY key limit 10) subq
 where subq.key < 10;

http://git-wip-us.apache.org/repos/asf/hive/blob/af052275/ql/src/test/results/clientnegative/columnstats_partlvl_multiple_part_clause.q.out
--
diff --git 
a/ql/src/test/results/clientnegative/columnstats_partlvl_multiple_part_clause.q.out
 
b/ql/src/test/results/clientnegative/columnstats_partlvl_multiple_part_clause.q.out
index ce79830..feae5ef 100644
--- 
a/ql/src/test/results/clientnegative/columnstats_partlvl_multiple_part_clause.q.out
+++ 
b/ql/src/test/results/clientnegative/columnstats_partlvl_multiple_part_clause.q.out
@@ -66,4 +66,4 @@ POSTHOOK: type: LOAD
  A masked pattern was here 
 POSTHOOK: Output: default@employee_part
 POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=UK
-FAILED: ParseException line 5:79 mismatched input 'partition' expecting 
KW_COMPUTE near ')' in analyze statement
+FAILED: ParseException line 5:79 cannot recognize input near 'partition' '(' 
'employeeSalary' in analyze statement

http://git-wip-us.apache.org/repos/asf/hive/blob/af052275/ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_dynamic.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_dynamic.q

hive git commit: HIVE-12793: Address TestSparkCliDriver.testCliDriver_order2 failure due to HIVE-12782 (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2016-01-06 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master 8069b59a0 -> ac5ad78fb


HIVE-12793: Address TestSparkCliDriver.testCliDriver_order2 failure due to 
HIVE-12782 (Pengcheng Xiong, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ac5ad78f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ac5ad78f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ac5ad78f

Branch: refs/heads/master
Commit: ac5ad78fbf939b5c5baffe9b690c9b146b15aa88
Parents: 8069b59
Author: Pengcheng Xiong 
Authored: Wed Jan 6 15:11:51 2016 -0800
Committer: Pengcheng Xiong 
Committed: Wed Jan 6 15:12:20 2016 -0800

--
 .../results/clientpositive/spark/order2.q.out   | 62 
 1 file changed, 62 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/ac5ad78f/ql/src/test/results/clientpositive/spark/order2.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/order2.q.out 
b/ql/src/test/results/clientpositive/spark/order2.q.out
index 628ea29..5ec10fe 100644
--- a/ql/src/test/results/clientpositive/spark/order2.q.out
+++ b/ql/src/test/results/clientpositive/spark/order2.q.out
@@ -1,65 +1,3 @@
-PREHOOK: query: EXPLAIN
-SELECT subq.key, subq.value FROM 
-(SELECT x.* FROM SRC x ORDER BY key limit 10) subq
-where subq.key < 10
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
-SELECT subq.key, subq.value FROM 
-(SELECT x.* FROM SRC x ORDER BY key limit 10) subq
-where subq.key < 10
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-Spark
-  Edges:
-Reducer 2 <- Map 1 (SORT, 1)
- A masked pattern was here 
-  Vertices:
-Map 1 
-Map Operator Tree:
-TableScan
-  alias: x
-  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-  Select Operator
-expressions: key (type: string), value (type: string)
-outputColumnNames: _col0, _col1
-Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-Reduce Output Operator
-  key expressions: _col0 (type: string)
-  sort order: +
-  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-  TopN Hash Memory Usage: 0.1
-  value expressions: _col1 (type: string)
-Reducer 2 
-Reduce Operator Tree:
-  Select Operator
-expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 
(type: string)
-outputColumnNames: _col0, _col1
-Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-Limit
-  Number of rows: 10
-  Statistics: Num rows: 10 Data size: 100 Basic stats: 
COMPLETE Column stats: NONE
-  Filter Operator
-predicate: (UDFToDouble(_col0) < 10.0) (type: boolean)
-Statistics: Num rows: 3 Data size: 30 Basic stats: 
COMPLETE Column stats: NONE
-File Output Operator
-  compressed: false
-  Statistics: Num rows: 3 Data size: 30 Basic stats: 
COMPLETE Column stats: NONE
-  table:
-  input format: 
org.apache.hadoop.mapred.TextInputFormat
-  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-  serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-Fetch Operator
-  limit: -1
-  Processor Tree:
-ListSink
-
 PREHOOK: query: SELECT subq.key, subq.value FROM 
 (SELECT x.* FROM SRC x ORDER BY key limit 10) subq
 where subq.key < 10



hive git commit: HIVE-14393: Tuple in list feature fails if there's only 1 tuple in the list (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2016-08-02 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master c08490b74 -> 78a90a62d


HIVE-14393: Tuple in list feature fails if there's only 1 tuple in the list 
(Pengcheng Xiong, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/78a90a62
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/78a90a62
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/78a90a62

Branch: refs/heads/master
Commit: 78a90a62d0823b5d25be26d5b03e0c2e4be3b58c
Parents: c08490b
Author: Pengcheng Xiong 
Authored: Tue Aug 2 09:11:03 2016 -0700
Committer: Pengcheng Xiong 
Committed: Tue Aug 2 09:11:03 2016 -0700

--
 .../hadoop/hive/ql/parse/IdentifiersParser.g|   8 +-
 .../clientpositive/multi_column_in_single.q |  67 
 .../clientpositive/multi_column_in_single.q.out | 372 +++
 3 files changed, 443 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/78a90a62/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
index 16251d3..0bbc4b6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
@@ -470,10 +470,10 @@ expressions
 precedenceEqualExpressionMutiple
 :
 (LPAREN precedenceBitwiseOrExpression (COMMA 
precedenceBitwiseOrExpression)+ RPAREN -> ^(TOK_FUNCTION Identifier["struct"] 
precedenceBitwiseOrExpression+))
-( (KW_IN LPAREN expressionsToStruct (COMMA expressionsToStruct)+ RPAREN)
-   -> ^(TOK_FUNCTION KW_IN $precedenceEqualExpressionMutiple 
expressionsToStruct+)
-| (KW_NOT KW_IN LPAREN expressionsToStruct (COMMA expressionsToStruct)+ 
RPAREN)
-   -> ^(KW_NOT ^(TOK_FUNCTION KW_IN $precedenceEqualExpressionMutiple 
expressionsToStruct+)))
+( (KW_IN LPAREN expressionsToStruct (COMMA expressionsToStruct)* RPAREN)
+   -> ^(TOK_FUNCTION KW_IN $precedenceEqualExpressionMutiple 
expressionsToStruct*)
+| (KW_NOT KW_IN LPAREN expressionsToStruct (COMMA expressionsToStruct)* 
RPAREN)
+   -> ^(KW_NOT ^(TOK_FUNCTION KW_IN $precedenceEqualExpressionMutiple 
expressionsToStruct*)))
 ;
 
 expressionsToStruct
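
Relaxing + to * on the tuple list is what admits a single struct after IN; the first query in the new test file below exercises exactly that case:

select * from src where (key, value) in (('238','val_238'));
-- before this fix the grammar required at least two tuples in the IN list,
-- so this single-tuple form failed to parse.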

http://git-wip-us.apache.org/repos/asf/hive/blob/78a90a62/ql/src/test/queries/clientpositive/multi_column_in_single.q
--
diff --git a/ql/src/test/queries/clientpositive/multi_column_in_single.q 
b/ql/src/test/queries/clientpositive/multi_column_in_single.q
new file mode 100644
index 0000000..ca2d16c
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/multi_column_in_single.q
@@ -0,0 +1,67 @@
+set hive.mapred.mode=nonstrict;
+
+select * from src where (key, value) in (('238','val_238'));
+
+drop table emps;
+
+create table emps (empno int, deptno int, empname string);
+
+insert into table emps values 
(1,2,"11"),(1,2,"11"),(3,4,"33"),(1,3,"11"),(2,5,"22"),(2,5,"22");
+
+select * from emps;
+
+select * from emps where (int(empno+deptno/2), int(deptno/3)) in ((3,2));
+
+select * from emps where (int(empno+deptno/2), int(deptno/3)) not in ((3,2));
+
+select * from emps where (empno,deptno) in ((3,2));
+
+select * from emps where (empno,deptno) not in ((3,2));
+
+select * from emps where (empno,deptno) in ((1,3));
+
+select * from emps where (empno,deptno) not in ((1,3));
+
+explain
+select * from emps where (empno+1,deptno) in ((3,2));
+
+explain 
+select * from emps where (empno+1,deptno) not in ((3,2));
+
+explain select * from emps where ((empno*2)|1,deptno) in ((empno+2,2));
+
+select * from emps where ((empno*2)|1,deptno) in ((empno+2,2));
+
+select (empno*2)|1,substr(empname,1,1) from emps;
+
+select * from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+2,'2'));
+
+select * from emps where ((empno*2)|1,substr(empname,1,1)) not in 
((empno+2,'2'));
+
+select * from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+3,'2'));
+
+select * from emps where ((empno*2)|1,substr(empname,1,1)) not in 
((empno+3,'2'));
+
+
+select sum(empno), empname from emps where ((empno*2)|1,substr(empname,1,1)) 
in ((empno+3,'2'))
+group by empname;
+
+select * from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+3,'2'))
+union
+select * from emps where (empno,deptno) in ((3,2));
+
+drop view v;
+
+create view v as 
+select * from(
+select * from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+3,'2'))
+union
+select * from emps where (empno,deptno) in ((3,2)))subq order by empno desc;
+
+select * from v;
+
+select subq.e1 from 
+(select (empno*2)|1 as e1, substr(empname,1,1) as n1 from emps)subq
+join
+(select empno a
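
The test file is truncated by the archive, but the queries above already exercise the semantics: a multi-column IN compares the left-hand tuple, which the grammar rewrites into a struct (the TOK_FUNCTION Identifier["struct"] node), against each right-hand tuple. A small sketch of that membership test with toy types, not Hive's evaluators:

```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class TupleInList {
    // "(a,b) in ((x,y), ...)": build the left-hand tuple once, then test
    // deep equality against each right-hand tuple. With the grammar fix,
    // the right-hand list may hold exactly one tuple.
    static boolean tupleIn(Object[] left, List<Object[]> rightTuples) {
        for (Object[] candidate : rightTuples) {
            if (Arrays.deepEquals(left, candidate)) {
                return true;
            }
        }
        return false;
    }

    public static void main(String[] args) {
        List<Object[]> rhs = new ArrayList<>();
        rhs.add(new Object[]{1, 3});                            // single-tuple case
        System.out.println(tupleIn(new Object[]{1, 3}, rhs));   // true
    }
}
```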

hive git commit: HIVE-14393: Tuple in list feature fails if there's only 1 tuple in the list (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2016-08-02 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/branch-2.1 ae8adaae6 -> 7a9003f32


HIVE-14393: Tuple in list feature fails if there's only 1 tuple in the list 
(Pengcheng Xiong, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7a9003f3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7a9003f3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7a9003f3

Branch: refs/heads/branch-2.1
Commit: 7a9003f329e0c11cd6996ac351bcf69fc785d759
Parents: ae8adaa
Author: Pengcheng Xiong 
Authored: Tue Aug 2 09:11:03 2016 -0700
Committer: Pengcheng Xiong 
Committed: Tue Aug 2 09:12:34 2016 -0700

--
 .../hadoop/hive/ql/parse/IdentifiersParser.g|   8 +-
 .../clientpositive/multi_column_in_single.q |  67 
 .../clientpositive/multi_column_in_single.q.out | 372 +++
 3 files changed, 443 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/7a9003f3/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
index a1909a7..8e0beba 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
@@ -472,10 +472,10 @@ expressions
 precedenceEqualExpressionMutiple
 :
 (LPAREN precedenceBitwiseOrExpression (COMMA 
precedenceBitwiseOrExpression)+ RPAREN -> ^(TOK_FUNCTION Identifier["struct"] 
precedenceBitwiseOrExpression+))
-( (KW_IN LPAREN expressionsToStruct (COMMA expressionsToStruct)+ RPAREN)
-   -> ^(TOK_FUNCTION KW_IN $precedenceEqualExpressionMutiple 
expressionsToStruct+)
-| (KW_NOT KW_IN LPAREN expressionsToStruct (COMMA expressionsToStruct)+ 
RPAREN)
-   -> ^(KW_NOT ^(TOK_FUNCTION KW_IN $precedenceEqualExpressionMutiple 
expressionsToStruct+)))
+( (KW_IN LPAREN expressionsToStruct (COMMA expressionsToStruct)* RPAREN)
+   -> ^(TOK_FUNCTION KW_IN $precedenceEqualExpressionMutiple 
expressionsToStruct*)
+| (KW_NOT KW_IN LPAREN expressionsToStruct (COMMA expressionsToStruct)* 
RPAREN)
+   -> ^(KW_NOT ^(TOK_FUNCTION KW_IN $precedenceEqualExpressionMutiple 
expressionsToStruct*)))
 ;
 
 expressionsToStruct

http://git-wip-us.apache.org/repos/asf/hive/blob/7a9003f3/ql/src/test/queries/clientpositive/multi_column_in_single.q
--
diff --git a/ql/src/test/queries/clientpositive/multi_column_in_single.q 
b/ql/src/test/queries/clientpositive/multi_column_in_single.q
new file mode 100644
index 0000000..ca2d16c
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/multi_column_in_single.q
@@ -0,0 +1,67 @@
+set hive.mapred.mode=nonstrict;
+
+select * from src where (key, value) in (('238','val_238'));
+
+drop table emps;
+
+create table emps (empno int, deptno int, empname string);
+
+insert into table emps values 
(1,2,"11"),(1,2,"11"),(3,4,"33"),(1,3,"11"),(2,5,"22"),(2,5,"22");
+
+select * from emps;
+
+select * from emps where (int(empno+deptno/2), int(deptno/3)) in ((3,2));
+
+select * from emps where (int(empno+deptno/2), int(deptno/3)) not in ((3,2));
+
+select * from emps where (empno,deptno) in ((3,2));
+
+select * from emps where (empno,deptno) not in ((3,2));
+
+select * from emps where (empno,deptno) in ((1,3));
+
+select * from emps where (empno,deptno) not in ((1,3));
+
+explain
+select * from emps where (empno+1,deptno) in ((3,2));
+
+explain 
+select * from emps where (empno+1,deptno) not in ((3,2));
+
+explain select * from emps where ((empno*2)|1,deptno) in ((empno+2,2));
+
+select * from emps where ((empno*2)|1,deptno) in ((empno+2,2));
+
+select (empno*2)|1,substr(empname,1,1) from emps;
+
+select * from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+2,'2'));
+
+select * from emps where ((empno*2)|1,substr(empname,1,1)) not in 
((empno+2,'2'));
+
+select * from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+3,'2'));
+
+select * from emps where ((empno*2)|1,substr(empname,1,1)) not in 
((empno+3,'2'));
+
+
+select sum(empno), empname from emps where ((empno*2)|1,substr(empname,1,1)) 
in ((empno+3,'2'))
+group by empname;
+
+select * from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+3,'2'))
+union
+select * from emps where (empno,deptno) in ((3,2));
+
+drop view v;
+
+create view v as 
+select * from(
+select * from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+3,'2'))
+union
+select * from emps where (empno,deptno) in ((3,2)))subq order by empno desc;
+
+select * from v;
+
+select subq.e1 from 
+(select (empno*2)|1 as e1, substr(empname,1,1) as n1 from emps)subq
+join
+(select

hive git commit: HIVE-14424: Address CLIRestoreTest failure (Rajat Khandelwal, reviewed by Pengcheng Xiong)

2016-08-05 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/branch-2.1 c400e44f1 -> 4e2cc161a


HIVE-14424: Address CLIRestoreTest failure (Rajat Khandelwal, reviewed by 
Pengcheng Xiong)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4e2cc161
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4e2cc161
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4e2cc161

Branch: refs/heads/branch-2.1
Commit: 4e2cc161a2565b5985e04a96e108618dc2a597c3
Parents: c400e44
Author: Pengcheng Xiong 
Authored: Fri Aug 5 10:57:04 2016 -0700
Committer: Pengcheng Xiong 
Committed: Fri Aug 5 10:57:49 2016 -0700

--
 .../test/org/apache/hive/service/cli/CLIServiceRestoreTest.java | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/4e2cc161/service/src/test/org/apache/hive/service/cli/CLIServiceRestoreTest.java
--
diff --git 
a/service/src/test/org/apache/hive/service/cli/CLIServiceRestoreTest.java 
b/service/src/test/org/apache/hive/service/cli/CLIServiceRestoreTest.java
index af0cc40..a049440 100644
--- a/service/src/test/org/apache/hive/service/cli/CLIServiceRestoreTest.java
+++ b/service/src/test/org/apache/hive/service/cli/CLIServiceRestoreTest.java
@@ -44,8 +44,11 @@ public class CLIServiceRestoreTest {
   }
 
   public CLIService getService() {
+HiveConf conf = new HiveConf();
+conf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER,
+  
"org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
 CLIService service = new CLIService(null);
-service.init(new HiveConf());
+service.init(conf);
 service.start();
 return service;
   }
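
The patch works because the service reads its configuration when init() runs; values set on the HiveConf afterwards are not seen. A minimal sketch of that init-time snapshot, with simplified stand-in classes rather than the real CLIService:

```java
import java.util.HashMap;
import java.util.Map;

public class InitSnapshotExample {
    // Like CLIService, this service copies what it needs out of the
    // configuration during init(); later edits to the map are ignored.
    static class Service {
        private String authorizerFactory;
        void init(Map<String, String> conf) {
            authorizerFactory = conf.getOrDefault(
                "hive.security.authorization.manager", "(default)");
        }
        String authorizer() { return authorizerFactory; }
    }

    public static void main(String[] args) {
        Map<String, String> conf = new HashMap<>();
        // set BEFORE init, as the patch does with SQLStdHiveAuthorizerFactory
        conf.put("hive.security.authorization.manager",
            "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
        Service service = new Service();
        service.init(conf);
        System.out.println(service.authorizer());
    }
}
```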



hive git commit: HIVE-14424: Address CLIRestoreTest failure (Rajat Khandelwal, reviewed by Pengcheng Xiong)

2016-08-05 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master 80892ca28 -> a099a5cd2


HIVE-14424: Address CLIRestoreTest failure (Rajat Khandelwal, reviewed by 
Pengcheng Xiong)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a099a5cd
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a099a5cd
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a099a5cd

Branch: refs/heads/master
Commit: a099a5cd26d7f07aa221b15a64f4b9485b2ca0e3
Parents: 80892ca
Author: Pengcheng Xiong 
Authored: Fri Aug 5 10:57:04 2016 -0700
Committer: Pengcheng Xiong 
Committed: Fri Aug 5 10:57:04 2016 -0700

--
 .../test/org/apache/hive/service/cli/CLIServiceRestoreTest.java | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/a099a5cd/service/src/test/org/apache/hive/service/cli/CLIServiceRestoreTest.java
--
diff --git 
a/service/src/test/org/apache/hive/service/cli/CLIServiceRestoreTest.java 
b/service/src/test/org/apache/hive/service/cli/CLIServiceRestoreTest.java
index af0cc40..a049440 100644
--- a/service/src/test/org/apache/hive/service/cli/CLIServiceRestoreTest.java
+++ b/service/src/test/org/apache/hive/service/cli/CLIServiceRestoreTest.java
@@ -44,8 +44,11 @@ public class CLIServiceRestoreTest {
   }
 
   public CLIService getService() {
+HiveConf conf = new HiveConf();
+conf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER,
+  
"org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
 CLIService service = new CLIService(null);
-service.init(new HiveConf());
+service.init(conf);
 service.start();
 return service;
   }



[1/3] hive git commit: HIVE-12656: Turn hive.compute.query.using.stats on by default (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2016-08-16 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master 0e05914b4 -> 8763c7aa2


http://git-wip-us.apache.org/repos/asf/hive/blob/8763c7aa/ql/src/test/results/clientpositive/spark/union_view.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/union_view.q.out 
b/ql/src/test/results/clientpositive/spark/union_view.q.out
index f06b080..892cc6f 100644
--- a/ql/src/test/results/clientpositive/spark/union_view.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_view.q.out
@@ -103,149 +103,32 @@ STAGE PLANS:
 86 val_86  3
 86 val_86  3
 STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+  Stage-0 is a root stage
 
 STAGE PLANS:
-  Stage: Stage-1
-Spark
-  Edges:
-Reducer 2 <- Map 1 (GROUP, 1)
- A masked pattern was here 
-  Vertices:
-Map 1 
-Map Operator Tree:
-TableScan
-  alias: src_union_1
-  filterExpr: (ds = '1') (type: boolean)
-  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-  Select Operator
-Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-Group By Operator
-  aggregations: count(1)
-  mode: hash
-  outputColumnNames: _col0
-  Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
-  Reduce Output Operator
-sort order: 
-Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
-value expressions: _col0 (type: bigint)
-Reducer 2 
-Reduce Operator Tree:
-  Group By Operator
-aggregations: count(VALUE._col0)
-mode: mergepartial
-outputColumnNames: _col0
-Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
-File Output Operator
-  compressed: false
-  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
-  table:
-  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
   Stage: Stage-0
 Fetch Operator
-  limit: -1
+  limit: 1
   Processor Tree:
 ListSink
 
 STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+  Stage-0 is a root stage
 
 STAGE PLANS:
-  Stage: Stage-1
-Spark
-  Edges:
-Reducer 2 <- Map 1 (GROUP, 1)
- A masked pattern was here 
-  Vertices:
-Map 1 
-Map Operator Tree:
-TableScan
-  alias: src_union_2
-  filterExpr: (ds = '2') (type: boolean)
-  Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
-  Select Operator
-Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
-Group By Operator
-  aggregations: count(1)
-  mode: hash
-  outputColumnNames: _col0
-  Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
-  Reduce Output Operator
-sort order: 
-Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
-value expressions: _col0 (type: bigint)
-Reducer 2 
-Reduce Operator Tree:
-  Group By Operator
-aggregations: count(VALUE._col0)
-mode: mergepartial
-outputColumnNames: _col0
-Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
-File Output Operator
-  compressed: false
-  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
-  table:
-  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
   Stage: Stage-0
 Fetch Operator
-  limit: -1
+  limit: 1
   Processor Tree:
 ListSink
 
 STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+  Stage-0 is a root stage
 
 STAGE PLANS:
-  Stage: Stage-1
-Spark
-  Edges:
-   

[2/3] hive git commit: HIVE-12656: Turn hive.compute.query.using.stats on by default (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2016-08-16 Thread pxiong
http://git-wip-us.apache.org/repos/asf/hive/blob/8763c7aa/ql/src/test/results/clientpositive/dynpart_sort_optimization.q.out
--
diff --git a/ql/src/test/results/clientpositive/dynpart_sort_optimization.q.out 
b/ql/src/test/results/clientpositive/dynpart_sort_optimization.q.out
index d24ee16..ce3fad2 100644
--- a/ql/src/test/results/clientpositive/dynpart_sort_optimization.q.out
+++ b/ql/src/test/results/clientpositive/dynpart_sort_optimization.q.out
@@ -1107,53 +1107,37 @@ Storage Desc Params:
 PREHOOK: query: select count(*) from over1k_part
 PREHOOK: type: QUERY
 PREHOOK: Input: default@over1k_part
-PREHOOK: Input: default@over1k_part@ds=foo/t=27
-PREHOOK: Input: default@over1k_part@ds=foo/t=__HIVE_DEFAULT_PARTITION__
  A masked pattern was here 
 POSTHOOK: query: select count(*) from over1k_part
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over1k_part
-POSTHOOK: Input: default@over1k_part@ds=foo/t=27
-POSTHOOK: Input: default@over1k_part@ds=foo/t=__HIVE_DEFAULT_PARTITION__
  A masked pattern was here 
 38
 PREHOOK: query: select count(*) from over1k_part_limit
 PREHOOK: type: QUERY
 PREHOOK: Input: default@over1k_part_limit
-PREHOOK: Input: default@over1k_part_limit@ds=foo/t=27
-PREHOOK: Input: default@over1k_part_limit@ds=foo/t=__HIVE_DEFAULT_PARTITION__
  A masked pattern was here 
 POSTHOOK: query: select count(*) from over1k_part_limit
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over1k_part_limit
-POSTHOOK: Input: default@over1k_part_limit@ds=foo/t=27
-POSTHOOK: Input: default@over1k_part_limit@ds=foo/t=__HIVE_DEFAULT_PARTITION__
  A masked pattern was here 
 20
 PREHOOK: query: select count(*) from over1k_part_buck
 PREHOOK: type: QUERY
 PREHOOK: Input: default@over1k_part_buck
-PREHOOK: Input: default@over1k_part_buck@t=27
-PREHOOK: Input: default@over1k_part_buck@t=__HIVE_DEFAULT_PARTITION__
  A masked pattern was here 
 POSTHOOK: query: select count(*) from over1k_part_buck
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over1k_part_buck
-POSTHOOK: Input: default@over1k_part_buck@t=27
-POSTHOOK: Input: default@over1k_part_buck@t=__HIVE_DEFAULT_PARTITION__
  A masked pattern was here 
 38
 PREHOOK: query: select count(*) from over1k_part_buck_sort
 PREHOOK: type: QUERY
 PREHOOK: Input: default@over1k_part_buck_sort
-PREHOOK: Input: default@over1k_part_buck_sort@t=27
-PREHOOK: Input: default@over1k_part_buck_sort@t=__HIVE_DEFAULT_PARTITION__
  A masked pattern was here 
 POSTHOOK: query: select count(*) from over1k_part_buck_sort
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over1k_part_buck_sort
-POSTHOOK: Input: default@over1k_part_buck_sort@t=27
-POSTHOOK: Input: default@over1k_part_buck_sort@t=__HIVE_DEFAULT_PARTITION__
  A masked pattern was here 
 38
 PREHOOK: query: -- tests for HIVE-6883
@@ -1649,14 +1633,10 @@ POSTHOOK: Input: 
default@over1k_part2@ds=foo/t=__HIVE_DEFAULT_PARTITION__
 PREHOOK: query: select count(*) from over1k_part2
 PREHOOK: type: QUERY
 PREHOOK: Input: default@over1k_part2
-PREHOOK: Input: default@over1k_part2@ds=foo/t=27
-PREHOOK: Input: default@over1k_part2@ds=foo/t=__HIVE_DEFAULT_PARTITION__
  A masked pattern was here 
 POSTHOOK: query: select count(*) from over1k_part2
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over1k_part2
-POSTHOOK: Input: default@over1k_part2@ds=foo/t=27
-POSTHOOK: Input: default@over1k_part2@ds=foo/t=__HIVE_DEFAULT_PARTITION__
  A masked pattern was here 
 19
 PREHOOK: query: insert overwrite table over1k_part2 partition(ds="foo",t) 
select si,i,b,f,t from over1k where t is null or t=27 order by i
@@ -1796,14 +1776,10 @@ POSTHOOK: Input: 
default@over1k_part2@ds=foo/t=__HIVE_DEFAULT_PARTITION__
 PREHOOK: query: select count(*) from over1k_part2
 PREHOOK: type: QUERY
 PREHOOK: Input: default@over1k_part2
-PREHOOK: Input: default@over1k_part2@ds=foo/t=27
-PREHOOK: Input: default@over1k_part2@ds=foo/t=__HIVE_DEFAULT_PARTITION__
  A masked pattern was here 
 POSTHOOK: query: select count(*) from over1k_part2
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over1k_part2
-POSTHOOK: Input: default@over1k_part2@ds=foo/t=27
-POSTHOOK: Input: default@over1k_part2@ds=foo/t=__HIVE_DEFAULT_PARTITION__
  A masked pattern was here 
 19
 PREHOOK: query: -- hadoop-1 does not honor number of reducers in local mode. 
There is always only 1 reducer irrespective of the number of buckets.
@@ -2083,14 +2059,10 @@ POSTHOOK: Input: 
default@over1k_part_buck_sort2@t=__HIVE_DEFAULT_PARTITION__
 PREHOOK: query: select count(*) from over1k_part_buck_sort2
 PREHOOK: type: QUERY
 PREHOOK: Input: default@over1k_part_buck_sort2
-PREHOOK: Input: default@over1k_part_buck_sort2@t=27
-PREHOOK: Input: default@over1k_part_buck_sort2@t=__HIVE_DEFAULT_PARTITION__
  A masked pattern was here 
 POSTHOOK: query: select count(*) from over1k_part_buck_sort2
 POSTHOOK: type: QUERY
 POSTHOOK: Input: def

[3/3] hive git commit: HIVE-12656: Turn hive.compute.query.using.stats on by default (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2016-08-16 Thread pxiong
HIVE-12656: Turn hive.compute.query.using.stats on by default (Pengcheng Xiong, 
reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8763c7aa
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8763c7aa
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8763c7aa

Branch: refs/heads/master
Commit: 8763c7aa287b8a07c54d22a227bc9d368aa8b626
Parents: 0e05914
Author: Pengcheng Xiong 
Authored: Tue Aug 16 15:20:33 2016 -0700
Committer: Pengcheng Xiong 
Committed: Tue Aug 16 15:20:33 2016 -0700

--
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   2 +-
 .../test/queries/clientnegative/insert_into6.q  |   1 +
 .../lockneg_query_tbl_in_locked_db.q|   1 +
 .../queries/clientpositive/alter_merge_orc.q|   1 +
 .../clientpositive/bucketizedhiveinputformat.q  |   1 +
 .../test/queries/clientpositive/cbo_udf_udaf.q  |   1 +
 .../clientpositive/dynamic_partition_pruning.q  |   1 +
 .../dynpart_sort_opt_vectorization.q|   1 +
 .../clientpositive/dynpart_sort_optimization.q  |   1 +
 ql/src/test/queries/clientpositive/escape1.q|   1 +
 ql/src/test/queries/clientpositive/escape2.q|   1 +
 .../queries/clientpositive/orc_llap_counters.q  |   1 +
 ql/src/test/queries/clientpositive/orc_merge1.q |   1 +
 .../test/queries/clientpositive/orc_merge10.q   |   1 +
 .../queries/clientpositive/orc_merge_diff_fs.q  |   1 +
 .../test/queries/clientpositive/orc_ppd_basic.q |   1 +
 .../clientpositive/partition_coltype_literals.q |   1 +
 .../clientpositive/stats_aggregator_error_1.q   |   1 +
 .../clientpositive/stats_publisher_error_1.q|   1 +
 .../clientpositive/symlink_text_input_format.q  |   2 +
 ql/src/test/queries/clientpositive/tez_union.q  |   1 +
 .../queries/clientpositive/vector_complex_all.q |   2 +
 .../vectorization_short_regress.q   |   1 +
 .../vectorized_dynamic_partition_pruning.q  |   1 +
 .../clientpositive/alter_merge_2_orc.q.out  |   2 -
 .../clientpositive/alter_merge_orc.q.out|   2 -
 .../alter_partition_coltype.q.out   | 384 +---
 .../clientpositive/annotate_stats_select.q.out  |  72 +--
 .../clientpositive/avro_partitioned.q.out   |  16 -
 .../bucketsortoptimize_insert_1.q.out   |   4 -
 .../bucketsortoptimize_insert_3.q.out   |   4 -
 .../clientpositive/cbo_rp_udf_udaf.q.out|   2 -
 .../cbo_rp_udf_udaf_stats_opt.q.out |   2 -
 .../results/clientpositive/cbo_udf_udaf.q.out   |   2 -
 .../test/results/clientpositive/combine2.q.out  | 444 +--
 .../dynpart_sort_opt_vectorization.q.out| 104 +
 .../dynpart_sort_optimization.q.out |  32 --
 .../clientpositive/explain_dependency2.q.out|   2 +-
 .../results/clientpositive/fileformat_mix.q.out |   4 -
 .../test/results/clientpositive/fold_case.q.out |  36 +-
 .../test/results/clientpositive/input24.q.out   |  38 +-
 .../list_bucket_query_multiskew_1.q.out |   2 -
 .../list_bucket_query_multiskew_2.q.out |   2 -
 .../list_bucket_query_multiskew_3.q.out | 109 +
 .../merge_dynamic_partition4.q.out  |   4 -
 .../merge_dynamic_partition5.q.out  |   4 -
 .../results/clientpositive/orc_merge1.q.out |  12 -
 .../results/clientpositive/orc_merge10.q.out|  12 -
 .../clientpositive/partition_boolexpr.q.out | 144 +-
 .../results/clientpositive/partition_date.q.out |  32 --
 .../clientpositive/partition_decode_name.q.out  |  12 -
 .../clientpositive/partition_special_char.q.out |  12 -
 .../clientpositive/partition_timestamp.q.out|  28 --
 .../clientpositive/partition_varchar1.q.out |  22 -
 .../test/results/clientpositive/plan_json.q.out |   2 +-
 .../clientpositive/ppd_constant_where.q.out |  40 +-
 .../rename_partition_location.q.out |   1 -
 .../clientpositive/select_unquote_and.q.out |   4 -
 .../clientpositive/select_unquote_not.q.out |   4 -
 .../clientpositive/select_unquote_or.q.out  |   4 -
 .../results/clientpositive/smb_mapjoin_18.q.out |   6 -
 .../results/clientpositive/smb_mapjoin_19.q.out |   4 -
 .../results/clientpositive/smb_mapjoin_20.q.out |   4 -
 .../spark/list_bucket_dml_2.q.out   |   6 -
 .../clientpositive/spark/smb_mapjoin_18.q.out   |   6 -
 .../clientpositive/spark/smb_mapjoin_19.q.out   |   4 -
 .../clientpositive/spark/smb_mapjoin_20.q.out   |   4 -
 .../results/clientpositive/spark/stats3.q.out   |   4 -
 .../clientpositive/spark/stats_noscan_2.q.out   |   2 -
 .../clientpositive/spark/union_view.q.out   | 129 +-
 ql/src/test/results/clientpositive/stats3.q.out |   4 -
 .../results/clientpositive/stats_noscan_2.q.out |   2 -
 .../test/results/clientpositive/udf_count.q.out | 110 +
 .../results/clientpositive/union_view.q.out | 183 +---
 .../clientpositive/updateAccessTime.q.o
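
The golden-file churn in this diffstat follows from a single behavior change: with hive.compute.query.using.stats enabled, eligible aggregates such as count(*) are answered from metastore statistics, so the plans collapse to a bare Stage-0 fetch. A hedged sketch of toggling the flag; HIVEOPTIMIZEMETADATAQUERIES is the ConfVars name the HIVE-14563 test diffs later in this digest use for this property:

```java
import org.apache.hadoop.hive.conf.HiveConf;

public class StatsAnswerFlag {
    public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // hive.compute.query.using.stats: when true, queries like
        // "select count(*) from t" are served from metastore stats and the
        // plan keeps only a Fetch stage (no map-reduce or Spark job).
        conf.setBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEMETADATAQUERIES, true);
        System.out.println(
            conf.getBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEMETADATAQUERIES));
    }
}
```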

hive git commit: HIVE-12656: Turn hive.compute.query.using.stats on by default (Pengcheng Xiong, reviewed by Ashutosh Chauhan) (addendum)

2016-08-17 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master 8763c7aa2 -> be7329582


HIVE-12656: Turn hive.compute.query.using.stats on by default (Pengcheng Xiong, 
reviewed by Ashutosh Chauhan) (addendum)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/be732958
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/be732958
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/be732958

Branch: refs/heads/master
Commit: be73295829c6643de8db2a6d1d83beec8300110c
Parents: 8763c7a
Author: Pengcheng Xiong 
Authored: Wed Aug 17 15:35:06 2016 -0700
Committer: Pengcheng Xiong 
Committed: Wed Aug 17 15:35:06 2016 -0700

--
 .../clientpositive/alter_merge_orc.q.out|   2 +
 .../results/clientpositive/cbo_udf_udaf.q.out   |   2 +
 .../dynpart_sort_opt_vectorization.q.out| 104 ++-
 .../dynpart_sort_optimization.q.out |  32 ++
 .../clientpositive/list_bucket_dml_1.q.out  |   8 --
 .../clientpositive/list_bucket_dml_2.q.out  |   6 --
 .../clientpositive/list_bucket_dml_3.q.out  |   6 --
 .../clientpositive/list_bucket_dml_4.q.out  |   6 --
 .../clientpositive/list_bucket_dml_5.q.out  |   8 --
 .../clientpositive/list_bucket_dml_6.q.out  |   8 --
 .../clientpositive/list_bucket_dml_7.q.out  |   8 --
 .../clientpositive/list_bucket_dml_8.q.out  |   4 -
 .../clientpositive/list_bucket_dml_9.q.out  |   6 --
 .../results/clientpositive/orc_merge1.q.out |  12 +++
 .../results/clientpositive/orc_merge10.q.out|  12 +++
 .../clientpositive/tez/alter_merge_2_orc.q.out  |   2 -
 .../clientpositive/tez/load_dyn_part2.q.out |  10 +-
 17 files changed, 165 insertions(+), 71 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/be732958/ql/src/test/results/clientpositive/alter_merge_orc.q.out
--
diff --git a/ql/src/test/results/clientpositive/alter_merge_orc.q.out 
b/ql/src/test/results/clientpositive/alter_merge_orc.q.out
index aa83fce..b5a6d04 100644
--- a/ql/src/test/results/clientpositive/alter_merge_orc.q.out
+++ b/ql/src/test/results/clientpositive/alter_merge_orc.q.out
@@ -179,10 +179,12 @@ minFileSize:2515
 PREHOOK: query: select count(1) from src_orc_merge_test_part
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src_orc_merge_test_part
+PREHOOK: Input: default@src_orc_merge_test_part@ds=2011
  A masked pattern was here 
 POSTHOOK: query: select count(1) from src_orc_merge_test_part
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src_orc_merge_test_part
+POSTHOOK: Input: default@src_orc_merge_test_part@ds=2011
  A masked pattern was here 
 1500
 PREHOOK: query: select sum(hash(key)), sum(hash(value)) from 
src_orc_merge_test_part

http://git-wip-us.apache.org/repos/asf/hive/blob/be732958/ql/src/test/results/clientpositive/cbo_udf_udaf.q.out
--
diff --git a/ql/src/test/results/clientpositive/cbo_udf_udaf.q.out 
b/ql/src/test/results/clientpositive/cbo_udf_udaf.q.out
index b30d9da..156d02f 100644
--- a/ql/src/test/results/clientpositive/cbo_udf_udaf.q.out
+++ b/ql/src/test/results/clientpositive/cbo_udf_udaf.q.out
@@ -53,10 +53,12 @@ POSTHOOK: Input: default@cbo_t1@dt=2014
 PREHOOK: query: select f,a,e,b from (select count(*) as a, count(c_int) as b, 
sum(c_int) as c, avg(c_int) as d, max(c_int) as e, min(c_int) as f from cbo_t1) 
cbo_t1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@cbo_t1
+PREHOOK: Input: default@cbo_t1@dt=2014
  A masked pattern was here 
 POSTHOOK: query: select f,a,e,b from (select count(*) as a, count(c_int) as b, 
sum(c_int) as c, avg(c_int) as d, max(c_int) as e, min(c_int) as f from cbo_t1) 
cbo_t1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@cbo_t1
+POSTHOOK: Input: default@cbo_t1@dt=2014
  A masked pattern was here 
 1  20  1   18
 PREHOOK: query: select f,a,e,b from (select count(*) as a, count(distinct 
c_int) as b, sum(distinct c_int) as c, avg(distinct c_int) as d, max(distinct 
c_int) as e, min(distinct c_int) as f from cbo_t1) cbo_t1

http://git-wip-us.apache.org/repos/asf/hive/blob/be732958/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out 
b/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out
index cbfc7be..fc4f483 100644
--- a/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out
+++ b/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out
@@ -1158,37 +1158,53 @@ Storage Desc Params:
 PREHOOK: query: select count(*) from over1k_part_orc
 PREHOOK: type: QUERY
 PREHOOK: Input:

hive git commit: HIVE-14563: StatsOptimizer treats NULL in a wrong way (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2016-08-19 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master b04f7ef39 -> 542641d0f


HIVE-14563: StatsOptimizer treats NULL in a wrong way (Pengcheng Xiong, 
reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/542641d0
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/542641d0
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/542641d0

Branch: refs/heads/master
Commit: 542641d0f7da53e7a6da6cd45ddbaf2cf865c0b3
Parents: b04f7ef
Author: Pengcheng Xiong 
Authored: Fri Aug 19 11:27:46 2016 -0700
Committer: Pengcheng Xiong 
Committed: Fri Aug 19 11:27:46 2016 -0700

--
 .../hive/beeline/TestBeeLineWithArgs.java   |  9 ++--
 .../apache/hive/jdbc/TestJdbcWithMiniHS2.java   |  1 +
 .../hive/ql/optimizer/StatsOptimizer.java   | 56 +---
 .../clientpositive/stats_null_optimizer.q   |  3 ++
 .../clientpositive/stats_null_optimizer.q.out   | 23 
 .../cli/session/TestSessionManagerMetrics.java  |  1 +
 6 files changed, 70 insertions(+), 23 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/542641d0/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java
--
diff --git 
a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java
 
b/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java
index 892c733..49c1120 100644
--- 
a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java
+++ 
b/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java
@@ -73,6 +73,7 @@ public class TestBeeLineWithArgs {
 HiveConf hiveConf = new HiveConf();
 // Set to non-zk lock manager to prevent HS2 from trying to connect
 hiveConf.setVar(HiveConf.ConfVars.HIVE_LOCK_MANAGER, 
"org.apache.hadoop.hive.ql.lockmgr.EmbeddedLockManager");
+hiveConf.setBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEMETADATAQUERIES, false);
 miniHS2 = new MiniHS2(hiveConf);
 miniHS2.start(new HashMap<String, String>());
 createTable();
@@ -775,10 +776,10 @@ public class TestBeeLineWithArgs {
 String embeddedJdbcURL = Utils.URL_PREFIX+"/Default";
 List<String> argList = getBaseArgs(embeddedJdbcURL);
 // Set to non-zk lock manager to avoid trying to connect to zookeeper
-final String SCRIPT_TEXT =
-"set 
hive.lock.manager=org.apache.hadoop.hive.ql.lockmgr.EmbeddedLockManager;\n" +
-"create table if not exists embeddedBeelineOutputs(d int);\n" +
-"set a=1;\nselect count(*) from embeddedBeelineOutputs;\n";
+final String SCRIPT_TEXT = "set 
hive.lock.manager=org.apache.hadoop.hive.ql.lockmgr.EmbeddedLockManager;\n"
++ "set hive.compute.query.using.stats=false;\n"
++ "create table if not exists embeddedBeelineOutputs(d int);\n"
++ "set a=1;\nselect count(*) from embeddedBeelineOutputs;\n";
 final String EXPECTED_PATTERN = "Stage-1 map =";
 testScriptFile(SCRIPT_TEXT, EXPECTED_PATTERN, true, argList);
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/542641d0/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
--
diff --git 
a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java 
b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
index 0dcfa49..0249566 100644
--- 
a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
+++ 
b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
@@ -548,6 +548,7 @@ public class TestJdbcWithMiniHS2 {
 HiveConf conf = new HiveConf();
 String userName;
 setSerializeInTasksInConf(conf);
+conf.setBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEMETADATAQUERIES, false);
 miniHS2 = new MiniHS2(conf);
 Map<String, String> confOverlay = new HashMap<String, String>();
 miniHS2.start(confOverlay);

http://git-wip-us.apache.org/repos/asf/hive/blob/542641d0/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
index 0c17246..17510e9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
@@ -371,20 +371,37 @@ public class StatsOptimizer extends Transform {
   else if (udaf instanceof GenericUDAFCount) {
 // always long
 Long rowCnt = 0L;
-if (aggr.getParameters().isEmpty() || aggr.getParameters().get(0) 
instanceof
-ExprNodeConstantDesc || ((aggr.getParame
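
The message truncates here, but the rule the fix enforces is compact: count(*) can be answered straight from the row count, while count(col) must subtract the column's NULLs, so stats are only usable when numNulls is known. A sketch of that rule with a hypothetical helper, not the StatsOptimizer code itself:

```java
public class CountFromStats {
    // count(*) / count(constant) counts every row; count(col) skips NULLs.
    // Returning null signals "stats insufficient, fall back to a real scan".
    static Long countFromStats(long rowCount, Long numNulls, boolean isCountStar) {
        if (isCountStar) {
            return rowCount;
        }
        if (numNulls == null) {
            return null;                 // numNulls not collected: do not optimize
        }
        return rowCount - numNulls;      // count(col) excludes NULL values
    }

    public static void main(String[] args) {
        System.out.println(countFromStats(100, 7L, false));   // 93
        System.out.println(countFromStats(100, null, true));  // 100
    }
}
```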

hive git commit: HIVE-14563: StatsOptimizer treats NULL in a wrong way (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2016-08-19 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/branch-2.1 b16c4f3a4 -> 17b3bcd4f


HIVE-14563: StatsOptimizer treats NULL in a wrong way (Pengcheng Xiong, 
reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/17b3bcd4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/17b3bcd4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/17b3bcd4

Branch: refs/heads/branch-2.1
Commit: 17b3bcd4feb4e4ecbeab3d5b30bb4c234c197b1c
Parents: b16c4f3
Author: Pengcheng Xiong 
Authored: Fri Aug 19 11:27:46 2016 -0700
Committer: Pengcheng Xiong 
Committed: Fri Aug 19 14:10:37 2016 -0700

--
 .../hive/beeline/TestBeeLineWithArgs.java   |  9 ++--
 .../apache/hive/jdbc/TestJdbcWithMiniHS2.java   |  1 +
 .../hive/ql/optimizer/StatsOptimizer.java   | 56 +---
 .../clientpositive/stats_null_optimizer.q   |  3 ++
 .../clientpositive/stats_null_optimizer.q.out   | 23 
 .../cli/session/TestSessionManagerMetrics.java  |  1 +
 6 files changed, 70 insertions(+), 23 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/17b3bcd4/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java
--
diff --git 
a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java
 
b/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java
index ecfeddb..ae68f62 100644
--- 
a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java
+++ 
b/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java
@@ -73,6 +73,7 @@ public class TestBeeLineWithArgs {
 HiveConf hiveConf = new HiveConf();
 // Set to non-zk lock manager to prevent HS2 from trying to connect
 hiveConf.setVar(HiveConf.ConfVars.HIVE_LOCK_MANAGER, 
"org.apache.hadoop.hive.ql.lockmgr.EmbeddedLockManager");
+hiveConf.setBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEMETADATAQUERIES, false);
 miniHS2 = new MiniHS2(hiveConf);
 miniHS2.start(new HashMap<String, String>());
 createTable();
@@ -775,10 +776,10 @@ public class TestBeeLineWithArgs {
 String embeddedJdbcURL = Utils.URL_PREFIX+"/Default";
 List<String> argList = getBaseArgs(embeddedJdbcURL);
 // Set to non-zk lock manager to avoid trying to connect to zookeeper
-final String SCRIPT_TEXT =
-"set 
hive.lock.manager=org.apache.hadoop.hive.ql.lockmgr.EmbeddedLockManager;\n" +
-"create table if not exists embeddedBeelineOutputs(d int);\n" +
-"set a=1;\nselect count(*) from embeddedBeelineOutputs;\n";
+final String SCRIPT_TEXT = "set 
hive.lock.manager=org.apache.hadoop.hive.ql.lockmgr.EmbeddedLockManager;\n"
++ "set hive.compute.query.using.stats=false;\n"
++ "create table if not exists embeddedBeelineOutputs(d int);\n"
++ "set a=1;\nselect count(*) from embeddedBeelineOutputs;\n";
 final String EXPECTED_PATTERN = "Stage-1 map =";
 testScriptFile(SCRIPT_TEXT, EXPECTED_PATTERN, true, argList);
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/17b3bcd4/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
--
diff --git 
a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java 
b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
index d4d3f33..0db 100644
--- 
a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
+++ 
b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
@@ -477,6 +477,7 @@ public class TestJdbcWithMiniHS2 {
 HiveConf conf = new HiveConf();
 String userName;
 setSerializeInTasksInConf(conf);
+conf.setBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEMETADATAQUERIES, false);
 miniHS2 = new MiniHS2(conf);
 Map<String, String> confOverlay = new HashMap<String, String>();
 miniHS2.start(confOverlay);

http://git-wip-us.apache.org/repos/asf/hive/blob/17b3bcd4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
index 0c17246..17510e9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
@@ -371,20 +371,37 @@ public class StatsOptimizer extends Transform {
   else if (udaf instanceof GenericUDAFCount) {
 // always long
 Long rowCnt = 0L;
-if (aggr.getParameters().isEmpty() || aggr.getParameters().get(0) 
instanceof
-ExprNodeConstantDesc || ((aggr.g

hive git commit: HIVE-14511: Improve MSCK for partitioned table to deal with special cases (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2016-08-22 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master 127898f52 -> 477728956


HIVE-14511: Improve MSCK for partitioned table to deal with special cases 
(Pengcheng Xiong, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/47772895
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/47772895
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/47772895

Branch: refs/heads/master
Commit: 4777289561340471c4d40184c3fa23d9a35b2599
Parents: 127898f
Author: Pengcheng Xiong 
Authored: Mon Aug 22 23:51:38 2016 -0700
Committer: Pengcheng Xiong 
Committed: Mon Aug 22 23:51:38 2016 -0700

--
 .../hive/ql/metadata/HiveMetaStoreChecker.java  | 103 ++-
 .../ql/metadata/TestHiveMetaStoreChecker.java   |  20 +---
 .../test/queries/clientnegative/msck_repair_1.q |  17 +++
 .../test/queries/clientnegative/msck_repair_2.q |  18 
 .../test/queries/clientnegative/msck_repair_3.q |  19 
 .../test/queries/clientpositive/msck_repair_1.q |  18 
 .../test/queries/clientpositive/msck_repair_2.q |  20 
 .../test/queries/clientpositive/msck_repair_3.q |  17 +++
 .../results/clientnegative/msck_repair_1.q.out  |  19 
 .../results/clientnegative/msck_repair_2.q.out  |  19 
 .../results/clientnegative/msck_repair_3.q.out  |  19 
 .../results/clientpositive/msck_repair_1.q.out  |  39 +++
 .../results/clientpositive/msck_repair_2.q.out  |  39 +++
 .../results/clientpositive/msck_repair_3.q.out  |  39 +++
 14 files changed, 364 insertions(+), 42 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/47772895/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java 
b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java
index a164b12..34b76b8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java
@@ -310,7 +310,7 @@ public class HiveMetaStoreChecker {
 // now check the table folder and see if we find anything
 // that isn't in the metastore
 Set<Path> allPartDirs = new HashSet<Path>();
-getAllLeafDirs(tablePath, allPartDirs);
+checkPartitionDirs(tablePath, allPartDirs, table.getPartCols().size());
 // don't want the table dir
 allPartDirs.remove(tablePath);
 
@@ -358,21 +358,25 @@ public class HiveMetaStoreChecker {
   }
 
   /**
-   * Recursive method to get the leaf directories of a base path. Example:
-   * base/dir1/dir2 base/dir3
-   *
-   * This will return dir2 and dir3 but not dir1.
+   * Assume that depth is 2, i.e., partition columns are a and b
+   * tblPath/a=1  => throw exception
+   * tblPath/a=1/file => throw exception
+   * tblPath/a=1/b=2/file => return a=1/b=2
+   * tblPath/a=1/b=2/c=3 => return a=1/b=2
+   * tblPath/a=1/b=2/c=3/file => return a=1/b=2
*
* @param basePath
*  Start directory
* @param allDirs
*  This set will contain the leaf paths at the end.
+   * @param maxDepth
+   *  Specify how deep the search goes.
* @throws IOException
*   Thrown if we can't get lists from the fs.
* @throws HiveException 
*/
 
-  private void getAllLeafDirs(Path basePath, Set<Path> allDirs) throws 
IOException, HiveException {
+  private void checkPartitionDirs(Path basePath, Set<Path> allDirs, int 
maxDepth) throws IOException, HiveException {
 ConcurrentLinkedQueue<Path> basePaths = new ConcurrentLinkedQueue<>();
 basePaths.add(basePath);
 // we only use the keySet of ConcurrentHashMap
@@ -390,33 +394,53 @@ public class HiveMetaStoreChecker {
   LOG.debug("Using threaded version of MSCK-GetPaths with number of 
threads "
   + ((ThreadPoolExecutor) pool).getPoolSize());
 }
-getAllLeafDirs(pool, basePaths, dirSet, basePath.getFileSystem(conf));
+checkPartitionDirs(pool, basePaths, dirSet, basePath.getFileSystem(conf), 
maxDepth, maxDepth);
 pool.shutdown();
 allDirs.addAll(dirSet.keySet());
   }
 
   // process the basePaths in parallel and then the next level of basePaths
-  private void getAllLeafDirs(final ExecutorService pool, final 
ConcurrentLinkedQueue basePaths,
-  final Map allDirs, final FileSystem fs) throws 
IOException, HiveException {
+  private void checkPartitionDirs(final ExecutorService pool,
+  final ConcurrentLinkedQueue basePaths, final Map 
allDirs,
+  final FileSystem fs, final int depth, final int maxDepth) throws 
IOException, HiveException {
 final ConcurrentLinkedQueue<Path> nextLevel = new 
ConcurrentLinkedQueue<>();
 if (null == pool) {
   for (final Path path : basePaths) {
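
The diff is cut off, but the javadoc above fully specifies the new contract: only directories exactly maxDepth levels below the table path are partition directories, and a branch that bottoms out early is an error. A single-threaded sketch of that depth rule, using java.io.File in place of the Hadoop FileSystem API:

```java
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class PartitionDepthCheck {
    // Descend exactly 'depth' directory levels below 'dir'. Directories at
    // the target depth are recorded (deeper content is ignored); a branch
    // with no subdirectory before that depth, like tblPath/a=1 with no
    // b=... child, is a malformed layout.
    static void collect(File dir, int depth, List<File> out) throws IOException {
        if (depth == 0) {
            out.add(dir);
            return;
        }
        File[] subDirs = dir.listFiles(File::isDirectory);
        if (subDirs == null || subDirs.length == 0) {
            throw new IOException("missing partition column level under " + dir);
        }
        for (File sub : subDirs) {
            collect(sub, depth - 1, out);
        }
    }

    public static void main(String[] args) throws IOException {
        List<File> parts = new ArrayList<>();
        collect(new File(args[0]), Integer.parseInt(args[1]), parts);
        parts.forEach(p -> System.out.println("partition dir: " + p));
    }
}
```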
  

hive git commit: HIVE-11614: CBO: Calcite Operator To Hive Operator (Calcite Return Path): ctas after order by has problem (Pengcheng Xiong, reviewed by Laljo John Pullokkaran)

2015-09-12 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master ff1f5b1a7 -> bbb912927


HIVE-11614: CBO: Calcite Operator To Hive Operator (Calcite Return Path): ctas 
after order by has problem (Pengcheng Xiong, reviewed by Laljo John Pullokkaran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bbb91292
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bbb91292
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bbb91292

Branch: refs/heads/master
Commit: bbb912927a1457daf283f3030cd873d55b93c8c3
Parents: ff1f5b1
Author: Pengcheng Xiong 
Authored: Sat Sep 12 20:27:16 2015 -0700
Committer: Pengcheng Xiong 
Committed: Sat Sep 12 20:27:16 2015 -0700

--
 .../translator/PlanModifierForReturnPath.java   |   4 -
 .../hadoop/hive/ql/parse/CalcitePlanner.java|   7 +-
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |   2 +-
 .../queries/clientpositive/cbo_rp_auto_join17.q |  14 +
 .../cbo_rp_cross_product_check_2.q  |  31 +
 .../clientpositive/cbo_rp_auto_join17.q.out | 118 
 .../cbo_rp_cross_product_check_2.q.out  | 699 +++
 7 files changed, 866 insertions(+), 9 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/bbb91292/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForReturnPath.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForReturnPath.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForReturnPath.java
index 81cc474..95d692c 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForReturnPath.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForReturnPath.java
@@ -34,10 +34,6 @@ public class PlanModifierForReturnPath {
 
 Pair<RelNode, RelNode> topSelparentPair = 
HiveCalciteUtil.getTopLevelSelect(newTopNode);
 PlanModifierUtil.fixTopOBSchema(newTopNode, topSelparentPair, 
resultSchema, false);
-if (isCTAS) {
-  newTopNode = 
PlanModifierForASTConv.renameTopLevelSelectInResultSchema(newTopNode,
-  topSelparentPair, resultSchema);
-}
 
 return newTopNode;
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/bbb91292/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 86bdf7e..8e992da 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -193,7 +193,6 @@ import com.google.common.collect.Lists;
 public class CalcitePlanner extends SemanticAnalyzer {
 
   private final AtomicInteger noColsMissingStats = new AtomicInteger(0);
-  private List<FieldSchema> topLevelFieldSchema;
   private SemanticException semanticException;
   private boolean   runCBO = true;
 
@@ -620,7 +619,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
   rethrowCalciteException(e);
   throw new AssertionError("rethrowCalciteException didn't throw for " + 
e.getMessage());
 }
-optiqOptimizedAST = ASTConverter.convert(optimizedOptiqPlan, 
topLevelFieldSchema);
+optiqOptimizedAST = ASTConverter.convert(optimizedOptiqPlan, resultSchema);
 
 return optiqOptimizedAST;
   }
@@ -644,7 +643,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
 }
 
 RelNode modifiedOptimizedOptiqPlan = 
PlanModifierForReturnPath.convertOpTree(
-introduceProjectIfNeeded(optimizedOptiqPlan), topLevelFieldSchema, 
this.getQB()
+introduceProjectIfNeeded(optimizedOptiqPlan), resultSchema, 
this.getQB()
 .getTableDesc() != null);
 
 LOG.debug("Translating the following plan:\n" + 
RelOptUtil.toString(modifiedOptimizedOptiqPlan));
@@ -851,7 +850,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
   // 1. Gen Calcite Plan
   try {
 calciteGenPlan = genLogicalPlan(getQB(), true);
-topLevelFieldSchema = 
SemanticAnalyzer.convertRowSchemaToResultSetSchema(
+resultSchema = SemanticAnalyzer.convertRowSchemaToResultSetSchema(
 relToHiveRR.get(calciteGenPlan),
 HiveConf.getBoolVar(conf, 
HiveConf.ConfVars.HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES));
   } catch (SemanticException e) {

http://git-wip-us.apache.org/repos/asf/hive/blob/bbb91292/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java 
b/ql/sr

hive git commit: HIVE-11815 : Correct the column/table names in subquery expression when creating a view (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2015-09-17 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master a12e5f5bb -> 8da2ed304


HIVE-11815 : Correct the column/table names in subquery expression when 
creating a view (Pengcheng Xiong, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8da2ed30
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8da2ed30
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8da2ed30

Branch: refs/heads/master
Commit: 8da2ed304891dc8483fe3d78eda4c9f70c54ae18
Parents: a12e5f5
Author: Pengcheng Xiong 
Authored: Thu Sep 17 13:20:00 2015 -0700
Committer: Pengcheng Xiong 
Committed: Thu Sep 17 13:20:00 2015 -0700

--
 .../apache/hadoop/hive/ql/parse/QBSubQuery.java |   7 --
 .../hadoop/hive/ql/parse/SubQueryUtils.java |  11 --
 .../queries/clientpositive/subquery_views.q |  22 +++-
 .../subquery_exists_implicit_gby.q.out  |   8 +-
 .../subquery_nested_subquery.q.out  |   4 +-
 .../subquery_notexists_implicit_gby.q.out   |   8 +-
 .../subquery_windowing_corr.q.out   |   7 +-
 .../results/clientpositive/subquery_views.q.out | 116 +++
 8 files changed, 141 insertions(+), 42 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/8da2ed30/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
index 92cbabc..f95ee8d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
@@ -401,7 +401,6 @@ public class QBSubQuery implements ISubQueryJoinInfo {
   CNT_ALIAS,
   subQryCorrExprs,
   sqRR);
-  SubQueryUtils.setOriginDeep(ast, QBSubQuery.this.originalSQASTOrigin);
   return ast;
 }
 
@@ -416,7 +415,6 @@ public class QBSubQuery implements ISubQueryJoinInfo {
 public ASTNode getJoinConditionAST() {
   ASTNode ast =
   SubQueryUtils.buildNotInNullJoinCond(getAlias(), CNT_ALIAS);
-  SubQueryUtils.setOriginDeep(ast, QBSubQuery.this.originalSQASTOrigin);
   return ast;
 }
 
@@ -576,8 +574,6 @@ public class QBSubQuery implements ISubQueryJoinInfo {
 
 rewrite(outerQueryRR, forHavingClause, outerQueryAlias, insertClause, 
selectClause);
 
-SubQueryUtils.setOriginDeep(subQueryAST, originalSQASTOrigin);
-
 /*
  * Restriction.13.m :: In the case of an implied Group By on a
  * correlated SubQuery, the SubQuery always returns 1 row.
@@ -696,8 +692,6 @@ public class QBSubQuery implements ISubQueryJoinInfo {
   }
 }
 
-SubQueryUtils.setOriginDeep(joinConditionAST, originalSQASTOrigin);
-SubQueryUtils.setOriginDeep(postJoinConditionAST, originalSQASTOrigin);
   }
 
   ASTNode updateOuterQueryFilter(ASTNode outerQryFilter) {
@@ -711,7 +705,6 @@ public class QBSubQuery implements ISubQueryJoinInfo {
   return postJoinConditionAST;
 }
 ASTNode node = SubQueryUtils.andAST(outerQryFilter, postJoinConditionAST);
-node.setOrigin(originalSQASTOrigin);
 return node;
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/8da2ed30/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java
index 87a7ced..362a285 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java
@@ -467,17 +467,6 @@ public class SubQueryUtils {
 return check;
   }
 
-  static void setOriginDeep(ASTNode node, ASTNodeOrigin origin) {
-if ( node == null ) {
-  return;
-}
-node.setOrigin(origin);
-int childCnt = node.getChildCount();
-for(int i=0; ihttp://git-wip-us.apache.org/repos/asf/hive/blob/8da2ed30/ql/src/test/queries/clientpositive/subquery_views.q
--
diff --git a/ql/src/test/queries/clientpositive/subquery_views.q 
b/ql/src/test/queries/clientpositive/subquery_views.q
index f15d41b..e646310 100644
--- a/ql/src/test/queries/clientpositive/subquery_views.q
+++ b/ql/src/test/queries/clientpositive/subquery_views.q
@@ -10,6 +10,8 @@ where exists
   where b.value = a.value  and a.key = b.key and a.value > 'val_9')
 ;
 
+describe extended cv1;
+
 select * 
 from cv1 where cv1.key in (select key from cv1 c where c.key > '95');
 ;
@@ -26,6 +28,8 @@ where b.key not in
   )
 ;
 
+describe extended cv2;
+
 explain
 select * 
 from cv2 where cv2.key in (select key from cv2 c where c.key < '1

hive git commit: HIVE-11846: CliDriver shutdown tries to drop index table again which was already dropped when dropping the original table (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2015-09-18 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master b934a804a -> 68c0e9993


HIVE-11846: CliDriver shutdown tries to drop index table again which was 
already dropped when dropping the original table (Pengcheng Xiong, reviewed by 
Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/68c0e999
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/68c0e999
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/68c0e999

Branch: refs/heads/master
Commit: 68c0e9993c6aee85d50ce1dc8974916b1e073f67
Parents: b934a80
Author: Pengcheng Xiong 
Authored: Fri Sep 18 10:29:52 2015 -0700
Committer: Pengcheng Xiong 
Committed: Fri Sep 18 10:29:52 2015 -0700

--
 .../org/apache/hadoop/hive/ql/QTestUtil.java|   9 +-
 .../clientpositive/drop_table_with_index.q  |  35 +
 .../clientpositive/drop_table_with_index.q.out  | 152 +++
 3 files changed, 195 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/68c0e999/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
--
diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java 
b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
index 3fae0ba..f23bf2b 100644
--- a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
+++ b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
@@ -84,6 +84,7 @@ import 
org.apache.hadoop.hive.ql.exec.spark.session.SparkSessionManagerImpl;
 import org.apache.hadoop.hive.ql.lockmgr.zookeeper.CuratorFrameworkSingleton;
 import org.apache.hadoop.hive.ql.lockmgr.zookeeper.ZooKeeperHiveLockManager;
 import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.InvalidTableException;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.parse.ASTNode;
 import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
@@ -670,7 +671,13 @@ public class QTestUtil {
   SessionState.get().setCurrentDatabase(dbName);
   for (String tblName : db.getAllTables()) {
 if (!DEFAULT_DATABASE_NAME.equals(dbName) || 
!srcTables.contains(tblName)) {
-  Table tblObj = db.getTable(tblName);
+  Table tblObj = null;
+  try {
+tblObj = db.getTable(tblName);
+  } catch (InvalidTableException e) {
+LOG.warn("Trying to drop table " + e.getTableName() + ". But it 
does not exist.");
+continue;
+  }
   // dropping index table can not be dropped directly. Dropping the 
base
   // table will automatically drop all its index table
   if(tblObj.isIndexTable()) {
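
The try/catch above is the whole behavioral change: the cleanup loop walks a table listing that can go stale, because dropping a base table implicitly drops its index tables. A sketch of that tolerant-cleanup pattern against a hypothetical catalog API:

```java
import java.util.List;
import java.util.NoSuchElementException;

public class TolerantCleanup {
    interface Catalog {
        List<String> listTables();
        void dropTable(String name) throws NoSuchElementException;
    }

    // Drop everything the listing names, skipping entries that vanished in
    // the meantime (e.g. an index table removed along with its base table).
    static void dropAll(Catalog catalog) {
        for (String name : catalog.listTables()) {
            try {
                catalog.dropTable(name);
            } catch (NoSuchElementException alreadyGone) {
                // stale listing entry: log and continue, as the patch does
            }
        }
    }
}
```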

http://git-wip-us.apache.org/repos/asf/hive/blob/68c0e999/ql/src/test/queries/clientpositive/drop_table_with_index.q
--
diff --git a/ql/src/test/queries/clientpositive/drop_table_with_index.q 
b/ql/src/test/queries/clientpositive/drop_table_with_index.q
new file mode 100644
index 0000000..1790664
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/drop_table_with_index.q
@@ -0,0 +1,35 @@
+set hive.stats.dbclass=fs;
+set hive.stats.autogather=true;
+set hive.cbo.enable=true;
+
+DROP TABLE IF EXISTS aa;
+CREATE TABLE aa (L_ORDERKEY  INT,
+L_PARTKEY   INT,
+L_SUPPKEY   INT,
+L_LINENUMBERINT,
+L_QUANTITY  DOUBLE,
+L_EXTENDEDPRICE DOUBLE,
+L_DISCOUNT  DOUBLE,
+L_TAX   DOUBLE,
+L_RETURNFLAGSTRING,
+L_LINESTATUSSTRING,
+l_shipdate  STRING,
+L_COMMITDATESTRING,
+L_RECEIPTDATE   STRING,
+L_SHIPINSTRUCT  STRING,
+L_SHIPMODE  STRING,
+L_COMMENT   STRING)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|';
+
+LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE aa;
+
+CREATE INDEX aa_lshipdate_idx ON TABLE aa(l_shipdate) AS 
'org.apache.hadoop.hive.ql.index.AggregateIndexHandler' WITH DEFERRED REBUILD 
IDXPROPERTIES("AGGREGATES"="count(l_shipdate)");
+ALTER INDEX aa_lshipdate_idx ON aa REBUILD;
+
+show tables;
+
+explain select l_shipdate, count(l_shipdate)
+from aa
+group by l_shipdate;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/68c0e999/ql/src/test/results/clientpositive/drop_table_with_index.q.out

[2/2] hive git commit: HIVE-10209 - FetchTask with VC may fail because ExecMapper.done is true (Chao)

2015-09-24 Thread pxiong
HIVE-10209 - FetchTask with VC may fail because ExecMapper.done is true (Chao)

git-svn-id: https://svn.apache.org/repos/asf/hive/branches/spark@1672509 
13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2801d2c4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2801d2c4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2801d2c4

Branch: refs/heads/branch-1.0
Commit: 2801d2c4b1a61315ae7f28c0ea825580e30f411b
Parents: a7618df
Author: Sun Chao 
Authored: Thu Apr 9 22:30:15 2015 +
Committer: Pengcheng Xiong 
Committed: Thu Sep 24 20:35:14 2015 -0700

--
 ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java | 2 ++
 1 file changed, 2 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/2801d2c4/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java
index c4f04cb..3e2187d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java
@@ -28,6 +28,7 @@ import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.CommandNeedRetryException;
 import org.apache.hadoop.hive.ql.DriverContext;
 import org.apache.hadoop.hive.ql.QueryPlan;
+import org.apache.hadoop.hive.ql.exec.mr.ExecMapper;
 import org.apache.hadoop.hive.ql.io.HiveInputFormat;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
@@ -79,6 +80,7 @@ public class FetchTask extends Task<FetchWork> implements Serializable {
   fetch = new FetchOperator(work, job, source, getVirtualColumns(source));
   source.initialize(conf, new ObjectInspector[]{fetch.getOutputObjectInspector()});
   totalRows = 0;
+  ExecMapper.setDone(false);
 
 } catch (Exception e) {
   // Bail out ungracefully - we should never hit
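
A hedged, self-contained sketch of why the one-line fix matters (ExecMapper.done is static in the real code; everything else below is invented for the example):

    public class DoneFlagSketch {
      // Stand-in for org.apache.hadoop.hive.ql.exec.mr.ExecMapper's static flag.
      static class ExecMapper {
        private static boolean done;
        static void setDone(boolean d) { done = d; }
        static boolean getDone() { return done; }
      }

      // Stand-in fetch loop: stops early whenever the shared flag is set.
      static int fetchRows(int available) {
        int fetched = 0;
        while (fetched < available && !ExecMapper.getDone()) {
          fetched++;
        }
        return fetched;
      }

      public static void main(String[] args) {
        ExecMapper.setDone(true);          // stale state left by an earlier map job
        System.out.println(fetchRows(3));  // 0: the fetch wrongly returns nothing
        ExecMapper.setDone(false);         // the fix: reset when the fetch initializes
        System.out.println(fetchRows(3));  // 3
      }
    }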



[1/2] hive git commit: HIVE-10083 - SMBJoin fails in case one table is uninitialized (Na via Chao)

2015-09-24 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/branch-1.0 3dccb381e -> 2801d2c4b


HIVE-10083 - SMBJoin fails in case one table is uninitialized (Na via Chao)

git-svn-id: https://svn.apache.org/repos/asf/hive/trunk@1670718 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a7618dfb
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a7618dfb
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a7618dfb

Branch: refs/heads/branch-1.0
Commit: a7618dfb9f93eab922f1939680dca4ae5d5a8f6b
Parents: 3dccb38
Author: Sun Chao 
Authored: Wed Apr 1 16:27:50 2015 +
Committer: Pengcheng Xiong 
Committed: Thu Sep 24 17:10:50 2015 -0700

--
 .../ql/optimizer/AbstractBucketJoinProc.java| 24 +++-
 1 file changed, 13 insertions(+), 11 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/a7618dfb/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java
index 70c23a6..13ede1b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java
@@ -479,17 +479,19 @@ abstract public class AbstractBucketJoinProc implements NodeProcessor {
   for (int sindex = 0; sindex < smallTblBucketNums.size(); sindex++) {
 int smallTblBucketNum = smallTblBucketNums.get(sindex);
 List<String> smallTblFileNames = smallTblFilesList.get(sindex);
-if (bigTblBucketNum >= smallTblBucketNum) {
-  // if the big table has more buckets than the current small table,
-  // use "MOD" to get small table bucket names. For example, if the big
-  // table has 4 buckets and the small table has 2 buckets, then the
-  // mapping should be 0->0, 1->1, 2->0, 3->1.
-  int toAddSmallIndex = bindex % smallTblBucketNum;
-  resultFileNames.add(smallTblFileNames.get(toAddSmallIndex));
-} else {
-  int jump = smallTblBucketNum / bigTblBucketNum;
-  for (int i = bindex; i < smallTblFileNames.size(); i = i + jump) {
-resultFileNames.add(smallTblFileNames.get(i));
+if (smallTblFileNames.size() > 0) {
+  if (bigTblBucketNum >= smallTblBucketNum) {
+// if the big table has more buckets than the current small table,
+// use "MOD" to get small table bucket names. For example, if the big
+// table has 4 buckets and the small table has 2 buckets, then the
+// mapping should be 0->0, 1->1, 2->0, 3->1.
+int toAddSmallIndex = bindex % smallTblBucketNum;
+resultFileNames.add(smallTblFileNames.get(toAddSmallIndex));
+  } else {
+int jump = smallTblBucketNum / bigTblBucketNum;
+for (int i = bindex; i < smallTblFileNames.size(); i = i + jump) {
+  resultFileNames.add(smallTblFileNames.get(i));
+}
   }
 }
   }
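
As a worked example of the MOD/jump mapping described in the comment above, here is a self-contained Java sketch (simplified: the small table's bucket count is taken to be its file count, and the empty-files guard mirrors the new size check):

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Collections;
    import java.util.List;

    public class BucketMappingSketch {
      static List<String> filesForBigBucket(int bindex, int bigTblBucketNum, List<String> smallTblFileNames) {
        List<String> result = new ArrayList<>();
        int smallTblBucketNum = smallTblFileNames.size(); // simplification: buckets == files
        if (smallTblBucketNum == 0) {
          return result; // the new guard: uninitialized small table, no files at all
        }
        if (bigTblBucketNum >= smallTblBucketNum) {
          // MOD case: 4 big buckets over 2 small -> 0->0, 1->1, 2->0, 3->1
          result.add(smallTblFileNames.get(bindex % smallTblBucketNum));
        } else {
          // jump case: each big bucket takes every (small/big)-th small file
          int jump = smallTblBucketNum / bigTblBucketNum;
          for (int i = bindex; i < smallTblFileNames.size(); i += jump) {
            result.add(smallTblFileNames.get(i));
          }
        }
        return result;
      }

      public static void main(String[] args) {
        List<String> small = Arrays.asList("s0", "s1");
        for (int b = 0; b < 4; b++) {
          System.out.println("big bucket " + b + " -> " + filesForBigBucket(b, 4, small));
        }
        System.out.println("empty small table -> " + filesForBigBucket(0, 4, Collections.emptyList()));
      }
    }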



[1/2] hive git commit: HIVE-10571 : HiveMetaStoreClient should close existing thrift connection before its reconnect (Chaoyu Tang via Szehon)

2015-09-24 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/branch-1.0 2801d2c4b -> 0bb08b3cd


HIVE-10571 : HiveMetaStoreClient should close existing thrift connection before 
its reconnect (Chaoyu Tang via Szehon)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7bc067a6
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7bc067a6
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7bc067a6

Branch: refs/heads/branch-1.0
Commit: 7bc067a64802fbaa72996e8ef41e2134d71c5c14
Parents: 2801d2c
Author: Szehon Ho 
Authored: Mon May 4 11:20:36 2015 -0700
Committer: Pengcheng Xiong 
Committed: Thu Sep 24 20:40:01 2015 -0700

--
 .../org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/7bc067a6/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
--
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
index abfdfb1..3c4d814 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
@@ -263,6 +263,7 @@ public class HiveMetaStoreClient implements IMetaStoreClient {
   throw new MetaException("For direct MetaStore DB connections, we don't support retries" +
   " at the client level.");
 } else {
+  close();
   // Swap the first element of the metastoreUris[] with a random element from the rest
   // of the array. Rationale being that this method will generally be called when the default
   // connection has died and the default connection is likely to be the first array element.
@@ -436,7 +437,7 @@ public class HiveMetaStoreClient implements IMetaStoreClient {
 client.shutdown();
   }
 } catch (TException e) {
-  LOG.error("Unable to shutdown local metastore client", e);
+  LOG.debug("Unable to shutdown metastore client. Will try closing transport directly.", e);
 }
 // Transport would have got closed via client.shutdown(), so we dont need this, but
 // just in case, we make this call.
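
A minimal sketch of the close-before-reconnect pattern this one-liner introduces (the Transport type and method names are stand-ins, not the real Thrift API):

    public class ReconnectSketch {
      // Stand-in for the Thrift transport held by the client.
      static class Transport {
        boolean open = true;
        void close() { open = false; System.out.println("closed old transport"); }
      }

      private Transport transport = new Transport();

      void reconnect() {
        close();                     // the added line: release the old connection first
        transport = new Transport(); // then open a fresh one (after shuffling URIs)
        System.out.println("opened new transport");
      }

      void close() {
        if (transport != null && transport.open) {
          transport.close();
        }
      }

      public static void main(String[] args) {
        new ReconnectSketch().reconnect();
      }
    }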



[2/2] hive git commit: HIVE-10646 : ColumnValue does not handle NULL_TYPE (Yongzhi Chen via Szehon)

2015-09-24 Thread pxiong
HIVE-10646 : ColumnValue does not handle NULL_TYPE (Yongzhi Chen via Szehon)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0bb08b3c
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0bb08b3c
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0bb08b3c

Branch: refs/heads/branch-1.0
Commit: 0bb08b3cd86b9ddb3085552e3693d6e874efb1da
Parents: 7bc067a
Author: Szehon Ho 
Authored: Sun May 10 22:21:15 2015 -0500
Committer: Pengcheng Xiong 
Committed: Thu Sep 24 20:40:53 2015 -0700

--
 service/src/java/org/apache/hive/service/cli/ColumnValue.java | 2 ++
 1 file changed, 2 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/0bb08b3c/service/src/java/org/apache/hive/service/cli/ColumnValue.java
--
diff --git a/service/src/java/org/apache/hive/service/cli/ColumnValue.java b/service/src/java/org/apache/hive/service/cli/ColumnValue.java
index 9b48396..d383180 100644
--- a/service/src/java/org/apache/hive/service/cli/ColumnValue.java
+++ b/service/src/java/org/apache/hive/service/cli/ColumnValue.java
@@ -180,6 +180,8 @@ public class ColumnValue {
 case UNION_TYPE:
 case USER_DEFINED_TYPE:
   return stringValue((String)value);
+case NULL_TYPE:
+  return stringValue((String)value);
 default:
   return null;
 }



[1/2] hive git commit: HIVE-10965 : direct SQL for stats fails in 0-column case (Sergey Shelukhin reviewed by Ashutosh Chauhan)

2015-09-26 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/branch-1.0 0bb08b3cd -> 37206a49f


HIVE-10965 : direct SQL for stats fails in 0-column case (Sergey Shelukhin 
reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0b511cad
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0b511cad
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0b511cad

Branch: refs/heads/branch-1.0
Commit: 0b511cad47b61236da48e2b9768b3e6e8148b4e1
Parents: 0bb08b3
Author: Thejas Nair 
Authored: Wed Jun 10 15:10:57 2015 -0700
Committer: Pengcheng Xiong 
Committed: Sat Sep 26 21:47:33 2015 -0700

--
 .../org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java | 1 +
 .../java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java | 2 ++
 ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java   | 3 ++-
 3 files changed, 5 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/0b511cad/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
--
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
index 3c4d814..627f7ea 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
@@ -1900,6 +1900,7 @@ public class HiveMetaStoreClient implements IMetaStoreClient {
   @Override
   public AggrStats getAggrColStatsFor(String dbName, String tblName,
 List<String> colNames, List<String> partNames) throws NoSuchObjectException, MetaException, TException {
+if (colNames.isEmpty()) return null; // Nothing to aggregate.
 PartitionsStatsRequest req = new PartitionsStatsRequest(dbName, tblName, colNames, partNames);
 return client.get_aggr_stats_for(req);
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/0b511cad/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
--
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
index a340e88..f34ce45 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
@@ -1082,6 +1082,7 @@ class MetaStoreDirectSql {
 
   public AggrStats aggrColStatsForPartitions(String dbName, String tableName,
   List<String> partNames, List<String> colNames) throws MetaException {
+if (colNames.isEmpty() || partNames.isEmpty()) return new AggrStats(); // Nothing to aggregate.
 long partsFound = partsFoundForPartitions(dbName, tableName, partNames, colNames);
 List<ColumnStatisticsObj> stats = columnStatisticsObjForPartitions(dbName,
 tableName, partNames, colNames, partsFound);
@@ -1090,6 +1091,7 @@ class MetaStoreDirectSql {
 
   private long partsFoundForPartitions(String dbName, String tableName,
   List<String> partNames, List<String> colNames) throws MetaException {
+assert !colNames.isEmpty() && !partNames.isEmpty();
 long partsFound = 0;
 boolean doTrace = LOG.isDebugEnabled();
 String queryText = "select count(\"COLUMN_NAME\") from \"PART_COL_STATS\""

http://git-wip-us.apache.org/repos/asf/hive/blob/0b511cad/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 30f63a2..b0bd8ce 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -245,7 +245,8 @@ public class StatsUtils {
 neededColumns = processNeededColumns(schema, neededColumns, colToTabAlias);
 AggrStats aggrStats = Hive.get().getAggrColStatsFor(table.getDbName(), table.getTableName(),
 neededColumns, partNames);
-if (null == aggrStats) {
+if (null == aggrStats || null == aggrStats.getColStats()
+|| aggrStats.getColStatsSize() == 0) {
   // There are some partitions with no state (or we didn't fetch any state).
   // Update the stats with empty list to reflect that in the
   // state/initialize structures.
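
For illustration, a self-contained sketch of the zero-column guard layering in this commit (hypothetical types; the real AggrStats is a Thrift struct):

    import java.util.Collections;
    import java.util.List;

    public class EmptyStatsGuardSketch {
      // Stand-in for the Thrift AggrStats struct.
      static class AggrStats {
        final List<String> colStats;
        AggrStats(List<String> colStats) { this.colStats = colStats; }
      }

      // Client side: with no columns there is nothing to aggregate, so skip the RPC.
      static AggrStats getAggrColStatsFor(List<String> colNames) {
        if (colNames.isEmpty()) return null;
        return new AggrStats(colNames); // stand-in for the real metastore call
      }

      public static void main(String[] args) {
        AggrStats aggrStats = getAggrColStatsFor(Collections.<String>emptyList());
        // Caller side: treat null and empty results identically.
        if (aggrStats == null || aggrStats.colStats == null || aggrStats.colStats.isEmpty()) {
          System.out.println("no stats fetched; update with an empty column-stats list");
        }
      }
    }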



[2/2] hive git commit: HIVE-9566: HiveServer2 fails to start with NullPointerException (Na via Xuefu)

2015-09-26 Thread pxiong
HIVE-9566: HiveServer2 fails to start with NullPointerException (Na via Xuefu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/37206a49
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/37206a49
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/37206a49

Branch: refs/heads/branch-1.0
Commit: 37206a49f1f6e12f3ac997bb04d3b383ae7781e1
Parents: 0b511ca
Author: Xuefu Zhang 
Authored: Tue Jun 30 05:15:40 2015 -0700
Committer: Pengcheng Xiong 
Committed: Sat Sep 26 22:04:13 2015 -0700

--
 service/src/java/org/apache/hive/service/server/HiveServer2.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/37206a49/service/src/java/org/apache/hive/service/server/HiveServer2.java
--
diff --git a/service/src/java/org/apache/hive/service/server/HiveServer2.java b/service/src/java/org/apache/hive/service/server/HiveServer2.java
index 17e1d85..4f44987 100644
--- a/service/src/java/org/apache/hive/service/server/HiveServer2.java
+++ b/service/src/java/org/apache/hive/service/server/HiveServer2.java
@@ -267,7 +267,7 @@ public class HiveServer2 extends CompositeService {
 HiveConf hiveConf = this.getHiveConf();
 super.stop();
 // Remove this server instance from ZooKeeper if dynamic service discovery 
is set
-if (hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_SUPPORT_DYNAMIC_SERVICE_DISCOVERY)) {
+if (hiveConf != null && hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_SUPPORT_DYNAMIC_SERVICE_DISCOVERY)) {
   try {
 removeServerInstanceFromZooKeeper();
   } catch (Exception e) {
@@ -276,7 +276,7 @@ public class HiveServer2 extends CompositeService {
 }
 // There should already be an instance of the session pool manager.
 // If not, ignoring is fine while stopping HiveServer2.
-if (hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_TEZ_INITIALIZE_DEFAULT_SESSIONS)) {
+if (hiveConf != null && hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_TEZ_INITIALIZE_DEFAULT_SESSIONS)) {
   try {
 TezSessionPoolManager.getInstance().stop();
   } catch (Exception e) {



hive git commit: HIVE-11211 : Reset the fields in JoinStatsRule in StatsRulesProcFactory (Pengcheng Xiong, reviewed by Laljo John Pullokkaran)

2015-09-26 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/branch-1.2 e7c16699f -> f428af1d2


HIVE-11211 : Reset the fields in JoinStatsRule in StatsRulesProcFactory 
(Pengcheng Xiong, reviewed by Laljo John Pullokkaran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f428af1d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f428af1d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f428af1d

Branch: refs/heads/branch-1.2
Commit: f428af1d2908588dd68eb30cde2f158bf9ef04c0
Parents: e7c1669
Author: Hari Subramaniyan 
Authored: Wed Jul 15 13:15:34 2015 -0700
Committer: Pengcheng Xiong 
Committed: Sat Sep 26 23:03:45 2015 -0700

--
 .../stats/annotation/StatsRulesProcFactory.java | 42 ++--
 1 file changed, 22 insertions(+), 20 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/f428af1d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index 0982059..376d42c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -1013,17 +1013,14 @@ public class StatsRulesProcFactory {
*/
   public static class JoinStatsRule extends DefaultStatsRule implements NodeProcessor {
 
-private boolean pkfkInferred = false;
-private long newNumRows = 0;
-private List<Operator<? extends OperatorDesc>> parents;
-private CommonJoinOperator<? extends JoinDesc> jop;
-private int numAttr = 1;
 
 @Override
 public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
 Object... nodeOutputs) throws SemanticException {
-  jop = (CommonJoinOperator) nd;
-  parents = jop.getParentOperators();
+  long newNumRows = 0;
+  CommonJoinOperator<? extends JoinDesc> jop = (CommonJoinOperator<? extends JoinDesc>) nd;
+  List<Operator<? extends OperatorDesc>> parents = jop.getParentOperators();
+  int numAttr = 1;
   AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx;
   HiveConf conf = aspCtx.getConf();
   boolean allStatsAvail = true;
@@ -1062,7 +1059,7 @@ public class StatsRulesProcFactory {
   numAttr = keyExprs.size();
 
   // infer PK-FK relationship in single attribute join case
-  inferPKFKRelationship();
+  long inferredRowCount = inferPKFKRelationship(numAttr, parents, jop);
   // get the join keys from parent ReduceSink operators
   for (int pos = 0; pos < parents.size(); pos++) {
 ReduceSinkOperator parent = (ReduceSinkOperator) jop.getParentOperators().get(pos);
@@ -1149,7 +1146,7 @@ public class StatsRulesProcFactory {
 
   // update join statistics
   stats.setColumnStats(outColStats);
-  long newRowCount = pkfkInferred ? newNumRows : computeNewRowCount(rowCounts, denom);
+  long newRowCount = inferredRowCount != -1 ? inferredRowCount : computeNewRowCount(rowCounts, denom);
   updateStatsForJoinType(stats, newRowCount, jop, rowCountParents);
   jop.setStatistics(stats);
 
@@ -1180,7 +1177,7 @@ public class StatsRulesProcFactory {
   }
 
   long maxDataSize = parentSizes.get(maxRowIdx);
-  long newNumRows = StatsUtils.safeMult(StatsUtils.safeMult(maxRowCount, (numParents - 1)), joinFactor);
+  newNumRows = StatsUtils.safeMult(StatsUtils.safeMult(maxRowCount, (numParents - 1)), joinFactor);
   long newDataSize = StatsUtils.safeMult(StatsUtils.safeMult(maxDataSize, (numParents - 1)), joinFactor);
   Statistics wcStats = new Statistics();
   wcStats.setNumRows(newNumRows);
@@ -1195,15 +1192,17 @@ public class StatsRulesProcFactory {
   return null;
 }
 
-private void inferPKFKRelationship() {
+private long inferPKFKRelationship(int numAttr, List<Operator<? extends OperatorDesc>> parents,
+CommonJoinOperator<? extends JoinDesc> jop) {
+  long newNumRows = -1;
   if (numAttr == 1) {
 // If numAttr is 1, this means we join on one single key column.
 Map<Integer, ColStatistics> parentsWithPK = getPrimaryKeyCandidates(parents);
 
 // We only allow one single PK.
 if (parentsWithPK.size() != 1) {
-  LOG.debug("STATS-" + jop.toString() + ": detects multiple PK parents.");
-  return;
+  LOG.debug("STATS-" + jop.toString() + ": detects none/multiple PK parents.");
+  return newNumRows;
 }
 Integer pkPos = parentsWithPK.keySet().iterator().next();
 ColStatistics csPK = parentsWithPK.values().iterator().next();
@@ -1215,7 +1214,7 @@ public class StatsRulesProcFactory {
 //
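
A self-contained sketch of the hazard this refactor removes: a rule object shared across invocations keeps per-query state in instance fields (the rule and numbers below are invented; the real class is JoinStatsRule):

    public class StatefulRuleSketch {
      // Before: per-query state lives in an instance field of a shared rule object.
      static class LeakyRule {
        private long newNumRows = 0;
        long process(boolean inferred, long computed) {
          if (inferred) newNumRows = 42;                   // set while handling one query...
          return newNumRows == 0 ? computed : newNumRows;  // ...and wrongly reused on the next
        }
      }

      // After: everything is a local; -1 signals "nothing inferred".
      static class CleanRule {
        long process(boolean inferred, long computed) {
          long inferredRowCount = inferred ? 42 : -1;
          return inferredRowCount != -1 ? inferredRowCount : computed;
        }
      }

      public static void main(String[] args) {
        LeakyRule leaky = new LeakyRule();
        leaky.process(true, 7);
        System.out.println(leaky.process(false, 7));           // 42: stale state leaks
        System.out.println(new CleanRule().process(false, 7)); // 7: correct
      }
    }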

hive git commit: HIVE-11937: Improve StatsOptimizer to deal with query with additional constant columns (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2015-09-29 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master 6a8d7e4cd -> cdaf35674


HIVE-11937: Improve StatsOptimizer to deal with query with additional constant 
columns (Pengcheng Xiong, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cdaf3567
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cdaf3567
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cdaf3567

Branch: refs/heads/master
Commit: cdaf356740195cde6f5b6bfdade2f614e1c618d3
Parents: 6a8d7e4
Author: Pengcheng Xiong 
Authored: Tue Sep 29 17:47:39 2015 -0700
Committer: Pengcheng Xiong 
Committed: Tue Sep 29 17:47:39 2015 -0700

--
 .../hive/ql/optimizer/StatsOptimizer.java   |  46 -
 .../clientpositive/metadata_only_queries.q  |  15 ++
 .../clientpositive/metadata_only_queries.q.out  | 158 +
 .../spark/metadata_only_queries.q.out   | 170 +++
 .../tez/metadata_only_queries.q.out | 170 +++
 5 files changed, 552 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/cdaf3567/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
index bc8d8f7..5a21e6b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
@@ -19,6 +19,8 @@ package org.apache.hadoop.hive.ql.optimizer;
 
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
@@ -235,10 +237,23 @@ public class StatsOptimizer implements Transform {
   return null;
 }
 Operator last = (Operator) stack.get(5);
+SelectOperator cselOp = null;
+Map<Integer, Object> posToConstant = new HashMap<>();
 if (last instanceof SelectOperator) {
-  SelectOperator cselOp = (SelectOperator) last;
+  cselOp = (SelectOperator) last;
   if (!cselOp.isIdentitySelect()) {
-return null;  // todo we can do further by providing operator to fetch task
+for (int pos = 0; pos < cselOp.getConf().getColList().size(); pos++) {
+  ExprNodeDesc desc = cselOp.getConf().getColList().get(pos);
+  if (desc instanceof ExprNodeConstantDesc) {
+//We store the position to the constant value for later use.
+posToConstant.put(pos, ((ExprNodeConstantDesc)desc).getValue());
+  } else {
+if (!(desc instanceof ExprNodeColumnDesc)) {
+  // Probably an expression, cant handle that
+  return null;
+}
+  }
+}
   }
   last = (Operator) stack.get(6);
 }
@@ -588,13 +603,30 @@ public class StatsOptimizer implements Transform {
 
 
 List<List<Object>> allRows = new ArrayList<List<Object>>();
-allRows.add(oneRow);
-
 List<String> colNames = new ArrayList<String>();
 List<ObjectInspector> ois = new ArrayList<ObjectInspector>();
-for (ColumnInfo colInfo: cgbyOp.getSchema().getSignature()) {
-  colNames.add(colInfo.getInternalName());
-  ois.add(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(colInfo.getType()));
+if (cselOp == null) {
+  allRows.add(oneRow);
+  for (ColumnInfo colInfo : cgbyOp.getSchema().getSignature()) {
+colNames.add(colInfo.getInternalName());
+ois.add(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(colInfo.getType()));
+  }
+} else {
+  int aggrPos = 0;
+  List<Object> oneRowWithConstant = new ArrayList<>();
+  for (int pos = 0; pos < cselOp.getSchema().getSignature().size(); pos++) {
+if (posToConstant.containsKey(pos)) {
+  // This position is a constant.
+  oneRowWithConstant.add(posToConstant.get(pos));
+} else {
+  // This position is an aggregation.
+  oneRowWithConstant.add(oneRow.get(aggrPos++));
+}
+ColumnInfo colInfo = cselOp.getSchema().getSignature().get(pos);
+colNames.add(colInfo.getInternalName());
+ois.add(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(colInfo.getType()));
+  }
+  allRows.add(oneRowWithConstant);
 }
 StandardStructObjectInspector sOI = ObjectInspectorFactory.
 getStandardStructObjectInspector(colNames, ois);

http://git-wip-us.apache.org/repos/asf/hive/blob/cdaf3567/ql/src/test/queries/cl
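
A self-contained sketch of the row reassembly shown above, with invented data: constants recorded by SELECT position are spliced back around the aggregate values computed from statistics:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public class ConstantSpliceSketch {
      public static void main(String[] args) {
        // Positions of constant SELECT columns, recorded during plan inspection.
        Map<Integer, Object> posToConstant = new HashMap<>();
        posToConstant.put(0, "x");
        posToConstant.put(2, "y");
        // Aggregate values answered straight from metastore statistics.
        List<Object> oneRow = Arrays.<Object>asList(500L);
        List<Object> oneRowWithConstant = new ArrayList<>();
        int aggrPos = 0;
        for (int pos = 0; pos < 3; pos++) {
          if (posToConstant.containsKey(pos)) {
            oneRowWithConstant.add(posToConstant.get(pos)); // constant column
          } else {
            oneRowWithConstant.add(oneRow.get(aggrPos++));  // next aggregation value
          }
        }
        System.out.println(oneRowWithConstant); // [x, 500, y]
      }
    }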

hive git commit: HIVE-11989: vector_groupby_reduce.q is failing on CLI and MiniTez drivers on master (Pengcheng Xiong, reviewed by Matt McCline)

2015-09-30 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master 1f0878939 -> e9b4d7e4e


HIVE-11989: vector_groupby_reduce.q is failing on CLI and MiniTez drivers on 
master (Pengcheng Xiong, reviewed by Matt McCline)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e9b4d7e4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e9b4d7e4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e9b4d7e4

Branch: refs/heads/master
Commit: e9b4d7e4e89cd68eeda58b2e2e6014ff24d0a690
Parents: 1f08789
Author: Pengcheng Xiong 
Authored: Wed Sep 30 10:06:36 2015 -0700
Committer: Pengcheng Xiong 
Committed: Wed Sep 30 10:06:36 2015 -0700

--
 .../tez/vector_groupby_reduce.q.out | 70 +---
 .../clientpositive/vector_groupby_reduce.q.out  | 69 +--
 2 files changed, 94 insertions(+), 45 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/e9b4d7e4/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out
--
diff --git a/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out b/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out
index fe7e829..1635462 100644
--- a/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out
@@ -399,7 +399,7 @@ STAGE PLANS:
   sort order: +
   Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized
-Reducer 3
+Reducer 3 
 Reduce Operator Tree:
   Select Operator
 expressions: KEY.reducesinkkey0 (type: int)
@@ -562,31 +562,32 @@ STAGE PLANS:
   Edges:
 Reducer 2 <- Map 1 (SIMPLE_EDGE)
 Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
  A masked pattern was here 
   Vertices:
-Map 1
+Map 1 
 Map Operator Tree:
 TableScan
   alias: store_sales
   Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
   Select Operator
-expressions: ss_ticket_number (type: int), ss_item_sk (type: int), ss_quantity (type: int)
-outputColumnNames: _col0, _col1, _col2
+expressions: ss_item_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int)
+outputColumnNames: ss_item_sk, ss_ticket_number, ss_quantity
 Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
-  aggregations: min(_col2)
-  keys: _col0 (type: int), _col1 (type: int)
+  aggregations: min(ss_quantity)
+  keys: ss_item_sk (type: int), ss_ticket_number (type: int)
   mode: hash
   outputColumnNames: _col0, _col1, _col2
   Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
   Reduce Output Operator
 key expressions: _col0 (type: int), _col1 (type: int)
 sort order: ++
-Map-reduce partition columns: _col0 (type: int)
+Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
 Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col2 (type: int)
 Execution mode: vectorized
-Reducer 2
+Reducer 2 
 Reduce Operator Tree:
   Group By Operator
 aggregations: min(VALUE._col0)
@@ -595,18 +596,33 @@ STAGE PLANS:
 outputColumnNames: _col0, _col1, _col2
 Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
-  aggregations: sum(_col1), sum(_col2)
-  keys: _col0 (type: int)
-  mode: complete
+  aggregations: sum(_col0), sum(_col2)
+  keys: _col1 (type: int)
+  mode: hash
   outputColumnNames: _col0, _col1, _col2
-  Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+  Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
   Reduce Output Operator
 key expressions: _col0 (type: int)
 sor

[3/3] hive git commit: HIVE-11699: Support special characters in quoted table names (Pengcheng Xiong, reviewed by John Pullokkaran)

2015-10-04 Thread pxiong
HIVE-11699: Support special characters in quoted table names (Pengcheng Xiong, 
reviewed by John Pullokkaran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c23841e5
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c23841e5
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c23841e5

Branch: refs/heads/master
Commit: c23841e553cbd4f32d33842d49f9b9e52803d143
Parents: d545935
Author: Pengcheng Xiong 
Authored: Sun Oct 4 12:45:21 2015 -0700
Committer: Pengcheng Xiong 
Committed: Sun Oct 4 12:45:21 2015 -0700

--
 .../org/apache/hadoop/hive/conf/HiveConf.java   | 5 +
 .../hadoop/hive/metastore/HiveAlterHandler.java | 2 +-
 .../hadoop/hive/metastore/HiveMetaStore.java| 8 +-
 .../hadoop/hive/metastore/MetaStoreUtils.java   |40 +-
 .../apache/hadoop/hive/metastore/Warehouse.java | 4 +-
 .../java/org/apache/hadoop/hive/ql/Driver.java  | 2 +-
 .../org/apache/hadoop/hive/ql/exec/DDLTask.java | 6 +-
 .../hadoop/hive/ql/lockmgr/DummyTxnManager.java | 3 +-
 .../hadoop/hive/ql/lockmgr/HiveLockObject.java  | 6 +-
 .../apache/hadoop/hive/ql/metadata/Hive.java| 4 +-
 .../apache/hadoop/hive/ql/metadata/Table.java   | 5 +-
 .../RewriteQueryUsingAggregateIndexCtx.java | 2 +-
 .../ql/parse/ColumnStatsSemanticAnalyzer.java   | 2 +-
 .../hadoop/hive/ql/metadata/TestHive.java   | 2 +-
 .../special_character_in_tabnames_1.q   |13 +
 .../special_character_in_tabnames_1.q   |  1075 +
 .../special_character_in_tabnames_2.q   |40 +
 .../special_character_in_tabnames_1.q.out   |10 +
 .../special_character_in_tabnames_1.q.out   | 19550 +
 .../special_character_in_tabnames_2.q.out   |   304 +
 20 files changed, 21060 insertions(+), 23 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/c23841e5/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 33ef654..7f632bc 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2233,6 +2233,11 @@ public class HiveConf extends Configuration {
 
 HIVE_SUPPORT_SQL11_RESERVED_KEYWORDS("hive.support.sql11.reserved.keywords", true,
 "This flag should be set to true to enable support for SQL2011 reserved keywords.\n" +
 "The default value is true."),
+HIVE_SUPPORT_SPECICAL_CHARACTERS_IN_TABLE_NAMES("hive.support.special.characters.tablename", true,
+"This flag should be set to true to enable support for special characters in table names.\n"
++ "When it is set to false, only [a-zA-Z_0-9]+ are supported.\n"
++ "The only supported special character right now is '/'. This flag applies only to quoted table names.\n"
++ "The default value is true."),
 // role names are case-insensitive
 USERS_IN_ADMIN_ROLE("hive.users.in.admin.role", "", false,
 "Comma separated list of users who are in admin role for 
bootstrapping.\n" +

http://git-wip-us.apache.org/repos/asf/hive/blob/c23841e5/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
--
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
index 0082773..45f3515 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
@@ -82,7 +82,7 @@ public class HiveAlterHandler implements AlterHandler {
   throw new InvalidOperationException("New table is invalid: " + newt);
 }
 
-if (!MetaStoreUtils.validateName(newt.getTableName())) {
+if (!MetaStoreUtils.validateName(newt.getTableName(), hiveConf)) {
   throw new InvalidOperationException(newt.getTableName()
   + " is not a valid object name");
 }
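
A hedged sketch of the conf-aware name validation the hunk above switches to; the regexes are simplified stand-ins for the real MetaStoreUtils logic, which permits '/' only when the flag is on:

    import java.util.regex.Pattern;

    public class ValidateNameSketch {
      static final Pattern PLAIN = Pattern.compile("[\\w_]+");
      static final Pattern WITH_SLASH = Pattern.compile("[\\w_/]+");

      // Stand-in for MetaStoreUtils.validateName(name, conf): the conf decides
      // whether the special-characters pattern is used.
      static boolean validateName(String name, boolean allowSpecialChars) {
        Pattern p = allowSpecialChars ? WITH_SLASH : PLAIN;
        return p.matcher(name).matches();
      }

      public static void main(String[] args) {
        System.out.println(validateName("c/b/o_t1", true));  // true
        System.out.println(validateName("c/b/o_t1", false)); // false: '/' rejected
      }
    }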

http://git-wip-us.apache.org/repos/asf/hive/blob/c23841e5/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
--
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
index 8cd1f52..9d10e21 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
@@ -865,7 +865,7 @@ public class HiveMetaStore extends ThriftHiveMetastore {
 

[1/3] hive git commit: HIVE-11699: Support special characters in quoted table names (Pengcheng Xiong, reviewed by John Pullokkaran)

2015-10-04 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master d545935a5 -> c23841e55


http://git-wip-us.apache.org/repos/asf/hive/blob/c23841e5/ql/src/test/results/clientpositive/special_character_in_tabnames_2.q.out
--
diff --git a/ql/src/test/results/clientpositive/special_character_in_tabnames_2.q.out b/ql/src/test/results/clientpositive/special_character_in_tabnames_2.q.out
new file mode 100644
index 0000000..51d31e0
--- /dev/null
+++ b/ql/src/test/results/clientpositive/special_character_in_tabnames_2.q.out
@@ -0,0 +1,304 @@
+PREHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing
+-- SORT_QUERY_RESULTS
+
+DROP TABLE IF EXISTS `s/c`
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing
+-- SORT_QUERY_RESULTS
+
+DROP TABLE IF EXISTS `s/c`
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE `s/c` (key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@s/c
+POSTHOOK: query: CREATE TABLE `s/c` (key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@s/c
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt" INTO TABLE `s/c`
+PREHOOK: type: LOAD
+ A masked pattern was here 
+PREHOOK: Output: default@s/c
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt" INTO TABLE `s/c`
+POSTHOOK: type: LOAD
+ A masked pattern was here 
+POSTHOOK: Output: default@s/c
+PREHOOK: query: ANALYZE TABLE `s/c` COMPUTE STATISTICS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@s/c
+PREHOOK: Output: default@s/c
+POSTHOOK: query: ANALYZE TABLE `s/c` COMPUTE STATISTICS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@s/c
+POSTHOOK: Output: default@s/c
+PREHOOK: query: ANALYZE TABLE `s/c` COMPUTE STATISTICS FOR COLUMNS key,value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@s/c
+ A masked pattern was here 
+POSTHOOK: query: ANALYZE TABLE `s/c` COMPUTE STATISTICS FOR COLUMNS key,value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@s/c
+ A masked pattern was here 
+PREHOOK: query: -- without indexing
+SELECT key, value FROM `s/c` WHERE key > 80 AND key < 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@s/c
+ A masked pattern was here 
+POSTHOOK: query: -- without indexing
+SELECT key, value FROM `s/c` WHERE key > 80 AND key < 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@s/c
+ A masked pattern was here 
+82 val_82
+83 val_83
+83 val_83
+84 val_84
+84 val_84
+85 val_85
+86 val_86
+87 val_87
+90 val_90
+90 val_90
+90 val_90
+92 val_92
+95 val_95
+95 val_95
+96 val_96
+97 val_97
+97 val_97
+98 val_98
+98 val_98
+PREHOOK: query: CREATE INDEX src_index ON TABLE `s/c`(key) as 'COMPACT' WITH DEFERRED REBUILD
+PREHOOK: type: CREATEINDEX
+PREHOOK: Input: default@s/c
+POSTHOOK: query: CREATE INDEX src_index ON TABLE `s/c`(key) as 'COMPACT' WITH DEFERRED REBUILD
+POSTHOOK: type: CREATEINDEX
+POSTHOOK: Input: default@s/c
+POSTHOOK: Output: default@default__s/c_src_index__
+PREHOOK: query: ALTER INDEX src_index ON `s/c` REBUILD
+PREHOOK: type: ALTERINDEX_REBUILD
+PREHOOK: Input: default@s/c
+PREHOOK: Output: default@default__s/c_src_index__
+POSTHOOK: query: ALTER INDEX src_index ON `s/c` REBUILD
+POSTHOOK: type: ALTERINDEX_REBUILD
+POSTHOOK: Input: default@s/c
+POSTHOOK: Output: default@default__s/c_src_index__
+POSTHOOK: Lineage: default__s/c_src_index__._bucketname SIMPLE [(s/c)s/c.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__s/c_src_index__._offsets EXPRESSION [(s/c)s/c.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__s/c_src_index__.key SIMPLE [(s/c)s/c.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: -- manual indexing
+ A masked pattern was here 
+PREHOOK: type: QUERY
+PREHOOK: Input: default@default__s/c_src_index__
+ A masked pattern was here 
+POSTHOOK: query: -- manual indexing
+ A masked pattern was here 
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@default__s/c_src_index__
+ A masked pattern was here 
+PREHOOK: query: EXPLAIN SELECT key, value FROM `s/c` WHERE key > 80 AND key < 100
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT key, value FROM `s/c` WHERE key > 80 AND key < 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+Map Reduce
+  Map Operator Tree:
+  TableScan
+alias: s/c
+Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column

[2/3] hive git commit: HIVE-11699: Support special characters in quoted table names (Pengcheng Xiong, reviewed by John Pullokkaran)

2015-10-04 Thread pxiong
http://git-wip-us.apache.org/repos/asf/hive/blob/c23841e5/ql/src/test/results/clientpositive/special_character_in_tabnames_1.q.out
--
diff --git a/ql/src/test/results/clientpositive/special_character_in_tabnames_1.q.out b/ql/src/test/results/clientpositive/special_character_in_tabnames_1.q.out
new file mode 100644
index 0000000..bd0088a
--- /dev/null
+++ b/ql/src/test/results/clientpositive/special_character_in_tabnames_1.q.out
@@ -0,0 +1,19550 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+create table `c/b/o_t1`(key string, value string, c_int int, c_float float, c_boolean boolean)  partitioned by (dt string) row format delimited fields terminated by ',' STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@c/b/o_t1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+create table `c/b/o_t1`(key string, value string, c_int int, c_float float, c_boolean boolean)  partitioned by (dt string) row format delimited fields terminated by ',' STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@c/b/o_t1
+PREHOOK: query: create table `//cbo_t2`(key string, value string, c_int int, c_float float, c_boolean boolean)  partitioned by (dt string) row format delimited fields terminated by ',' STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@//cbo_t2
+POSTHOOK: query: create table `//cbo_t2`(key string, value string, c_int int, c_float float, c_boolean boolean)  partitioned by (dt string) row format delimited fields terminated by ',' STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@//cbo_t2
+PREHOOK: query: create table `cbo_/t3`(key string, value string, c_int int, c_float float, c_boolean boolean)  row format delimited fields terminated by ',' STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@cbo_/t3
+POSTHOOK: query: create table `cbo_/t3`(key string, value string, c_int int, c_float float, c_boolean boolean)  row format delimited fields terminated by ',' STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@cbo_/t3
+PREHOOK: query: load data local inpath '../../data/files/cbo_t1.txt' into table `c/b/o_t1` partition (dt='2014')
+PREHOOK: type: LOAD
+ A masked pattern was here 
+PREHOOK: Output: default@c/b/o_t1
+POSTHOOK: query: load data local inpath '../../data/files/cbo_t1.txt' into table `c/b/o_t1` partition (dt='2014')
+POSTHOOK: type: LOAD
+ A masked pattern was here 
+POSTHOOK: Output: default@c/b/o_t1
+POSTHOOK: Output: default@c/b/o_t1@dt=2014
+PREHOOK: query: load data local inpath '../../data/files/cbo_t2.txt' into table `//cbo_t2` partition (dt='2014')
+PREHOOK: type: LOAD
+ A masked pattern was here 
+PREHOOK: Output: default@//cbo_t2
+POSTHOOK: query: load data local inpath '../../data/files/cbo_t2.txt' into table `//cbo_t2` partition (dt='2014')
+POSTHOOK: type: LOAD
+ A masked pattern was here 
+POSTHOOK: Output: default@//cbo_t2
+POSTHOOK: Output: default@//cbo_t2@dt=2014
+PREHOOK: query: load data local inpath '../../data/files/cbo_t3.txt' into table `cbo_/t3`
+PREHOOK: type: LOAD
+ A masked pattern was here 
+PREHOOK: Output: default@cbo_/t3
+POSTHOOK: query: load data local inpath '../../data/files/cbo_t3.txt' into table `cbo_/t3`
+POSTHOOK: type: LOAD
+ A masked pattern was here 
+POSTHOOK: Output: default@cbo_/t3
+PREHOOK: query: CREATE TABLE `p/a/r/t`(
+p_partkey INT,
+p_name STRING,
+p_mfgr STRING,
+p_brand STRING,
+p_type STRING,
+p_size INT,
+p_container STRING,
+p_retailprice DOUBLE,
+p_comment STRING
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@p/a/r/t
+POSTHOOK: query: CREATE TABLE `p/a/r/t`(
+p_partkey INT,
+p_name STRING,
+p_mfgr STRING,
+p_brand STRING,
+p_type STRING,
+p_size INT,
+p_container STRING,
+p_retailprice DOUBLE,
+p_comment STRING
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@p/a/r/t
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table `p/a/r/t`
+PREHOOK: type: LOAD
+ A masked pattern was here 
+PREHOOK: Output: default@p/a/r/t
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table `p/a/r/t`
+POSTHOOK: type: LOAD
+ A masked pattern was here 
+POSTHOOK: Output: default@p/a/r/t
+PREHOOK: query: CREATE TABLE `line/item` (L_ORDERKEY  INT,
+L_PARTKEY   INT,
+L_SUPPKEY   INT,
+L_LINENUMBERINT,
+

hive git commit: backport HIVE-11301: thrift metastore issue when getting stats results in disconnect (Pengcheng Xiong, reviewed by Sergey Shelukhin)

2015-10-05 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/branch-1.0 2414c350f -> f1939cb5a


backport HIVE-11301: thrift metastore issue when getting stats results in 
disconnect (Pengcheng Xiong, reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f1939cb5
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f1939cb5
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f1939cb5

Branch: refs/heads/branch-1.0
Commit: f1939cb5ad3e0a10736d986f3ed47b5577da2ef9
Parents: 2414c35
Author: Pengcheng Xiong 
Authored: Mon Oct 5 18:20:38 2015 -0700
Committer: Pengcheng Xiong 
Committed: Mon Oct 5 18:20:38 2015 -0700

--
 .../apache/hadoop/hive/ql/stats/StatsUtils.java |  10 +-
 .../test/queries/clientpositive/stats_ppr_all.q |  24 ++
 .../results/clientpositive/stats_ppr_all.q.out  | 300 +++
 3 files changed, 332 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/f1939cb5/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index b0bd8ce..26cf56d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -243,8 +243,14 @@ public class StatsUtils {
 }
 Map<String, String> colToTabAlias = new HashMap<String, String>();
 neededColumns = processNeededColumns(schema, neededColumns, colToTabAlias);
-AggrStats aggrStats = Hive.get().getAggrColStatsFor(table.getDbName(), table.getTableName(),
-neededColumns, partNames);
+AggrStats aggrStats = null;
+// We check the sizes of neededColumns and partNames here. If either
+// size is 0, aggrStats is null after several retries. Thus, we can
+// skip the step to connect to the metastore.
+if (neededColumns.size() > 0 && partNames.size() > 0) {
+  aggrStats = Hive.get().getAggrColStatsFor(table.getDbName(), table.getTableName(),
+  neededColumns, partNames);
+}
 if (null == aggrStats || null == aggrStats.getColStats()
 || aggrStats.getColStatsSize() == 0) {
   // There are some partitions with no state (or we didn't fetch any state).
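
A minimal sketch of the short-circuit above (hypothetical client stub): with zero needed columns or partitions the metastore call can only fail and be retried, so checking sizes first avoids pointless reconnect attempts:

    import java.util.Arrays;
    import java.util.Collections;
    import java.util.List;

    public class SkipRpcSketch {
      // Stand-in for the Hive.get().getAggrColStatsFor(...) RPC.
      static Object getAggrColStatsFor(List<String> cols, List<String> parts) {
        System.out.println("expensive metastore call");
        return new Object();
      }

      static Object fetch(List<String> neededColumns, List<String> partNames) {
        Object aggrStats = null;
        // The added check: both lists must be non-empty before going to the wire.
        if (neededColumns.size() > 0 && partNames.size() > 0) {
          aggrStats = getAggrColStatsFor(neededColumns, partNames);
        }
        return aggrStats; // null falls through to the existing empty-stats handling
      }

      public static void main(String[] args) {
        System.out.println(fetch(Collections.emptyList(), Arrays.asList("ds=2008"))); // null, no call
      }
    }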

http://git-wip-us.apache.org/repos/asf/hive/blob/f1939cb5/ql/src/test/queries/clientpositive/stats_ppr_all.q
--
diff --git a/ql/src/test/queries/clientpositive/stats_ppr_all.q b/ql/src/test/queries/clientpositive/stats_ppr_all.q
new file mode 100644
index 0000000..a5630cb
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/stats_ppr_all.q
@@ -0,0 +1,24 @@
+set hive.stats.fetch.column.stats=true;
+
+drop table ss;
+
+CREATE TABLE ss (
+sales_order_id  BIGINT,
+order_amountFLOAT)
+PARTITIONED BY (country STRING, year INT, month INT, day INT) stored as orc;
+
+insert overwrite table ss partition(country="US", year=2015, month=1, day=1) select 1, 22.0 from src limit 1;
+insert overwrite table ss partition(country="US", year=2015, month=2, day=1) select 2, 2.0 from src limit 1;
+insert overwrite table ss partition(country="US", year=2015, month=1, day=2) select 1, 2.0 from src limit 1;
+
+ANALYZE TABLE ss PARTITION(country,year,month,day) compute statistics for columns;
+
+explain select sum(order_amount) from ss where (country="US" and year=2015 and month=2 and day=1);
+
+explain select sum(order_amount) from ss where (year*1+month*100+day) = "2015010" and 1>0;
+
+explain select sum(order_amount) from ss where (year*100+month*10+day) = "201511" and 1>0;
+
+explain select sum(order_amount) from ss where (year*100+month*10+day) > "201511" and 1>0;
+
+explain select '1' from ss where (year*100+month*10+day) > "201511";
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/f1939cb5/ql/src/test/results/clientpositive/stats_ppr_all.q.out
--
diff --git a/ql/src/test/results/clientpositive/stats_ppr_all.q.out b/ql/src/test/results/clientpositive/stats_ppr_all.q.out
new file mode 100644
index 0000000..d00c91e
--- /dev/null
+++ b/ql/src/test/results/clientpositive/stats_ppr_all.q.out
@@ -0,0 +1,300 @@
+PREHOOK: query: drop table ss
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table ss
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE ss (
+sales_order_id  BIGINT,
+order_amountFLOAT)
+PARTITIONED BY (country STRING, year INT, month INT, day INT) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ss
+POSTHOOK: query: CREATE TABLE ss (
+

hive git commit: HIVE-12011: unable to create temporary table using CTAS if regular table with that name already exists (Pengcheng Xiong, reviewed by Laljo John Pullokkaran)

2015-10-06 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master bcff87199 -> b2f63ba91


HIVE-12011: unable to create temporary table using CTAS if regular table with 
that name already exists (Pengcheng Xiong, reviewed by Laljo John Pullokkaran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b2f63ba9
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b2f63ba9
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b2f63ba9

Branch: refs/heads/master
Commit: b2f63ba914045e8285445d48bf656627a84bd2f7
Parents: bcff871
Author: Pengcheng Xiong 
Authored: Tue Oct 6 14:19:12 2015 -0700
Committer: Pengcheng Xiong 
Committed: Tue Oct 6 14:19:12 2015 -0700

--
 .../ql/metadata/SessionHiveMetaStoreClient.java |   2 +-
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |  31 --
 ql/src/test/queries/clientpositive/temp_table.q |  26 +
 .../clientpositive/spark/temp_table.q.out   | 107 +++
 .../results/clientpositive/temp_table.q.out | 107 +++
 .../results/clientpositive/tez/temp_table.q.out | 107 +++
 6 files changed, 372 insertions(+), 8 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/b2f63ba9/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
index 51ff262..6091c3f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
@@ -515,7 +515,7 @@ public class SessionHiveMetaStoreClient extends HiveMetaStoreClient implements IMetaStoreClient {
 return newCopy;
   }
 
-  private Map<String, Table> getTempTablesForDatabase(String dbName) {
+  public static Map<String, Table> getTempTablesForDatabase(String dbName) {
 SessionState ss = SessionState.get();
 if (ss == null) {
   LOG.debug("No current SessionState, skipping temp tables");

http://git-wip-us.apache.org/repos/asf/hive/blob/b2f63ba9/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 4bec228..7a54aec 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -112,6 +112,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.HiveUtils;
 import org.apache.hadoop.hive.ql.metadata.InvalidTableException;
 import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.optimizer.Optimizer;
@@ -10943,14 +10944,30 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
 
 case CTAS: // create table as select
 
-  // Verify that the table does not already exist
-  try {
-Table dumpTable = db.newTable(dbDotTab);
-if (null != db.getTable(dumpTable.getDbName(), 
dumpTable.getTableName(), false)) {
-  throw new 
SemanticException(ErrorMsg.TABLE_ALREADY_EXISTS.getMsg(dbDotTab));
+  if (isTemporary) {
+String dbName = qualifiedTabName[0];
+String tblName = qualifiedTabName[1];
+SessionState ss = SessionState.get();
+if (ss == null) {
+  throw new SemanticException("No current SessionState, cannot create 
temporary table "
+  + dbName + "." + tblName);
+}
+Map<String, Table> tables = SessionHiveMetaStoreClient.getTempTablesForDatabase(dbName);
+if (tables != null && tables.containsKey(tblName)) {
+  throw new SemanticException("Temporary table " + dbName + "." + 
tblName
+  + " already exists");
+}
+  } else {
+// Verify that the table does not already exist
+// dumpTable is only used to check the conflict for non-temporary tables
+try {
+  Table dumpTable = db.newTable(dbDotTab);
+  if (null != db.getTable(dumpTable.getDbName(), dumpTable.getTableName(), false)) {
+throw new SemanticException(ErrorMsg.TABLE_ALREADY_EXISTS.getMsg(dbDotTab));
+  }
+} catch (HiveException e) {
+  throw new SemanticException(e);
 }
-  } catch (HiveException e) {
-throw new SemanticException(e);
   }
 
   if(location != null && location.length()
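
For illustration, a self-contained sketch of the session-local conflict check added above (the registry map and exception type are stand-ins for SessionHiveMetaStoreClient's temp-table map and SemanticException):

    import java.util.HashMap;
    import java.util.Map;

    public class TempTableCtasSketch {
      // Stand-in for SessionHiveMetaStoreClient.getTempTablesForDatabase(dbName).
      static final Map<String, Map<String, Object>> SESSION_TEMP_TABLES = new HashMap<>();

      static void checkCtasTarget(String dbName, String tblName, boolean isTemporary) {
        if (isTemporary) {
          // Temp tables only exist in the session registry, not the metastore.
          Map<String, Object> tables = SESSION_TEMP_TABLES.get(dbName);
          if (tables != null && tables.containsKey(tblName)) {
            throw new IllegalStateException(
                "Temporary table " + dbName + "." + tblName + " already exists");
          }
        }
        // non-temporary targets are still checked against the metastore as before
      }

      public static void main(String[] args) {
        SESSION_TEMP_TABLES.put("default", new HashMap<>());
        SESSION_TEMP_TABLES.get("default").put("tmp1", new Object());
        checkCtasTarget("default", "tmp2", true); // ok: no session clash
        try {
          checkCtasTarget("default", "tmp1", true);
        } catch (IllegalStateException e) {
          System.out.println(e.getMessage());
        }
      }
    }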

[2/2] hive git commit: HIVE-12411: Remove counter based stats collection mechanism (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2015-11-24 Thread pxiong
HIVE-12411: Remove counter based stats collection mechanism (Pengcheng Xiong, 
reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ab98ffc2
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ab98ffc2
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ab98ffc2

Branch: refs/heads/master
Commit: ab98ffc2688abbc75de13524ca46848e566354ef
Parents: 306a640
Author: Pengcheng Xiong 
Authored: Tue Nov 24 17:09:40 2015 +0800
Committer: Pengcheng Xiong 
Committed: Tue Nov 24 17:09:40 2015 +0800

--
 .../hadoop/hive/common/StatsSetupConst.java |  13 -
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   8 +-
 data/conf/llap/hive-site.xml|   2 +-
 data/conf/spark/standalone/hive-site.xml|   2 +-
 data/conf/spark/yarn-client/hive-site.xml   |   2 +-
 data/conf/tez/hive-site.xml |   4 +-
 .../hive/ql/stats/CounterStatsAggregator.java   |  82 
 .../ql/stats/CounterStatsAggregatorSpark.java   |  58 ---
 .../ql/stats/CounterStatsAggregatorTez.java |  79 
 .../hive/ql/stats/CounterStatsPublisher.java|  66 ---
 .../hadoop/hive/ql/stats/StatsFactory.java  |  11 -
 .../test/queries/clientpositive/index_bitmap3.q |   1 -
 .../queries/clientpositive/index_bitmap_auto.q  |   1 -
 .../test/queries/clientpositive/stats_counter.q |  16 -
 .../clientpositive/stats_counter_partitioned.q  |  45 --
 .../clientpositive/llap/stats_counter.q.out | 102 
 .../llap/stats_counter_partitioned.q.out| 465 ---
 .../clientpositive/spark/stats_counter.q.out| 102 
 .../spark/stats_counter_partitioned.q.out   | 465 ---
 .../results/clientpositive/stats_counter.q.out  | 102 
 .../stats_counter_partitioned.q.out | 465 ---
 .../clientpositive/tez/metadataonly1.q.out  |  72 +--
 .../clientpositive/tez/optimize_nullscan.q.out  |  90 ++--
 .../clientpositive/tez/stats_counter.q.out  | 102 
 .../tez/stats_counter_partitioned.q.out | 465 ---
 25 files changed, 88 insertions(+), 2732 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/ab98ffc2/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java b/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
index 0a44bde..2ff76ee 100644
--- a/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
+++ b/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
@@ -31,19 +31,6 @@ import java.util.Map;
 public class StatsSetupConst {
 
   public enum StatDB {
-counter {
-  @Override
-  public String getPublisher(Configuration conf) {
-return "org.apache.hadoop.hive.ql.stats.CounterStatsPublisher"; }
-  @Override
-  public String getAggregator(Configuration conf) {
-if (HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
-  return "org.apache.hadoop.hive.ql.stats.CounterStatsAggregatorTez";
-} else if (HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) {
-  return "org.apache.hadoop.hive.ql.stats.CounterStatsAggregatorSpark";
-}
-return "org.apache.hadoop.hive.ql.stats.CounterStatsAggregator"; }
-},
 fs {
   @Override
   public String getPublisher(Configuration conf) {

http://git-wip-us.apache.org/repos/asf/hive/blob/ab98ffc2/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index f48403b..fffedd9 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1354,10 +1354,10 @@ public class HiveConf extends Configuration {
 // Statistics
 HIVESTATSAUTOGATHER("hive.stats.autogather", true,
 "A flag to gather statistics automatically during the INSERT OVERWRITE 
command."),
-HIVESTATSDBCLASS("hive.stats.dbclass", "fs", new PatternSet("counter", 
"custom", "fs"),
+HIVESTATSDBCLASS("hive.stats.dbclass", "fs", new PatternSet("custom", 
"fs"),
 "The storage that stores temporary Hive statistics. In filesystem 
based statistics collection ('fs'), \n" +
 "each task writes statistics it has collected in a file on the 
filesystem, which will be aggregated \n" +
-"after the job has finished. Supported values are fs (filesystem), 
counter, and custom as defined in StatsSetupConst.java."), // 
StatsSetupConst

[1/2] hive git commit: HIVE-12411: Remove counter based stats collection mechanism (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2015-11-24 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master 306a64024 -> ab98ffc26


http://git-wip-us.apache.org/repos/asf/hive/blob/ab98ffc2/ql/src/test/results/clientpositive/stats_counter_partitioned.q.out
--
diff --git a/ql/src/test/results/clientpositive/stats_counter_partitioned.q.out b/ql/src/test/results/clientpositive/stats_counter_partitioned.q.out
deleted file mode 100644
index 626dcff..0000000
--- a/ql/src/test/results/clientpositive/stats_counter_partitioned.q.out
+++ /dev/null
@@ -1,465 +0,0 @@
-PREHOOK: query: -- partitioned table analyze 
-
-create table dummy (key string, value string) partitioned by (ds string, hr string)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@dummy
-POSTHOOK: query: -- partitioned table analyze 
-
-create table dummy (key string, value string) partitioned by (ds string, hr string)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@dummy
-PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table 
dummy partition (ds='2008',hr='12')
-PREHOOK: type: LOAD
- A masked pattern was here 
-PREHOOK: Output: default@dummy
-POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table 
dummy partition (ds='2008',hr='12')
-POSTHOOK: type: LOAD
- A masked pattern was here 
-POSTHOOK: Output: default@dummy
-POSTHOOK: Output: default@dummy@ds=2008/hr=12
-PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table 
dummy partition (ds='2008',hr='11')
-PREHOOK: type: LOAD
- A masked pattern was here 
-PREHOOK: Output: default@dummy
-POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table 
dummy partition (ds='2008',hr='11')
-POSTHOOK: type: LOAD
- A masked pattern was here 
-POSTHOOK: Output: default@dummy
-POSTHOOK: Output: default@dummy@ds=2008/hr=11
-PREHOOK: query: analyze table dummy partition (ds,hr) compute statistics
-PREHOOK: type: QUERY
-PREHOOK: Input: default@dummy
-PREHOOK: Input: default@dummy@ds=2008/hr=11
-PREHOOK: Input: default@dummy@ds=2008/hr=12
-PREHOOK: Output: default@dummy
-PREHOOK: Output: default@dummy@ds=2008/hr=11
-PREHOOK: Output: default@dummy@ds=2008/hr=12
-POSTHOOK: query: analyze table dummy partition (ds,hr) compute statistics
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@dummy
-POSTHOOK: Input: default@dummy@ds=2008/hr=11
-POSTHOOK: Input: default@dummy@ds=2008/hr=12
-POSTHOOK: Output: default@dummy
-POSTHOOK: Output: default@dummy@ds=2008/hr=11
-POSTHOOK: Output: default@dummy@ds=2008/hr=12
-PREHOOK: query: describe formatted dummy partition (ds='2008', hr='11')
-PREHOOK: type: DESCTABLE
-PREHOOK: Input: default@dummy
-POSTHOOK: query: describe formatted dummy partition (ds='2008', hr='11')
-POSTHOOK: type: DESCTABLE
-POSTHOOK: Input: default@dummy
-# col_name data_type   comment 
-
-keystring  
-value  string  
-
-# Partition Information 
-# col_name data_type   comment 
-
-ds string  
-hr string  
-
-# Detailed Partition Information
-Partition Value:   [2008, 11]   
-Database:  default  
-Table: dummy
- A masked pattern was here 
-Partition Parameters:   
-   COLUMN_STATS_ACCURATE   true
-   numFiles1   
-   numRows 500 
-   rawDataSize 5312
-   totalSize   5812
- A masked pattern was here 
-
-# Storage Information   
-SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe  
 
-InputFormat:   org.apache.hadoop.mapred.TextInputFormat 
-OutputFormat:  
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat   
-Compressed:No   
-Num Buckets:   -1   
-Bucket Columns:[]   
-Sort Columns:  []   
-Storage Desc Params:
-   serialization.format1   
-PREHOOK: query: describe formatted dummy partition (ds='2008', hr='12')
-PREHOOK: type: DESCTABLE
-PREHOOK: Input: default@dummy
-POSTHOOK: query: describe formatted dummy partition (ds='2008', hr='12')
-POSTHOOK: type: DESCTABLE
-POSTHOOK: Input: default@dummy
-# col_name data_type   comment 
-
-keystring   

hive git commit: HIVE-12381: analyze table compute stats for table with special characters will wipe out all the table stats (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2015-12-02 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master 54c3db908 -> 723f2d369


HIVE-12381: analyze table compute stats for table with special characters will 
wipe out all the table stats (Pengcheng Xiong, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/723f2d36
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/723f2d36
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/723f2d36

Branch: refs/heads/master
Commit: 723f2d3695eed5e45bc61533fd229ec67cb77c5a
Parents: 54c3db9
Author: Pengcheng Xiong 
Authored: Wed Dec 2 22:31:45 2015 +0800
Committer: Pengcheng Xiong 
Committed: Wed Dec 2 22:31:45 2015 +0800

--
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   3 -
 .../src/test/queries/positive/hbase_stats3.q|  50 ---
 .../test/results/positive/hbase_stats3.q.out| 324 --
 .../hadoop/hive/ql/exec/FileSinkOperator.java   |   4 +-
 .../apache/hadoop/hive/ql/exec/StatsTask.java   |  13 +-
 .../hadoop/hive/ql/exec/TableScanOperator.java  |   3 +-
 .../apache/hadoop/hive/ql/exec/Utilities.java   |  25 --
 .../hadoop/hive/ql/exec/spark/SparkTask.java|   6 +-
 .../ql/io/rcfile/stats/PartialScanMapper.java   |   5 +-
 .../hive/ql/optimizer/GenMapRedUtils.java   |   2 -
 .../RewriteQueryUsingAggregateIndexCtx.java |   3 +-
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |   3 +-
 .../hive/ql/parse/spark/GenSparkUtils.java  |   1 -
 .../hadoop/hive/ql/plan/FileSinkDesc.java   |  10 -
 .../hadoop/hive/ql/plan/TableScanDesc.java  |  10 -
 .../hadoop/hive/ql/stats/StatsFactory.java  |  11 -
 .../special_character_in_tabnames_1.q   |   9 +-
 .../special_character_in_tabnames_2.q   |   2 +-
 ql/src/test/queries/clientpositive/stats19.q| 105 -
 .../queries/clientpositive/stats_list_bucket.q  |   2 -
 .../special_character_in_tabnames_1.q.out   |  59 +++
 .../special_character_in_tabnames_2.q.out   |  10 +-
 .../test/results/clientpositive/stats19.q.out   | 430 ---
 .../stats_list_bucket.q.java1.7.out |   6 +-
 24 files changed, 91 insertions(+), 1005 deletions(-)
--
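
For context, the bug fixed here: computing stats on a table whose name needs
quoting could erase the statistics the table already had. A rough reproduction
sketch in HiveQL, assuming a quoted identifier of the kind used in
special_character_in_tabnames_1.q (the exact name in the test may differ):

create table `s/c` (key string, value string);
insert into table `s/c` select * from src;   -- autogather records numRows, totalSize, ...
-- before this patch, the statement below could wipe those parameters instead of refreshing them
analyze table `s/c` compute statistics;
describe formatted `s/c`;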


http://git-wip-us.apache.org/repos/asf/hive/blob/723f2d36/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 9e805bd..e984b6e 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1391,9 +1391,6 @@ public class HiveConf extends Configuration {
 "A lower value for error indicates higher accuracy and a higher 
compute cost."),
 
HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION("hive.metastore.stats.ndv.densityfunction",
 false,
 "Whether to use density function to estimate the NDV for the whole 
table based on the NDV of partitions"),
-HIVE_STATS_KEY_PREFIX_MAX_LENGTH("hive.stats.key.prefix.max.length", 150,
-"Determines if when the prefix of the key used for intermediate stats 
collection\n" +
-"exceeds a certain length, a hash of the key is used instead.  If the 
value < 0 then hashing"),
 HIVE_STATS_KEY_PREFIX("hive.stats.key.prefix", "", "", true), // internal 
usage only
 // if length of variable length data type cannot be determined this length 
will be used.
 HIVE_STATS_MAX_VARIABLE_LENGTH("hive.stats.max.variable.length", 100,

http://git-wip-us.apache.org/repos/asf/hive/blob/723f2d36/hbase-handler/src/test/queries/positive/hbase_stats3.q
--
diff --git a/hbase-handler/src/test/queries/positive/hbase_stats3.q 
b/hbase-handler/src/test/queries/positive/hbase_stats3.q
deleted file mode 100644
index c74fa08..0000000
--- a/hbase-handler/src/test/queries/positive/hbase_stats3.q
+++ /dev/null
@@ -1,50 +0,0 @@
-set datanucleus.cache.collections=false;
-set hive.stats.autogather=true;
-set hive.stats.atomic=false;
-set hive.stats.collect.rawdatasize=false;
-
-create table stats_part like srcpart;
-
-set hive.stats.key.prefix.max.length=0;
-
--- The stats key should be hashed since the max length is too small
-insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') 
select key, value from src;
-
-desc formatted stats_part partition (ds='2010-04-08', hr = '13');
-
-set hive.stats.key.prefix.max.length=200;
-
--- The stats key should not be hashed since the max length is large enough
-insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') 
select key, value from src;
-
-desc formatted stats_part partition (ds='2010-04-08', hr = '13');
-
-set hive.stats.key.prefix.max.length=-1;

hive git commit: HIVE-12301: CBO: Calcite Operator To Hive Operator (Calcite Return Path): fix test failure for udf_percentile.q (Pengcheng Xiong, reviewed by Jesus Camacho Rodriguez)

2015-12-02 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master 723f2d369 -> aa61697b5


HIVE-12301: CBO: Calcite Operator To Hive Operator (Calcite Return Path): fix 
test failure for udf_percentile.q (Pengcheng Xiong, reviewed by Jesus Camacho 
Rodriguez)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/aa61697b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/aa61697b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/aa61697b

Branch: refs/heads/master
Commit: aa61697b589de11e0c2722bd6b3cf7b19fd7a5f7
Parents: 723f2d3
Author: Pengcheng Xiong 
Authored: Wed Dec 2 23:24:47 2015 +0800
Committer: Pengcheng Xiong 
Committed: Wed Dec 2 23:24:47 2015 +0800

--
 .../calcite/translator/HiveGBOpConvUtil.java| 104 -
 .../cbo_rp_groupby3_noskew_multi_distinct.q |  39 ++
 .../clientpositive/cbo_rp_udf_percentile.q  |  79 
 .../clientpositive/cbo_rp_udf_percentile2.q |  42 ++
 .../cbo_rp_groupby3_noskew_multi_distinct.q.out | 142 ++
 .../clientpositive/cbo_rp_udf_percentile.q.out  | 450 +++
 .../clientpositive/cbo_rp_udf_percentile2.q.out | 238 ++
 7 files changed, 1071 insertions(+), 23 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/aa61697b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java
index a129cf3..a6d809b 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java
@@ -25,6 +25,7 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.TreeMap;
 
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.core.AggregateCall;
@@ -88,6 +89,8 @@ public class HiveGBOpConvUtil {
 private GenericUDAFEvaluator udafEvaluator;
 private final ArrayList<ExprNodeDesc> udafParams = new ArrayList<ExprNodeDesc>();
 private List<Integer> udafParamsIndxInGBInfoDistExprs = new ArrayList<Integer>();
+// We store the position of the argument for the function in the input.
+private List<Integer> argList;
   };
 
   private static class GBInfo {
@@ -231,6 +234,7 @@ public class HiveGBOpConvUtil {
   inputOpAf.tabAlias);
   udafAttrs.udafParams.addAll(argExps);
   udafAttrs.udafName = aggCall.getAggregation().getName();
+  udafAttrs.argList = aggCall.getArgList();
   udafAttrs.isDistinctUDAF = aggCall.isDistinct();
   List<Integer> argLst = new ArrayList<Integer>(aggCall.getArgList());
   List<Integer> distColIndicesOfUDAF = new ArrayList<Integer>();
@@ -247,7 +251,7 @@ public class HiveGBOpConvUtil {
   // TODO: this seems wrong (following what Hive Regular does)
   if (!distParamInRefsToOutputPos.containsKey(argLst.get(i))
   && !deDupedNonDistIrefsSet.contains(argLst.get(i))) {
-deDupedNonDistIrefsSet.add(i);
+deDupedNonDistIrefsSet.add(argLst.get(i));
 gbInfo.deDupedNonDistIrefs.add(udafAttrs.udafParams.get(i));
   }
 }
@@ -992,10 +996,17 @@ public class HiveGBOpConvUtil {
   .get(rs.getConf().getOutputKeyColumnNames().size() - 1);
 }
 int numDistinctUDFs = 0;
-int distinctStartPosInReduceKeys = gbKeys.size();
 List<ExprNodeDesc> reduceValues = rs.getConf().getValueCols();
 ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
 int udafColStartPosInOriginalGB = gbInfo.gbKeys.size();
+// the positions in rsColInfoLst are as follows
+// --grpkey--,--distkey--,--values--
+// but distUDAF may be before/after some non-distUDAF, 
+// i.e., their positions can be mixed.
+// so we first process distUDAF and then non-distUDAF.
+// But we need to remember the sequence of udafs.
+List<Integer> distinctPositions = new ArrayList<>();
+Map<Integer, List<ExprNodeDesc>> indexToParameter = new TreeMap<>();
 for (int i = 0; i < gbInfo.udafAttrs.size(); i++) {
   UDAFAttrs udafAttr = gbInfo.udafAttrs.get(i);
   ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
@@ -1003,40 +1014,77 @@ public class HiveGBOpConvUtil {
   ColumnInfo rsUDAFParamColInfo;
   ExprNodeDesc udafParam;
   ExprNodeDesc constantPropDistinctUDAFParam;
-  for (int j = 0; j < udafAttr.udafParams.size(); j++) {
-rsUDAFParamColInfo = rsColInfoLst.get(distinctStartPosInReduceKeys + 
j);
-String rsUDAFParamName = rsUDAFParamColInfo.getInternalName();
-// TODO: verify if this is needed
-if (udafAttr.isDistinctUDAF && lastReduceKeyColName != null) {
-  rsUDAFParamName 
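
The TreeMap bookkeeping above exists because distinct and non-distinct
aggregates can be interleaved in the select list: the conversion handles the
distinct UDAFs first and the non-distinct ones afterwards, and indexToParameter,
keyed by each UDAF's original position, restores select-list order at the end.
An illustrative query shape in HiveQL (udf_percentile.q is the failing test
this commit fixes; the exact query there may differ):

select percentile(cast(key as bigint), 0.5),   -- non-distinct
       count(distinct value),                  -- distinct, mixed in between
       count(1)                                -- non-distinct
from src;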

hive git commit: HIVE-14957: HiveSortLimitPullUpConstantsRule misses branches when parent operator is Union (Pengcheng Xiong, reviewed by Jesus Camacho Rodriguez)

2016-10-17 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master e9c217fe6 -> e0e10a932


HIVE-14957: HiveSortLimitPullUpConstantsRule misses branches when parent 
operator is Union (Pengcheng Xiong, reviewed by Jesus Camacho Rodriguez)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e0e10a93
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e0e10a93
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e0e10a93

Branch: refs/heads/master
Commit: e0e10a9324cfccc8bef7bccb33d9f1509832eba1
Parents: e9c217f
Author: Pengcheng Xiong 
Authored: Mon Oct 17 11:12:16 2016 -0700
Committer: Pengcheng Xiong 
Committed: Mon Oct 17 11:12:16 2016 -0700

--
 .../calcite/rules/HiveSortLimitPullUpConstantsRule.java  | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/e0e10a93/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortLimitPullUpConstantsRule.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortLimitPullUpConstantsRule.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortLimitPullUpConstantsRule.java
index cc318db..3ec9dac 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortLimitPullUpConstantsRule.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortLimitPullUpConstantsRule.java
@@ -26,6 +26,7 @@ import org.apache.calcite.plan.RelOptPredicateList;
 import org.apache.calcite.plan.RelOptRule;
 import org.apache.calcite.plan.RelOptRuleCall;
 import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.hep.HepRelVertex;
 import org.apache.calcite.rel.RelCollations;
 import org.apache.calcite.rel.RelFieldCollation;
 import org.apache.calcite.rel.RelNode;
@@ -158,7 +159,15 @@ public class HiveSortLimitPullUpConstantsRule extends 
RelOptRule {
 relBuilder.project(topChildExprs, topChildExprsFields);
 relBuilder.convert(sort.getRowType(), false);
 
-call.transformTo(parent.copy(parent.getTraitSet(), 
ImmutableList.of(relBuilder.build(;
+List<RelNode> inputs = new ArrayList<>();
+for (RelNode child : parent.getInputs()) {
+  if (!((HepRelVertex) child).getCurrentRel().equals(sort)) {
+    inputs.add(child);
+  } else {
+    inputs.add(relBuilder.build());
+  }
+}
+call.transformTo(parent.copy(parent.getTraitSet(), inputs));
   }
 
 }
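
The loop above is the substance of the fix: when the parent is a Union, every
input is carried over and only the branch matching the rewritten Sort is
swapped for the new subtree; the old code rebuilt the parent with a single
input, silently dropping the sibling branches. A query shape that could
exercise this, in HiveQL (tables a and b as in the set-operation tests;
illustrative only):

select key, value from (select key, value from a where key = 0 order by key limit 5) s
union all
select key, value from b;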



hive git commit: HIVE-14957: HiveSortLimitPullUpConstantsRule misses branches when parent operator is Union (Pengcheng Xiong, reviewed by Jesus Camacho Rodriguez)

2016-10-17 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/branch-2.1 ccf667559 -> 98952eb15


HIVE-14957: HiveSortLimitPullUpConstantsRule misses branches when parent 
operator is Union (Pengcheng Xiong, reviewed by Jesus Camacho Rodriguez)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/98952eb1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/98952eb1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/98952eb1

Branch: refs/heads/branch-2.1
Commit: 98952eb15c720e3bfb4b715a3b78c20535aa16d3
Parents: ccf6675
Author: Pengcheng Xiong 
Authored: Mon Oct 17 11:12:16 2016 -0700
Committer: Pengcheng Xiong 
Committed: Mon Oct 17 11:15:55 2016 -0700

--
 .../calcite/rules/HiveSortLimitPullUpConstantsRule.java  | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/98952eb1/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortLimitPullUpConstantsRule.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortLimitPullUpConstantsRule.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortLimitPullUpConstantsRule.java
index cc318db..3ec9dac 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortLimitPullUpConstantsRule.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortLimitPullUpConstantsRule.java
@@ -26,6 +26,7 @@ import org.apache.calcite.plan.RelOptPredicateList;
 import org.apache.calcite.plan.RelOptRule;
 import org.apache.calcite.plan.RelOptRuleCall;
 import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.hep.HepRelVertex;
 import org.apache.calcite.rel.RelCollations;
 import org.apache.calcite.rel.RelFieldCollation;
 import org.apache.calcite.rel.RelNode;
@@ -158,7 +159,15 @@ public class HiveSortLimitPullUpConstantsRule extends 
RelOptRule {
 relBuilder.project(topChildExprs, topChildExprsFields);
 relBuilder.convert(sort.getRowType(), false);
 
-call.transformTo(parent.copy(parent.getTraitSet(), 
ImmutableList.of(relBuilder.build(;
+List<RelNode> inputs = new ArrayList<>();
+for (RelNode child : parent.getInputs()) {
+  if (!((HepRelVertex) child).getCurrentRel().equals(sort)) {
+    inputs.add(child);
+  } else {
+    inputs.add(relBuilder.build());
+  }
+}
+call.transformTo(parent.copy(parent.getTraitSet(), inputs));
   }
 
 }



[2/7] hive git commit: HIVE-12765: Support Intersect (distinct/all) Except (distinct/all) Minus (distinct/all) (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2016-10-22 Thread pxiong
http://git-wip-us.apache.org/repos/asf/hive/blob/0049a21f/ql/src/test/results/clientpositive/llap/intersect_distinct.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/intersect_distinct.q.out 
b/ql/src/test/results/clientpositive/llap/intersect_distinct.q.out
new file mode 100644
index 0000000..69e0b17
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/intersect_distinct.q.out
@@ -0,0 +1,1292 @@
+PREHOOK: query: create table a(key int, value int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@a
+POSTHOOK: query: create table a(key int, value int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@a
+PREHOOK: query: insert into table a values (1,2),(1,2),(1,3),(2,3)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@a
+POSTHOOK: query: insert into table a values (1,2),(1,2),(1,3),(2,3)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@a
+POSTHOOK: Lineage: a.key EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+POSTHOOK: Lineage: a.value EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ]
+PREHOOK: query: create table b(key int, value int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@b
+POSTHOOK: query: create table b(key int, value int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@b
+PREHOOK: query: insert into table b values (1,2),(2,3)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@b
+POSTHOOK: query: insert into table b values (1,2),(2,3)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@b
+POSTHOOK: Lineage: b.key EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+POSTHOOK: Lineage: b.value EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ]
+PREHOOK: query: select key, count(1) as c from a group by key intersect all 
select value, max(key) as c from b group by value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@a
+PREHOOK: Input: default@b
+ A masked pattern was here 
+POSTHOOK: query: select key, count(1) as c from a group by key intersect all 
select value, max(key) as c from b group by value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@a
+POSTHOOK: Input: default@b
+ A masked pattern was here 
+2  1
+PREHOOK: query: select * from a intersect distinct select * from b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@a
+PREHOOK: Input: default@b
+ A masked pattern was here 
+POSTHOOK: query: select * from a intersect distinct select * from b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@a
+POSTHOOK: Input: default@b
+ A masked pattern was here 
+1  2
+2  3
+PREHOOK: query: select * from b intersect distinct select * from a intersect 
distinct select * from b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@a
+PREHOOK: Input: default@b
+ A masked pattern was here 
+POSTHOOK: query: select * from b intersect distinct select * from a intersect 
distinct select * from b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@a
+POSTHOOK: Input: default@b
+ A masked pattern was here 
+1  2
+2  3
+PREHOOK: query: select * from a intersect distinct select * from b union all 
select * from a intersect distinct select * from b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@a
+PREHOOK: Input: default@b
+ A masked pattern was here 
+POSTHOOK: query: select * from a intersect distinct select * from b union all 
select * from a intersect distinct select * from b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@a
+POSTHOOK: Input: default@b
+ A masked pattern was here 
+1  2
+2  3
+PREHOOK: query: select * from a intersect distinct select * from b union 
select * from a intersect distinct select * from b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@a
+PREHOOK: Input: default@b
+ A masked pattern was here 
+POSTHOOK: query: select * from a intersect distinct select * from b union 
select * from a intersect distinct select * from b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@a
+POSTHOOK: Input: default@b
+ A masked pattern was here 
+1  2
+2  3
+PREHOOK: query: select * from a intersect distinct select * from b intersect 
distinct select * from a intersect distinct select * from b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@a
+PREHOOK: Input: default@b
+ A masked pattern was here 
+POSTHOOK: query: select * from a intersect distinct select * from b intersect 
distinct select * f
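
As the results above show, INTERSECT DISTINCT returns each common row once
regardless of multiplicity; a holds (1,2) twice and b holds it once, yet it is
listed a single time:

select * from a intersect distinct select * from b;
-- 1  2
-- 2  3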

[3/7] hive git commit: HIVE-12765: Support Intersect (distinct/all) Except (distinct/all) Minus (distinct/all) (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2016-10-22 Thread pxiong
http://git-wip-us.apache.org/repos/asf/hive/blob/0049a21f/ql/src/test/results/clientpositive/llap/intersect_all.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/intersect_all.q.out 
b/ql/src/test/results/clientpositive/llap/intersect_all.q.out
new file mode 100644
index 0000000..6d8b99d
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/intersect_all.q.out
@@ -0,0 +1,1697 @@
+PREHOOK: query: create table a(key int, value int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@a
+POSTHOOK: query: create table a(key int, value int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@a
+PREHOOK: query: insert into table a values (1,2),(1,2),(1,3),(2,3)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@a
+POSTHOOK: query: insert into table a values (1,2),(1,2),(1,3),(2,3)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@a
+POSTHOOK: Lineage: a.key EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+POSTHOOK: Lineage: a.value EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ]
+PREHOOK: query: create table b(key int, value int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@b
+POSTHOOK: query: create table b(key int, value int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@b
+PREHOOK: query: insert into table b values (1,2),(2,3)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@b
+POSTHOOK: query: insert into table b values (1,2),(2,3)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@b
+POSTHOOK: Lineage: b.key EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+POSTHOOK: Lineage: b.value EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ]
+PREHOOK: query: select key, value, count(1) as c from a group by key, value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@a
+ A masked pattern was here 
+POSTHOOK: query: select key, value, count(1) as c from a group by key, value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@a
+ A masked pattern was here 
+1  2   2
+1  3   1
+2  3   1
+PREHOOK: query: select * from a intersect all select * from b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@a
+PREHOOK: Input: default@b
+ A masked pattern was here 
+POSTHOOK: query: select * from a intersect all select * from b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@a
+POSTHOOK: Input: default@b
+ A masked pattern was here 
+1  2
+2  3
+PREHOOK: query: select * from b intersect all select * from a intersect all 
select * from b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@a
+PREHOOK: Input: default@b
+ A masked pattern was here 
+POSTHOOK: query: select * from b intersect all select * from a intersect all 
select * from b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@a
+POSTHOOK: Input: default@b
+ A masked pattern was here 
+1  2
+2  3
+PREHOOK: query: select * from a intersect all select * from b union all select 
* from a intersect all select * from b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@a
+PREHOOK: Input: default@b
+ A masked pattern was here 
+POSTHOOK: query: select * from a intersect all select * from b union all 
select * from a intersect all select * from b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@a
+POSTHOOK: Input: default@b
+ A masked pattern was here 
+1  2
+2  3
+PREHOOK: query: select * from a intersect all select * from b union select * 
from a intersect all select * from b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@a
+PREHOOK: Input: default@b
+ A masked pattern was here 
+POSTHOOK: query: select * from a intersect all select * from b union select * 
from a intersect all select * from b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@a
+POSTHOOK: Input: default@b
+ A masked pattern was here 
+1  2
+2  3
+PREHOOK: query: select * from a intersect all select * from b intersect all 
select * from a intersect all select * from b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@a
+PREHOOK: Input: default@b
+ A masked pattern was here 
+POSTHOOK: query: select * from a intersect all select * from b intersect all 
select * from a intersect all select * from b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@a
+POSTHOOK: Input: default@b
+ A masked pattern was here 
+1  2
+2  3
+PREHOOK: query: select * from (select a.key, b.value fr

[4/7] hive git commit: HIVE-12765: Support Intersect (distinct/all) Except (distinct/all) Minus (distinct/all) (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2016-10-22 Thread pxiong
http://git-wip-us.apache.org/repos/asf/hive/blob/0049a21f/ql/src/test/results/clientpositive/llap/except_distinct.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/except_distinct.q.out 
b/ql/src/test/results/clientpositive/llap/except_distinct.q.out
new file mode 100644
index 0000000..5a19350
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/except_distinct.q.out
@@ -0,0 +1,894 @@
+PREHOOK: query: create table a(key int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@a
+POSTHOOK: query: create table a(key int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@a
+PREHOOK: query: insert into table a values 
(0),(1),(2),(2),(2),(2),(3),(NULL),(NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@a
+POSTHOOK: query: insert into table a values 
(0),(1),(2),(2),(2),(2),(3),(NULL),(NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@a
+POSTHOOK: Lineage: a.key EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+PREHOOK: query: create table b(key bigint)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@b
+POSTHOOK: query: create table b(key bigint)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@b
+PREHOOK: query: insert into table b values 
(1),(2),(2),(3),(5),(5),(NULL),(NULL),(NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@b
+POSTHOOK: query: insert into table b values 
(1),(2),(2),(3),(5),(5),(NULL),(NULL),(NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@b
+POSTHOOK: Lineage: b.key EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+PREHOOK: query: select * from a except distinct select * from b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@a
+PREHOOK: Input: default@b
+ A masked pattern was here 
+POSTHOOK: query: select * from a except distinct select * from b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@a
+POSTHOOK: Input: default@b
+ A masked pattern was here 
+0
+PREHOOK: query: drop table a
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@a
+PREHOOK: Output: default@a
+POSTHOOK: query: drop table a
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@a
+POSTHOOK: Output: default@a
+PREHOOK: query: drop table b
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@b
+PREHOOK: Output: default@b
+POSTHOOK: query: drop table b
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@b
+POSTHOOK: Output: default@b
+PREHOOK: query: create table a(key int, value int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@a
+POSTHOOK: query: create table a(key int, value int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@a
+PREHOOK: query: insert into table a values (1,2),(1,2),(1,3),(2,3),(2,2)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__3
+PREHOOK: Output: default@a
+POSTHOOK: query: insert into table a values (1,2),(1,2),(1,3),(2,3),(2,2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__3
+POSTHOOK: Output: default@a
+POSTHOOK: Lineage: a.key EXPRESSION 
[(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+POSTHOOK: Lineage: a.value EXPRESSION 
[(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ]
+PREHOOK: query: create table b(key int, value int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@b
+POSTHOOK: query: create table b(key int, value int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@b
+PREHOOK: query: insert into table b values (1,2),(2,3),(2,2),(2,2),(2,20)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__4
+PREHOOK: Output: default@b
+POSTHOOK: query: insert into table b values (1,2),(2,3),(2,2),(2,2),(2,20)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__4
+POSTHOOK: Output: default@b
+POSTHOOK: Lineage: b.key EXPRESSION 
[(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+POSTHOOK: Lineage: b.value EXPRESSION 
[(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ]
+PREHOOK: query: select * from a except distinct select * from b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@a
+PREHOOK: Input: default@b
+ A masked pattern was here 
+POSTHOOK: query: select * from a except distinct select * from b
+POSTHOOK: type: QUERY
+P
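
Worth noting in the run above: set operations treat NULLs as equal when
matching rows, unlike ordinary equality predicates. Both sides contain NULL,
so it is eliminated along with 1, 2 and 3, and only 0 survives:

-- a: 0,1,2,2,2,2,3,NULL,NULL;  b: 1,2,2,3,5,5,NULL,NULL,NULL
select * from a except distinct select * from b;
-- 0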

[6/7] hive git commit: HIVE-12765: Support Intersect (distinct/all) Except (distinct/all) Minus (distinct/all) (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2016-10-22 Thread pxiong
HIVE-12765: Support Intersect (distinct/all) Except (distinct/all) Minus 
(distinct/all) (Pengcheng Xiong, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0049a21f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0049a21f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0049a21f

Branch: refs/heads/master
Commit: 0049a21f5442581b463f1bdfe0b1c12983c62ab2
Parents: 394fc47
Author: Pengcheng Xiong 
Authored: Sat Oct 22 13:48:26 2016 -0700
Committer: Pengcheng Xiong 
Committed: Sat Oct 22 13:48:26 2016 -0700

--
 .../test/resources/testconfiguration.properties |4 +
 .../org/apache/hadoop/hive/ql/ErrorMsg.java |2 +
 .../ql/optimizer/calcite/HiveCalciteUtil.java   |   73 +
 .../ql/optimizer/calcite/HiveRelFactories.java  |   16 +-
 .../calcite/reloperators/HiveExcept.java|   43 +
 .../calcite/reloperators/HiveIntersect.java |   43 +
 .../calcite/rules/HiveExceptRewriteRule.java|  375 
 .../calcite/rules/HiveIntersectMergeRule.java   |   88 +
 .../calcite/rules/HiveIntersectRewriteRule.java |  250 +++
 .../HiveProjectOverIntersectRemoveRule.java |   67 +
 .../rules/HiveSortLimitPullUpConstantsRule.java |7 +-
 .../calcite/translator/ASTConverter.java|   26 +-
 .../hadoop/hive/ql/parse/CalcitePlanner.java|  124 +-
 .../org/apache/hadoop/hive/ql/parse/HiveLexer.g |1 +
 .../apache/hadoop/hive/ql/parse/HiveParser.g|   22 +-
 .../hadoop/hive/ql/parse/IdentifiersParser.g|2 +-
 .../org/apache/hadoop/hive/ql/parse/QBExpr.java |2 +-
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |   38 +-
 .../TestSQL11ReservedKeyWordsNegative.java  |   23 +-
 ql/src/test/queries/clientpositive/except_all.q |   58 +
 .../queries/clientpositive/except_distinct.q|   58 +
 .../test/queries/clientpositive/intersect_all.q |   42 +
 .../queries/clientpositive/intersect_distinct.q |   42 +
 .../queries/clientpositive/intersect_merge.q|   27 +
 .../results/clientpositive/except_all.q.out |  986 +
 .../clientpositive/llap/except_distinct.q.out   |  894 
 .../clientpositive/llap/intersect_all.q.out | 1697 +++
 .../llap/intersect_distinct.q.out   | 1292 
 .../clientpositive/llap/intersect_merge.q.out   | 1956 ++
 29 files changed, 8177 insertions(+), 81 deletions(-)
--
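
HiveIntersectRewriteRule needs no dedicated physical operator: each branch is
grouped on the full select list with a count, the branches are unioned, and a
final group-by keeps the keys present in every branch (the per-branch Group By
/ Union / final Reducer shape visible in the intersect_merge.q.out plan
below). Roughly, in HiveQL:

-- approximate rewrite of: select * from a intersect distinct select * from b
select key, value
from (
  select key, value, count(1) as c from a group by key, value
  union all
  select key, value, count(1) as c from b group by key, value
) t
group by key, value
having count(1) = 2;   -- 2 = number of intersect branches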


http://git-wip-us.apache.org/repos/asf/hive/blob/0049a21f/itests/src/test/resources/testconfiguration.properties
--
diff --git a/itests/src/test/resources/testconfiguration.properties 
b/itests/src/test/resources/testconfiguration.properties
index 09833ff..4e91452 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -403,8 +403,12 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
 minillap.query.files=acid_bucket_pruning.q,\
   bucket5.q,\
   bucket6.q,\
+  except_distinct.q,\
   explainuser_2.q,\
   empty_dir_in_table.q,\
+  intersect_all.q,\
+  intersect_distinct.q,\
+  intersect_merge.q,\
   llap_udf.q,\
   llapdecider.q,\
   reduce_deduplicate.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/0049a21f/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java 
b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index f308832..7ed3907 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -27,6 +27,7 @@ import java.util.regex.Pattern;
 import org.antlr.runtime.tree.Tree;
 import org.apache.hadoop.hive.ql.parse.ASTNode;
 import org.apache.hadoop.hive.ql.parse.ASTNodeOrigin;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
 
 /**
  * List of all error messages.
@@ -450,6 +451,7 @@ public enum ErrorMsg {
   ACID_NOT_ENOUGH_HISTORY(10327, "Not enough history available for ({0},{1}).  
" +
 "Oldest available base: {2}", true),
   INVALID_COLUMN_NAME(10328, "Invalid column name"),
+  UNSUPPORTED_SET_OPERATOR(10329, "Unsupported set operator"),
   REPLACE_VIEW_WITH_MATERIALIZED(10400, "Attempt to replace view {0} with 
materialized view", true),
   REPLACE_MATERIALIZED_WITH_VIEW(10401, "Attempt to replace materialized view 
{0} with view", true),
   UPDATE_DELETE_VIEW(10402, "You cannot update or delete records in a view"),

http://git-wip-us.apache.org/repos/asf/hive/blob/0049a21f/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java 

[1/7] hive git commit: HIVE-12765: Support Intersect (distinct/all) Except (distinct/all) Minus (distinct/all) (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2016-10-22 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master 394fc47da -> efa39eab3


http://git-wip-us.apache.org/repos/asf/hive/blob/0049a21f/ql/src/test/results/clientpositive/llap/intersect_merge.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/intersect_merge.q.out 
b/ql/src/test/results/clientpositive/llap/intersect_merge.q.out
new file mode 100644
index 0000000..40e657d
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/intersect_merge.q.out
@@ -0,0 +1,1956 @@
+PREHOOK: query: create table a(key int, value int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@a
+POSTHOOK: query: create table a(key int, value int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@a
+PREHOOK: query: insert into table a values (1,2),(1,2),(1,3),(2,3)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@a
+POSTHOOK: query: insert into table a values (1,2),(1,2),(1,3),(2,3)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@a
+POSTHOOK: Lineage: a.key EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+POSTHOOK: Lineage: a.value EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ]
+PREHOOK: query: create table b(key int, value int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@b
+POSTHOOK: query: create table b(key int, value int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@b
+PREHOOK: query: insert into table b values (1,2),(2,3)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@b
+POSTHOOK: query: insert into table b values (1,2),(2,3)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@b
+POSTHOOK: Lineage: b.key EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+POSTHOOK: Lineage: b.value EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ]
+PREHOOK: query: explain select * from b intersect distinct select * from a 
intersect distinct select * from b intersect distinct select * from a intersect 
distinct select * from b
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from b intersect distinct select * from a 
intersect distinct select * from b intersect distinct select * from a intersect 
distinct select * from b
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+Tez
+ A masked pattern was here 
+  Edges:
+Reducer 10 <- Map 9 (SIMPLE_EDGE), Union 3 (CONTAINS)
+Reducer 12 <- Map 11 (SIMPLE_EDGE), Union 3 (CONTAINS)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS)
+Reducer 4 <- Union 3 (SIMPLE_EDGE)
+Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS)
+Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 3 (CONTAINS)
+ A masked pattern was here 
+  Vertices:
+Map 1 
+Map Operator Tree:
+TableScan
+  alias: b
+  Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE 
Column stats: NONE
+  Select Operator
+expressions: key (type: int), value (type: int)
+outputColumnNames: _col0, _col1
+Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE 
Column stats: NONE
+Group By Operator
+  aggregations: count(1)
+  keys: _col0 (type: int), _col1 (type: int)
+  mode: hash
+  outputColumnNames: _col0, _col1, _col2
+  Statistics: Num rows: 2 Data size: 6 Basic stats: 
COMPLETE Column stats: NONE
+  Reduce Output Operator
+key expressions: _col0 (type: int), _col1 (type: int)
+sort order: ++
+Map-reduce partition columns: _col0 (type: int), _col1 
(type: int)
+Statistics: Num rows: 2 Data size: 6 Basic stats: 
COMPLETE Column stats: NONE
+value expressions: _col2 (type: bigint)
+Execution mode: llap
+LLAP IO: no inputs
+Map 11 
+Map Operator Tree:
+TableScan
+  alias: b
+  Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE 
Column stats: NONE
+  Select Operator
+expressions: key (type: int), value (type: int)
+outpu

[7/7] hive git commit: HIVE-14580: Introduce || operator (Zoltan Haindrich reviewed by Pengcheng Xiong)

2016-10-22 Thread pxiong
HIVE-14580: Introduce || operator (Zoltan Haindrich reviewed by Pengcheng Xiong)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/efa39eab
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/efa39eab
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/efa39eab

Branch: refs/heads/master
Commit: efa39eab3a480aba4a3a4a0b3d56b3121382fe9b
Parents: 0049a21
Author: Pengcheng Xiong 
Authored: Sat Oct 22 14:14:34 2016 -0700
Committer: Pengcheng Xiong 
Committed: Sat Oct 22 14:25:11 2016 -0700

--
 .../org/apache/hadoop/hive/ql/parse/HiveLexer.g |   1 +
 .../hadoop/hive/ql/parse/IdentifiersParser.g|  16 +-
 ql/src/test/queries/clientpositive/concat_op.q  |  45 +++
 .../test/results/clientpositive/concat_op.q.out | 301 +++
 4 files changed, 362 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/efa39eab/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
index 17985d2..a0ff65d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
@@ -370,6 +370,7 @@ DIV : 'DIV';
 AMPERSAND : '&';
 TILDE : '~';
 BITWISEOR : '|';
+CONCATENATE : '||';
 BITWISEXOR : '^';
 QUESTION : '?';
 DOLLAR : '$';

http://git-wip-us.apache.org/repos/asf/hive/blob/efa39eab/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
index f79960a..13e2d17 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
@@ -437,6 +437,20 @@ precedencePlusExpression
 precedenceStarExpression (precedencePlusOperator^ 
precedenceStarExpression)*
 ;
 
+precedenceConcatenateOperator
+:
+CONCATENATE
+;
+
+precedenceConcatenateExpression
+:
+(precedencePlusExpression -> precedencePlusExpression)
+(
+precedenceConcatenateOperator plus=precedencePlusExpression
+-> ^(TOK_FUNCTION {adaptor.create(Identifier, "concat")} 
{$precedenceConcatenateExpression.tree} $plus)
+)*
+-> {$precedenceConcatenateExpression.tree}
+;
 
 precedenceAmpersandOperator
 :
@@ -445,7 +459,7 @@ precedenceAmpersandOperator
 
 precedenceAmpersandExpression
 :
-precedencePlusExpression (precedenceAmpersandOperator^ 
precedencePlusExpression)*
+precedenceConcatenateExpression (precedenceAmpersandOperator^ 
precedenceConcatenateExpression)*
 ;
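
The grammar above left-folds || into nested calls of the existing concat UDF
at parse time, so no new runtime operator is needed: 'a' || 'b' || 'c' is
rewritten to concat(concat('a', 'b'), 'c'). For example:

explain select 'a' || 'b' || 'c';
-- plans an expression equivalent to:
select concat('a', 'b', 'c');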
 
 

http://git-wip-us.apache.org/repos/asf/hive/blob/efa39eab/ql/src/test/queries/clientpositive/concat_op.q
--
diff --git a/ql/src/test/queries/clientpositive/concat_op.q 
b/ql/src/test/queries/clientpositive/concat_op.q
new file mode 100644
index 0000000..892
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/concat_op.q
@@ -0,0 +1,45 @@
+explain select key || value from src;
+
+select concat('a','b','c');
+select 'a' || 'b' || 'c';
+
+select '1' || 2+3;
+select 1+2 || '7';
+
+select 1 || 1 || 1;
+select 1.2 || 1.7;
+select 1 + 1 || 1 + 1;
+select 9 + 9 || 9 + 9;
+select 1 + 1 || 1 + 1 || 1 + 1;
+
+-- || has higher precedence than bitwise ops...so () is necessary
+select '1' || 4 / 2 || 1 + 2 * 1 || (6 & 4) || (1 | 4);
+
+-- however ^ is different from the other bitwise ops:
+select 0 ^ 1 || '2' || 1 ^ 2;
+
+create table ct1 (c int);
+create table ct2 (c int);
+
+insert into ct1 values (7),(5),(3),(1);
+insert into ct2 values (8),(6),(4),(2);
+
+create view ct_v1 as select * from ct1 union all select * from ct2 order by c;
+
+select c,c * c + c || 'x', 'c+c=' || c+c || ', c*c=' || c*c || ', (c&c)=' || 
(c & c) from ct_v1;
+
+
+select *, 'x' || (c&3) , 'a' || c*c+c || 'b' from ct_v1
+   order by 'a' || c*c+c || 'b';
+
+select 'x' || (c&3),collect_list(c) from ct_v1
+   group by 'x' || (c&3);
+
+explain select concat('a','b','c');
+explain select 'a' || 'b' || 'c';
+
+-- check and/or precedence relation; should be true
+-- (true and false) or (false and true) or true => true   psql/mysql/ora/hive
+-- true and (false or false) and (true or true) => false   should not happen
+select true and false or false and true or true;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/efa39eab/ql/src/test/results/clientpositive/concat_op.q.out
--
diff --git a/ql/sr

[5/7] hive git commit: HIVE-12765: Support Intersect (distinct/all) Except (distinct/all) Minus (distinct/all) (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2016-10-22 Thread pxiong
http://git-wip-us.apache.org/repos/asf/hive/blob/0049a21f/ql/src/test/queries/clientpositive/intersect_all.q
--
diff --git a/ql/src/test/queries/clientpositive/intersect_all.q 
b/ql/src/test/queries/clientpositive/intersect_all.q
new file mode 100644
index 000..a0d4afd
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/intersect_all.q
@@ -0,0 +1,42 @@
+set hive.mapred.mode=nonstrict;
+set hive.cbo.enable=true;
+
+create table a(key int, value int);
+
+insert into table a values (1,2),(1,2),(1,3),(2,3);
+
+create table b(key int, value int);
+
+insert into table b values (1,2),(2,3);
+
+select key, value, count(1) as c from a group by key, value;
+
+select * from a intersect all select * from b;
+
+select * from b intersect all select * from a intersect all select * from b;
+
+select * from a intersect all select * from b union all select * from a 
intersect all select * from b;
+
+select * from a intersect all select * from b union select * from a intersect 
all select * from b;
+
+select * from a intersect all select * from b intersect all select * from a 
intersect all select * from b;
+
+select * from (select a.key, b.value from a join b on a.key=b.key)sub1 
+intersect all 
+select * from (select a.key, b.value from a join b on a.key=b.key)sub2; 
+
+select * from (select a.key, b.value from a join b on a.key=b.key)sub1
+intersect all
+select * from (select b.value as key, a.key as value from a join b on 
a.key=b.key)sub2;
+
+explain select * from src intersect all select * from src;
+
+select * from src intersect all select * from src;
+
+explain select * from src intersect all select * from src intersect all select 
* from src intersect all select * from src;
+
+select * from src intersect all select * from src intersect all select * from 
src intersect all select * from src;
+
+explain select value from a group by value intersect all select key from b 
group by key;
+
+select value from a group by value intersect all select key from b group by 
key;

http://git-wip-us.apache.org/repos/asf/hive/blob/0049a21f/ql/src/test/queries/clientpositive/intersect_distinct.q
--
diff --git a/ql/src/test/queries/clientpositive/intersect_distinct.q 
b/ql/src/test/queries/clientpositive/intersect_distinct.q
new file mode 100644
index 000..aa8155a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/intersect_distinct.q
@@ -0,0 +1,42 @@
+set hive.mapred.mode=nonstrict;
+set hive.cbo.enable=true;
+
+create table a(key int, value int);
+
+insert into table a values (1,2),(1,2),(1,3),(2,3);
+
+create table b(key int, value int);
+
+insert into table b values (1,2),(2,3);
+
+select key, count(1) as c from a group by key intersect all select value, 
max(key) as c from b group by value;
+
+select * from a intersect distinct select * from b;
+
+select * from b intersect distinct select * from a intersect distinct select * 
from b;
+
+select * from a intersect distinct select * from b union all select * from a 
intersect distinct select * from b;
+
+select * from a intersect distinct select * from b union select * from a 
intersect distinct select * from b;
+
+select * from a intersect distinct select * from b intersect distinct select * 
from a intersect distinct select * from b;
+
+select * from (select a.key, b.value from a join b on a.key=b.key)sub1 
+intersect distinct 
+select * from (select a.key, b.value from a join b on a.key=b.key)sub2; 
+
+select * from (select a.key, b.value from a join b on a.key=b.key)sub1
+intersect distinct
+select * from (select b.value as key, a.key as value from a join b on 
a.key=b.key)sub2;
+
+explain select * from src intersect distinct select * from src;
+
+select * from src intersect distinct select * from src;
+
+explain select * from src intersect distinct select * from src intersect 
distinct select * from src intersect distinct select * from src;
+
+select * from src intersect distinct select * from src intersect distinct 
select * from src intersect distinct select * from src;
+
+explain select value from a group by value intersect distinct select key from 
b group by key;
+
+select value from a group by value intersect distinct select key from b group 
by key;

http://git-wip-us.apache.org/repos/asf/hive/blob/0049a21f/ql/src/test/queries/clientpositive/intersect_merge.q
--
diff --git a/ql/src/test/queries/clientpositive/intersect_merge.q 
b/ql/src/test/queries/clientpositive/intersect_merge.q
new file mode 100644
index 000..0d8789e
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/intersect_merge.q
@@ -0,0 +1,27 @@
+set hive.mapred.mode=nonstrict;
+set hive.cbo.enable=true;
+
+create table a(key int, value int);
+
+insert into table a values (1,2),(1,2),(1,3),(2,3);
+
+create table b(key int, value int);
+
+insert into table b values (1,2),(2,3);
+
+explain 
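
HiveIntersectMergeRule flattens directly nested INTERSECT DISTINCT operators
into one n-ary intersect, so a chain like the first explain in
intersect_merge.q plans as a single Union over five grouped branches (Reducers
2, 6, 8, 10 and 12 all feeding Union 3 in the q.out above) instead of four
cascaded two-way intersects:

explain
select * from b
intersect distinct select * from a
intersect distinct select * from b
intersect distinct select * from a
intersect distinct select * from b;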

hive git commit: HIVE-15042: Support intersect/except without distinct keyword (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

2016-10-25 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master f99a6e844 -> 8a6d8186c


HIVE-15042: Support intersect/except without distinct keyword (Pengcheng Xiong, 
reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8a6d8186
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8a6d8186
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8a6d8186

Branch: refs/heads/master
Commit: 8a6d8186c7e03d9958824ce4907766eaeb79921f
Parents: f99a6e8
Author: Pengcheng Xiong 
Authored: Tue Oct 25 20:52:35 2016 +0900
Committer: Pengcheng Xiong 
Committed: Tue Oct 25 20:52:35 2016 +0900

--
 .../apache/hadoop/hive/ql/parse/HiveParser.g|   6 +-
 .../queries/clientpositive/setop_no_distinct.q  |  51 
 .../clientpositive/setop_no_distinct.q.out  | 237 +++
 3 files changed, 291 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/8a6d8186/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
index 7bf02bb..7b56be5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
@@ -2309,11 +2309,11 @@ setOperator
 : KW_UNION KW_ALL -> ^(TOK_UNIONALL)
 | KW_UNION KW_DISTINCT? -> ^(TOK_UNIONDISTINCT)
 | KW_INTERSECT KW_ALL -> ^(TOK_INTERSECTALL)
-| KW_INTERSECT KW_DISTINCT -> ^(TOK_INTERSECTDISTINCT)
+| KW_INTERSECT KW_DISTINCT? -> ^(TOK_INTERSECTDISTINCT)
 | KW_EXCEPT KW_ALL -> ^(TOK_EXCEPTALL)
-| KW_EXCEPT KW_DISTINCT -> ^(TOK_EXCEPTDISTINCT)
+| KW_EXCEPT KW_DISTINCT? -> ^(TOK_EXCEPTDISTINCT)
 | KW_MINUS KW_ALL -> ^(TOK_EXCEPTALL)
-| KW_MINUS KW_DISTINCT -> ^(TOK_EXCEPTDISTINCT)
+| KW_MINUS KW_DISTINCT? -> ^(TOK_EXCEPTDISTINCT)
 ;
 
 queryStatementExpression
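
With KW_DISTINCT now optional, a bare set operator defaults to its DISTINCT
form, matching standard SQL; only ALL still has to be spelled out. So after
this change:

-- equivalent pairs
select * from a intersect select * from b;
select * from a intersect distinct select * from b;

select * from a minus select * from b;   -- MINUS stays a synonym for EXCEPT
select * from a except distinct select * from b;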

http://git-wip-us.apache.org/repos/asf/hive/blob/8a6d8186/ql/src/test/queries/clientpositive/setop_no_distinct.q
--
diff --git a/ql/src/test/queries/clientpositive/setop_no_distinct.q 
b/ql/src/test/queries/clientpositive/setop_no_distinct.q
new file mode 100644
index 0000000..207954a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/setop_no_distinct.q
@@ -0,0 +1,51 @@
+set hive.mapred.mode=nonstrict;
+set hive.cbo.enable=true;
+
+create table a(key int, value int);
+
+insert into table a values (1,2),(1,2),(1,2),(1,3),(2,3);
+
+create table b(key int, value int);
+
+insert into table b values (1,2),(1,2),(2,3);
+
+select * from a intersect select * from b;
+
+(select * from b intersect (select * from a)) intersect select * from b;
+
+select * from b intersect all select * from a intersect select * from b;
+
+(select * from b) intersect all ((select * from a) intersect select * from b);
+
+select * from (select a.key, b.value from a join b on a.key=b.key)sub1 
+intersect 
+select * from (select a.key, b.value from a join b on a.key=b.key)sub2; 
+
+drop table a;
+
+drop table b;
+
+create table a(key int);
+
+insert into table a values 
(0),(1),(2),(2),(2),(2),(3),(NULL),(NULL),(NULL),(NULL),(NULL);
+
+create table b(key bigint);
+
+insert into table b values (1),(2),(2),(3),(5),(5),(NULL),(NULL),(NULL);
+
+select * from a except select * from b;
+
+(select * from a) minus select * from b union (select * from a) minus select * 
from b;
+
+(select * from a) minus select * from b union all ((select * from a) minus 
select * from b);
+
+(select * from a) minus select * from b union all (select * from a) minus all 
select * from b;
+
+select * from a minus select * from b minus (select * from a minus select * 
from b);
+
+(select * from a) minus (select * from b minus (select * from a minus select * 
from b));
+
+drop table a;
+
+drop table b;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/8a6d8186/ql/src/test/results/clientpositive/setop_no_distinct.q.out
--
diff --git a/ql/src/test/results/clientpositive/setop_no_distinct.q.out 
b/ql/src/test/results/clientpositive/setop_no_distinct.q.out
new file mode 100644
index 0000000..55e9ba7
--- /dev/null
+++ b/ql/src/test/results/clientpositive/setop_no_distinct.q.out
@@ -0,0 +1,237 @@
+PREHOOK: query: create table a(key int, value int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@a
+POSTHOOK: query: create table a(key int, value int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@a
+PREHOOK: query: insert into table a values (1,2),(1,2),(1,2),(1,3),(2,3)
+PREHOOK: type: QUERY
+PREHOOK: Input: defaul

[2/2] hive git commit: HIVE-11387: CBO: Calcite Operator To Hive Operator (Calcite Return Path) : fix reduce_deduplicate optimization (reviewed by Jesus Camacho Rodriguez, Hari Subramaniyan)

2015-08-11 Thread pxiong
HIVE-11387: CBO: Calcite Operator To Hive Operator (Calcite Return Path) : fix 
reduce_deduplicate optimization (reviewed by Jesus Camacho Rodriguez, Hari 
Subramaniyan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a851d030
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a851d030
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a851d030

Branch: refs/heads/branch-1
Commit: a851d0308c93bc76cf67abf898fb9f26a1c59a70
Parents: be21c94
Author: Pengcheng Xiong 
Authored: Tue Aug 11 11:15:09 2015 -0700
Committer: Pengcheng Xiong 
Committed: Tue Aug 11 11:15:09 2015 -0700

--
 .../correlation/AbstractCorrelationProcCtx.java |   7 +
 .../correlation/CorrelationUtilities.java   |  11 +-
 .../correlation/ReduceSinkDeDuplication.java|   6 +-
 ...i_insert_move_tasks_share_dependencies.q.out | 336 +++-
 ql/src/test/results/clientpositive/ptf.q.out|  27 +-
 ...i_insert_move_tasks_share_dependencies.q.out | 512 +++
 .../test/results/clientpositive/spark/ptf.q.out |  17 +-
 .../spark/union_remove_6_subq.q.out |  22 +-
 .../clientpositive/spark/vectorized_ptf.q.out   |  21 +-
 .../clientpositive/tez/explainuser_1.q.out  |  67 +--
 .../test/results/clientpositive/tez/ptf.q.out   |  15 +-
 .../clientpositive/tez/vectorized_ptf.q.out |  19 +-
 .../clientpositive/union_remove_6_subq.q.out|  34 +-
 .../results/clientpositive/vectorized_ptf.q.out |  67 +--
 14 files changed, 326 insertions(+), 835 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/a851d030/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/AbstractCorrelationProcCtx.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/AbstractCorrelationProcCtx.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/AbstractCorrelationProcCtx.java
index 174685b..5b673df 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/AbstractCorrelationProcCtx.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/AbstractCorrelationProcCtx.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.optimizer.correlation;
 
 import static 
org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATIONMINREDUCER;
 import static 
org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVESCRIPTOPERATORTRUST;
+import static 
org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVEMAPSIDEAGGREGATE;
 
 import java.util.HashSet;
 import java.util.Set;
@@ -39,11 +40,13 @@ abstract class AbstractCorrelationProcCtx implements 
NodeProcessorCtx {
   // only one reducer if this configuration does not prevent it
   private final int minReducer;
+  private final Set<Operator<?>> removedOps;
+  private final boolean isMapAggr;
 
   public AbstractCorrelationProcCtx(ParseContext pctx) {
 removedOps = new HashSet<Operator<?>>();
 trustScript = pctx.getConf().getBoolVar(HIVESCRIPTOPERATORTRUST);
 minReducer = 
pctx.getConf().getIntVar(HIVEOPTREDUCEDEDUPLICATIONMINREDUCER);
+isMapAggr = pctx.getConf().getBoolVar(HIVEMAPSIDEAGGREGATE);
 this.pctx = pctx;
   }
 
@@ -70,4 +73,8 @@ abstract class AbstractCorrelationProcCtx implements 
NodeProcessorCtx {
   public boolean addRemovedOperator(Operator<?> rsOp) {
 return removedOps.add(rsOp);
   }
+
+  public boolean isMapAggr() {
+return isMapAggr;
+  }
 }
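
The new field follows the usual pattern here: read a HiveConf flag once when the optimization pass context is built, then let node processors query the cached value instead of re-reading the configuration at every node visit. A minimal sketch of that pattern (hypothetical class names, java.util.Properties standing in for HiveConf):

    import java.util.Properties;

    // Sketch of caching a config flag in a per-pass context object.
    class OptimizerContext {
      private final boolean mapSideAggr;

      OptimizerContext(Properties conf) {
        // Read once when the pass starts; node processors then call isMapAggr().
        mapSideAggr = Boolean.parseBoolean(conf.getProperty("hive.map.aggr", "true"));
      }

      boolean isMapAggr() {
        return mapSideAggr;
      }
    }

    public class ConfigFlagDemo {
      public static void main(String[] args) {
        Properties conf = new Properties();
        conf.setProperty("hive.map.aggr", "false");
        OptimizerContext ctx = new OptimizerContext(conf);
        // With map-side aggregation off, a dedup rewrite that relies on a
        // map-side GroupBy would be skipped.
        System.out.println("isMapAggr = " + ctx.isMapAggr());
      }
    }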

http://git-wip-us.apache.org/repos/asf/hive/blob/a851d030/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java
index 64bef21..7bb49be 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java
@@ -29,6 +29,7 @@ import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Set;
 
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.exec.FilterOperator;
 import org.apache.hadoop.hive.ql.exec.ForwardOperator;
@@ -44,6 +45,7 @@ import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.Utilities.ReduceField;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import 
org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkDeDuplication.ReduceSinkDeduplicateProcCtx;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 impor

[1/2] hive git commit: HIVE-11387: CBO: Calcite Operator To Hive Operator (Calcite Return Path) : fix reduce_deduplicate optimization (reviewed by Jesus Camacho Rodriguez, Hari Subramaniyan)

2015-08-11 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/branch-1 be21c940d -> a851d0308


http://git-wip-us.apache.org/repos/asf/hive/blob/a851d030/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out 
b/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
index 6960bee..32514ca 100644
--- a/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
@@ -1855,8 +1855,7 @@ STAGE PLANS:
 Spark
   Edges:
 Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
-Reducer 3 <- Reducer 2 (GROUP, 2)
-Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2)
+Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
   Vertices:
 Map 1 
@@ -1956,7 +1955,7 @@ STAGE PLANS:
   Reduce Output Operator
 key expressions: _col0 (type: string), _col1 (type: 
string), _col2 (type: int)
 sort order: +++
-Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string), _col2 (type: int)
+Map-reduce partition columns: _col0 (type: string)
 Statistics: Num rows: 26 Data size: 16042 Basic stats: 
COMPLETE Column stats: NONE
 tag: -1
 auto parallelism: false
@@ -1968,22 +1967,6 @@ STAGE PLANS:
 mode: mergepartial
 outputColumnNames: _col0, _col1, _col2
 Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE 
Column stats: NONE
-Reduce Output Operator
-  key expressions: _col0 (type: string), _col1 (type: string)
-  sort order: ++
-  Map-reduce partition columns: _col0 (type: string)
-  Statistics: Num rows: 13 Data size: 8021 Basic stats: 
COMPLETE Column stats: NONE
-  tag: -1
-  value expressions: _col2 (type: int)
-  auto parallelism: false
-Execution mode: vectorized
-Reducer 4 
-Needs Tagging: false
-Reduce Operator Tree:
-  Select Operator
-expressions: KEY.reducesinkkey0 (type: string), 
KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int)
-outputColumnNames: _col0, _col1, _col2
-Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE 
Column stats: NONE
 PTF Operator
   Function definitions:
   Input definition

http://git-wip-us.apache.org/repos/asf/hive/blob/a851d030/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
--
diff --git a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out 
b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
index 0472da9..d5e26ca 100644
--- a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
@@ -6838,14 +6838,13 @@ Plan not optimized by CBO.
 Vertex dependency in root stage
 Reducer 2 <- Map 1 (SIMPLE_EDGE)
 Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
-Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
 Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
 
 Stage-0
Fetch Operator
   limit:-1
   Stage-1
- Reducer 5
+ Reducer 4
  File Output Operator [FS_14]
 compressed:true
 Statistics:Num rows: 26 Data size: 6214 Basic stats: COMPLETE 
Column stats: COMPLETE
@@ -6856,52 +6855,42 @@ Stage-0
PTF Operator [PTF_11]
   Function definitions:[{"Input 
definition":{"type:":"WINDOWING"}},{"partition 
by:":"_col0","name:":"windowingtablefunction","order by:":"_col1"}]
   Statistics:Num rows: 26 Data size: 5798 Basic stats: 
COMPLETE Column stats: COMPLETE
-  Select Operator [SEL_10]
+  Group By Operator [GBY_8]
+  |  keys:KEY._col0 (type: string), KEY._col1 (type: string), 
KEY._col2 (type: int)
   |  outputColumnNames:["_col0","_col1","_col2"]
   |  Statistics:Num rows: 26 Data size: 5798 Basic stats: 
COMPLETE Column stats: COMPLETE
-  |<-Reducer 4 [SIMPLE_EDGE]
- Reduce Output Operator [RS_9]
-key expressions:_col0 (type: string), _col1 (type: 
string)
+  |<-Reducer 3 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_7]
+key expressions:_col0 (type: string), _col1 (type: 
string), _col2 (type: int)
 Map-reduce partition columns:_col0 (type: string)
-sort order:++
+sort order:+++

hive git commit: HIVE-11416: CBO: Calcite Operator To Hive Operator (Calcite Return Path): Groupby Optimizer assumes the schema can match after removing RS and GBY (reviewed by Jesus Camacho Rodriguez

2015-08-11 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master 57ba795cb -> 763cb02b5


HIVE-11416: CBO: Calcite Operator To Hive Operator (Calcite Return Path): 
Groupby Optimizer assumes the schema can match after removing RS and GBY 
(reviewed by Jesus Camacho Rodriguez)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/763cb02b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/763cb02b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/763cb02b

Branch: refs/heads/master
Commit: 763cb02b5eafb0ecd3fd0eb512636a1b092df671
Parents: 57ba795
Author: Pengcheng Xiong 
Authored: Tue Aug 11 11:26:48 2015 -0700
Committer: Pengcheng Xiong 
Committed: Tue Aug 11 11:26:48 2015 -0700

--
 .../apache/hadoop/hive/ql/exec/Operator.java| 25 -
 .../hive/ql/optimizer/GroupByOptimizer.java | 58 +++-
 2 files changed, 57 insertions(+), 26 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/763cb02b/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
index 0f02737..acbe504 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
@@ -769,31 +769,6 @@ public abstract class Operator<T extends OperatorDesc> implements Serializable,C
 }
   }
 
-  // Remove the operators till a certain depth.
-  // Return true if the remove was successful, false otherwise
-  public boolean removeChildren(int depth) {
-    Operator<? extends OperatorDesc> currOp = this;
-    for (int i = 0; i < depth; i++) {
-      // If there are more than 1 children at any level, don't do anything
-      if ((currOp.getChildOperators() == null) || (currOp.getChildOperators().isEmpty()) ||
-          (currOp.getChildOperators().size() > 1)) {
-        return false;
-      }
-      currOp = currOp.getChildOperators().get(0);
-    }
-
-    setChildOperators(currOp.getChildOperators());
-
-    List<Operator<? extends OperatorDesc>> parentOps =
-        new ArrayList<Operator<? extends OperatorDesc>>();
-    parentOps.add(this);
-
-    for (Operator<? extends OperatorDesc> op : currOp.getChildOperators()) {
-      op.setParentOperators(parentOps);
-    }
-    return true;
-  }
-
   /**
 * Replace one parent with another at the same position. Children of the new
* parent are not updated

http://git-wip-us.apache.org/repos/asf/hive/blob/763cb02b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java
index af54286..ce3f59a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.optimizer;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
@@ -31,9 +32,12 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.OperatorFactory;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
 import org.apache.hadoop.hive.ql.exec.SelectOperator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Utilities;
@@ -520,12 +524,64 @@ public class GroupByOptimizer implements Transform {
 return;
   }
 
-  if (groupByOp.removeChildren(depth)) {
+  if (removeChildren(groupByOp, depth)) {
 // Use bucketized hive input format - that makes sure that one mapper 
reads the entire file
 groupByOp.setUseBucketizedHiveInputFormat(true);
 groupByOp.getConf().setMode(GroupByDesc.Mode.FINAL);
   }
 }
+
+// Remove the operators till a certain depth.
+// Return true if the remove was successful, false otherwise
+    public boolean removeChildren(Operator<? extends OperatorDesc> currOp, int depth) {
+      Operator<? extends OperatorDesc> inputOp = currOp;
+      for (int i = 0; i < depth; i++) {
+        // If there are more than 1 children at any level, don't do anything
+        if ((currOp.getChildOperators() == null) || (currOp.getChildOperators().isEmpty())
+            || (currOp.getChildOperators().size() > 1)) {
+  return fal
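
The walk mirrors the method removed from Operator.java above: descend exactly depth single-child links and, if no level branches, splice the surviving children onto the starting operator. A minimal sketch over a toy node type (hypothetical, not the Hive Operator class):

    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.List;

    // Toy operator node; stands in for Hive's Operator<? extends OperatorDesc>.
    class Node {
      List<Node> children = new ArrayList<>();
      List<Node> parents = new ArrayList<>();
      final String name;
      Node(String name) { this.name = name; }
    }

    public class RemoveChildrenDemo {
      // Descend `depth` single-child links from start; if any level branches,
      // refuse. Otherwise adopt the grandchildren and re-point their parents.
      static boolean removeChildren(Node start, int depth) {
        Node curr = start;
        for (int i = 0; i < depth; i++) {
          if (curr.children.size() != 1) {
            return false; // only a straight chain may be spliced out
          }
          curr = curr.children.get(0);
        }
        start.children = curr.children;
        for (Node child : curr.children) {
          child.parents = new ArrayList<>(Collections.singletonList(start));
        }
        return true;
      }

      public static void main(String[] args) {
        Node gby = new Node("GBY"), rs = new Node("RS"), gby2 = new Node("GBY2"), sel = new Node("SEL");
        gby.children.add(rs); rs.children.add(gby2); gby2.children.add(sel);
        System.out.println(removeChildren(gby, 2));   // true: GBY now feeds SEL directly
        System.out.println(gby.children.get(0).name); // SEL
      }
    }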

[7/7] hive git commit: HIVE-9177: Fix child operator references after NonBlockingOpDeDupProc (II) (Szehon via Xuefu) merged from trunk, r1646994

2015-08-11 Thread pxiong
HIVE-9177: Fix child operator references after NonBlockingOpDeDupProc (II) 
(Szehon via Xuefu)
merged from trunk, r1646994

git-svn-id: https://svn.apache.org/repos/asf/hive/branches/spark@1646995 
13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/84af92e6
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/84af92e6
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/84af92e6

Branch: refs/heads/branch-1.0
Commit: 84af92e6520783d32abe1f5c6c263179748332ae
Parents: 329a336
Author: Xuefu Zhang 
Authored: Sat Dec 20 14:50:56 2014 +
Committer: Pengcheng Xiong 
Committed: Tue Aug 11 13:59:06 2015 -0700

--
 .../apache/hadoop/hive/ql/optimizer/NonBlockingOpDeDupProc.java  | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/84af92e6/ql/src/java/org/apache/hadoop/hive/ql/optimizer/NonBlockingOpDeDupProc.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/NonBlockingOpDeDupProc.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/NonBlockingOpDeDupProc.java
index 5e0959a..5291851 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/NonBlockingOpDeDupProc.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/NonBlockingOpDeDupProc.java
@@ -186,7 +186,9 @@ public class NonBlockingOpDeDupProc implements Transform {
* @param pSEL parent operator
*/
   private void fixContextReferences(SelectOperator cSEL, SelectOperator pSEL) {
-    Collection<QBJoinTree> qbJoinTrees = pctx.getJoinContext().values();
+    Collection<QBJoinTree> qbJoinTrees = new ArrayList<QBJoinTree>();
+    qbJoinTrees.addAll(pctx.getJoinContext().values());
+    qbJoinTrees.addAll(pctx.getMapJoinContext().values());
     for (QBJoinTree qbJoinTree : qbJoinTrees) {
       Map<String, Operator<? extends OperatorDesc>> aliasToOpInfo = qbJoinTree.getAliasToOpInfo();
       for (Map.Entry<String, Operator<? extends OperatorDesc>> entry : aliasToOpInfo.entrySet()) {



[4/7] hive git commit: HIVE-8863 : Cannot drop table with uppercase name after compute statistics for columns (Chaoyu Tang via Ashutosh Chauhan)

2015-08-11 Thread pxiong
HIVE-8863 : Cannot drop table with uppercase name after compute statistics for 
columns (Chaoyu Tang via Ashutosh Chauhan)

git-svn-id: https://svn.apache.org/repos/asf/hive/trunk@1640943 
13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/fba31e76
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/fba31e76
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/fba31e76

Branch: refs/heads/branch-1.0
Commit: fba31e76d3d32678258d62e49b80f8eaaab94216
Parents: dbdea20
Author: Ashutosh Chauhan 
Authored: Fri Nov 21 15:42:35 2014 +
Committer: Pengcheng Xiong 
Committed: Tue Aug 11 13:56:03 2015 -0700

--
 .../hive/common/util/HiveStringUtils.java   |   4 +
 .../hadoop/hive/metastore/ObjectStore.java  | 208 
 .../clientpositive/drop_partition_with_stats.q  |  68 +++
 .../clientpositive/drop_table_with_stats.q  |  43 ++
 .../drop_partition_with_stats.q.out | 496 +++
 .../clientpositive/drop_table_with_stats.q.out  | 236 +
 6 files changed, 956 insertions(+), 99 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/fba31e76/common/src/java/org/apache/hive/common/util/HiveStringUtils.java
--
diff --git a/common/src/java/org/apache/hive/common/util/HiveStringUtils.java 
b/common/src/java/org/apache/hive/common/util/HiveStringUtils.java
index 2705f1e..78cd983 100644
--- a/common/src/java/org/apache/hive/common/util/HiveStringUtils.java
+++ b/common/src/java/org/apache/hive/common/util/HiveStringUtils.java
@@ -882,4 +882,8 @@ public class HiveStringUtils {
 }
 return len;
   }
+
+  public static String normalizeIdentifier(String identifier) {
+    return identifier.trim().toLowerCase();
+  }
 }
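
Centralizing trim().toLowerCase() matters because the metastore stores identifiers lower-cased while HiveQL treats them case-insensitively, so a table created as MYTAB must still be found (and dropped) as mytab. A minimal illustration (plain Java, not the metastore code path):

    import java.util.HashMap;
    import java.util.Map;

    public class IdentifierDemo {
      // Same normalization as HiveStringUtils.normalizeIdentifier.
      static String normalizeIdentifier(String identifier) {
        return identifier.trim().toLowerCase();
      }

      public static void main(String[] args) {
        Map<String, String> tables = new HashMap<>();
        // Store under the normalized name, as the metastore does.
        tables.put(normalizeIdentifier("MyTab "), "metadata");
        // Any case/whitespace variant of the name now resolves to the same
        // entry; comparing raw strings ("MyTab" vs "mytab") would miss it.
        System.out.println(tables.containsKey(normalizeIdentifier("MYTAB"))); // true
      }
    }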

http://git-wip-us.apache.org/repos/asf/hive/blob/fba31e76/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
--
diff --git 
a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java 
b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
index 135cadf..4f2106e 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
@@ -139,6 +139,7 @@ import 
org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.hive.shims.ShimLoader;
 import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
 import org.apache.hadoop.util.StringUtils;
+import org.apache.hive.common.util.HiveStringUtils;
 import org.apache.thrift.TException;
 import org.datanucleus.store.rdbms.exceptions.MissingTableException;
 
@@ -503,7 +504,7 @@ public class ObjectStore implements RawStore, Configurable {
 boolean commited = false;
 try {
   openTransaction();
-  name = name.toLowerCase().trim();
+  name = HiveStringUtils.normalizeIdentifier(name);
   Query query = pm.newQuery(MDatabase.class, "name == dbname");
   query.declareParameters("java.lang.String dbname");
   query.setUnique(true);
@@ -615,7 +616,7 @@ public class ObjectStore implements RawStore, Configurable {
   public boolean dropDatabase(String dbname) throws NoSuchObjectException, 
MetaException {
 boolean success = false;
 LOG.info("Dropping database " + dbname + " along with all tables");
-dbname = dbname.toLowerCase();
+dbname = HiveStringUtils.normalizeIdentifier(dbname);
 try {
   openTransaction();
 
@@ -912,7 +913,7 @@ public class ObjectStore implements RawStore, Configurable {
 List<String> tbls = null;
 try {
   openTransaction();
-  dbName = dbName.toLowerCase().trim();
+  dbName = HiveStringUtils.normalizeIdentifier(dbName);
   // Take the pattern and split it on the | to get all the composing
   // patterns
   String[] subpatterns = pattern.trim().split("\\|");
@@ -958,8 +959,8 @@ public class ObjectStore implements RawStore, Configurable {
 boolean commited = false;
 try {
   openTransaction();
-  db = db.toLowerCase().trim();
-  table = table.toLowerCase().trim();
+  db = HiveStringUtils.normalizeIdentifier(db);
+  table = HiveStringUtils.normalizeIdentifier(table);
   Query query = pm.newQuery(MTable.class, "tableName == table && 
database.name == db");
   query.declareParameters("java.lang.String table, java.lang.String db");
   query.setUnique(true);
@@ -982,7 +983,7 @@ public class ObjectStore implements RawStore, Configurable {
 try {
   openTransaction();
 
-  db = db.toLowerCase().trim();
+  db = HiveStringUtils.normalizeIdentifier(db);
   Query dbExistsQuery = pm.newQuery(MDatabase.class, "name ==

[1/7] hive git commit: HIVE-8448 : Union All might not work due to the type conversion issue (Yongzhi Chen via Szehon)

2015-08-11 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/branch-1.0 7f29ee466 -> 84af92e65


HIVE-8448 : Union All might not work due to the type conversion issue (Yongzhi 
Chen via Szehon)

git-svn-id: https://svn.apache.org/repos/asf/hive/trunk@1632393 
13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/27a3fc20
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/27a3fc20
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/27a3fc20

Branch: refs/heads/branch-1.0
Commit: 27a3fc203e9a03e395ecc7e2d6d500027b854760
Parents: 7f29ee4
Author: Szehon Ho 
Authored: Thu Oct 16 17:33:38 2014 +
Committer: Pengcheng Xiong 
Committed: Tue Aug 11 13:52:15 2015 -0700

--
 .../hadoop/hive/ql/exec/UnionOperator.java  |  2 +-
 .../hive/ql/udf/generic/GenericUDFUtils.java| 28 +-
 .../queries/clientpositive/union_date_trim.q|  7 +++
 .../clientpositive/union_date_trim.q.out| 54 
 4 files changed, 89 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/27a3fc20/ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java
index 59c07c3..0f761e8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java
@@ -80,7 +80,7 @@ public class UnionOperator extends Operator 
implements Serializable {
 for (int p = 0; p < parents; p++) {
   assert (parentFields[p].size() == columns);
   for (int c = 0; c < columns; c++) {
-if (!columnTypeResolvers[c].update(parentFields[p].get(c)
+if (!columnTypeResolvers[c].updateForUnionAll(parentFields[p].get(c)
 .getFieldObjectInspector())) {
   // checked in SemanticAnalyzer. Should not happen
   throw new HiveException("Incompatible types for union operator");

http://git-wip-us.apache.org/repos/asf/hive/blob/27a3fc20/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java
index 1f70c55..833452d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java
@@ -100,6 +100,26 @@ public final class GenericUDFUtils {
  * @return false if there is a type mismatch
  */
 public boolean update(ObjectInspector oi) throws UDFArgumentTypeException {
+  return update(oi, false);
+}
+
+/**
+ * Update returnObjectInspector and valueInspectorsAreTheSame based on the
+ * ObjectInspector seen for UnionAll.
+ *
+ * @return false if there is a type mismatch
+ */
+public boolean updateForUnionAll(ObjectInspector oi) throws 
UDFArgumentTypeException {
+  return update(oi, true);
+}
+
+/**
+ * Update returnObjectInspector and valueInspectorsAreTheSame based on the
+ * ObjectInspector seen.
+ *
+ * @return false if there is a type mismatch
+ */
+private boolean update(ObjectInspector oi, boolean isUnionAll) throws 
UDFArgumentTypeException {
   if (oi instanceof VoidObjectInspector) {
 return true;
   }
@@ -137,8 +157,14 @@ public final class GenericUDFUtils {
 
   // Types are different, we need to check whether we can convert them to
   // a common base class or not.
-  TypeInfo commonTypeInfo = FunctionRegistry.getCommonClass(oiTypeInfo,
+  TypeInfo commonTypeInfo = null;
+  if (isUnionAll) {
+commonTypeInfo = FunctionRegistry.getCommonClassForUnionAll(oiTypeInfo,
+  rTypeInfo);
+  } else {
+commonTypeInfo = FunctionRegistry.getCommonClass(oiTypeInfo,
   rTypeInfo);
+  }
   if (commonTypeInfo == null) {
 return false;
   }
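
The isUnionAll flag reaches the type-resolution step so UNION ALL can use a more permissive notion of a common type than other callers. A minimal sketch of the dispatch, with hypothetical resolvers standing in for FunctionRegistry.getCommonClass and getCommonClassForUnionAll:

    public class CommonTypeDemo {
      // Hypothetical stand-ins for the two FunctionRegistry resolvers: the
      // union-all variant falls back to string where the strict one gives up.
      static String getCommonClass(String a, String b) {
        return a.equals(b) ? a : null; // strict: identical types only
      }

      static String getCommonClassForUnionAll(String a, String b) {
        String t = getCommonClass(a, b);
        return t != null ? t : "string"; // permissive fallback for UNION ALL
      }

      static String resolve(String a, String b, boolean isUnionAll) {
        // Mirrors the isUnionAll branch added to GenericUDFUtils.update().
        return isUnionAll ? getCommonClassForUnionAll(a, b) : getCommonClass(a, b);
      }

      public static void main(String[] args) {
        System.out.println(resolve("date", "string", false)); // null: type mismatch
        System.out.println(resolve("date", "string", true));  // string
      }
    }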

http://git-wip-us.apache.org/repos/asf/hive/blob/27a3fc20/ql/src/test/queries/clientpositive/union_date_trim.q
--
diff --git a/ql/src/test/queries/clientpositive/union_date_trim.q 
b/ql/src/test/queries/clientpositive/union_date_trim.q
new file mode 100644
index 000..6842e56
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/union_date_trim.q
@@ -0,0 +1,7 @@
+drop table if exists testDate;
+create table testDate(id int, dt date);
+insert into table testDate select 1, '2014-04-07' from src where key=100 limit 
1;
+insert into table testDate select 2, '2014-

[3/7] hive git commit: HIVE-8594 : Wrong condition in SettableConfigUpdater#setHiveConfWhiteList() (Ted Yu via Szehon)

2015-08-11 Thread pxiong
HIVE-8594 : Wrong condition in SettableConfigUpdater#setHiveConfWhiteList() 
(Ted Yu via Szehon)

git-svn-id: https://svn.apache.org/repos/asf/hive/trunk@1640844 
13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/dbdea20d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/dbdea20d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/dbdea20d

Branch: refs/heads/branch-1.0
Commit: dbdea20d22a9c64d450299406f74f13b8f0b7ac3
Parents: 6be7f51
Author: Szehon Ho 
Authored: Fri Nov 21 00:53:34 2014 +
Committer: Pengcheng Xiong 
Committed: Tue Aug 11 13:55:23 2015 -0700

--
 .../ql/security/authorization/plugin/SettableConfigUpdater.java| 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/dbdea20d/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/SettableConfigUpdater.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/SettableConfigUpdater.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/SettableConfigUpdater.java
index 89f155c..f12cd51 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/SettableConfigUpdater.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/SettableConfigUpdater.java
@@ -44,7 +44,7 @@ public class SettableConfigUpdater {
 String whiteListParamsStr = hiveConf
 .getVar(ConfVars.HIVE_AUTHORIZATION_SQL_STD_AUTH_CONFIG_WHITELIST);
 
-if(whiteListParamsStr == null && whiteListParamsStr.trim().isEmpty()) {
+if(whiteListParamsStr == null || whiteListParamsStr.trim().isEmpty()) {
   throw new HiveAuthzPluginException("Configuration parameter "
   + ConfVars.HIVE_AUTHORIZATION_SQL_STD_AUTH_CONFIG_WHITELIST.varname
   + " is not iniatialized.");



[5/7] hive git commit: HIVE-9060: Fix child operator references after NonBlockingOpDeDupProc (Szehon via Xuefu) merged from trunk, r1644780

2015-08-11 Thread pxiong
HIVE-9060: Fix child operator references after NonBlockingOpDeDupProc (Szehon 
via Xuefu)
merged from trunk, r1644780

git-svn-id: https://svn.apache.org/repos/asf/hive/branches/spark@1644781 
13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2b9414b4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2b9414b4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2b9414b4

Branch: refs/heads/branch-1.0
Commit: 2b9414b4729333eadc15b499474b7f5f6c8678f1
Parents: fba31e7
Author: Xuefu Zhang 
Authored: Thu Dec 11 22:26:49 2014 +
Committer: Pengcheng Xiong 
Committed: Tue Aug 11 13:57:31 2015 -0700

--
 .../ql/optimizer/NonBlockingOpDeDupProc.java| 24 +++-
 1 file changed, 23 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/2b9414b4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/NonBlockingOpDeDupProc.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/NonBlockingOpDeDupProc.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/NonBlockingOpDeDupProc.java
index 63862b9..5e0959a 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/NonBlockingOpDeDupProc.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/NonBlockingOpDeDupProc.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.optimizer;
 
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.LinkedHashMap;
@@ -28,6 +29,8 @@ import java.util.Set;
 import java.util.Stack;
 
 import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.JoinOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.SelectOperator;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
@@ -39,11 +42,13 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
 import org.apache.hadoop.hive.ql.lib.Rule;
 import org.apache.hadoop.hive.ql.lib.RuleRegExp;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.QBJoinTree;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 
 /**
  * merges SEL-SEL or FIL-FIL into single operator
@@ -129,8 +134,8 @@ public class NonBlockingOpDeDupProc implements Transform {
   pSEL.removeChildAndAdoptItsChildren(cSEL);
   cSEL.setParentOperators(null);
   cSEL.setChildOperators(null);
+  fixContextReferences(cSEL, pSEL);
   cSEL = null;
-
   return null;
 }
 
@@ -175,6 +180,23 @@ public class NonBlockingOpDeDupProc implements Transform {
 }
   }
 
+  /**
+   * Change existing references in the context to point from child to parent 
operator.
+   * @param cSEL child operator (to be removed, and merged into parent)
+   * @param pSEL parent operator
+   */
+  private void fixContextReferences(SelectOperator cSEL, SelectOperator pSEL) {
+    Collection<QBJoinTree> qbJoinTrees = pctx.getJoinContext().values();
+    for (QBJoinTree qbJoinTree : qbJoinTrees) {
+      Map<String, Operator<? extends OperatorDesc>> aliasToOpInfo = qbJoinTree.getAliasToOpInfo();
+      for (Map.Entry<String, Operator<? extends OperatorDesc>> entry : aliasToOpInfo.entrySet()) {
+if (entry.getValue() == cSEL) {
+  aliasToOpInfo.put(entry.getKey(), pSEL);
+}
+  }
+}
+  }
+
   private class FilterDedup implements NodeProcessor {
 @Override
 public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,



[6/7] hive git commit: HIVE-9113 : Explain on query failed with NPE (Navis reviewed by Szehon Ho)

2015-08-11 Thread pxiong
HIVE-9113 : Explain on query failed with NPE (Navis reviewed by Szehon Ho)

git-svn-id: https://svn.apache.org/repos/asf/hive/trunk@1646390 
13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/329a3368
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/329a3368
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/329a3368

Branch: refs/heads/branch-1.0
Commit: 329a33689a0bf1d96092cf0b600617d2a51099b5
Parents: 2b9414b
Author: Navis Ryu 
Authored: Thu Dec 18 06:41:09 2014 +
Committer: Pengcheng Xiong 
Committed: Tue Aug 11 13:58:16 2015 -0700

--
 .../apache/hadoop/hive/ql/parse/QBSubQuery.java | 30 ++--
 .../hadoop/hive/ql/parse/SubQueryUtils.java | 15 +-
 .../clientnegative/subquery_missing_from.q  |  1 +
 .../clientnegative/subquery_missing_from.q.out  |  3 ++
 4 files changed, 32 insertions(+), 17 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/329a3368/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
index 3c7b707..1b6b33b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
@@ -382,7 +382,7 @@ public class QBSubQuery implements ISubQueryJoinInfo {
 /*
  * row resolver of the SubQuery.
  * Set by the SemanticAnalyzer after the Plan for the SubQuery is genned.
- * This is neede in case the SubQuery select list contains a TOK_ALLCOLREF
+ * This is needed in case the SubQuery select list contains a TOK_ALLCOLREF
  */
 RowResolver sqRR;
 
@@ -513,7 +513,10 @@ public class QBSubQuery implements ISubQueryJoinInfo {
   String outerQueryAlias,
   Set outerQryAliases) throws SemanticException {
 
-ASTNode selectClause = (ASTNode) subQueryAST.getChild(1).getChild(1);
+ASTNode fromClause = getChildFromSubqueryAST("From", HiveParser.TOK_FROM);
+ASTNode insertClause = getChildFromSubqueryAST("Insert", 
HiveParser.TOK_INSERT);
+
+ASTNode selectClause = (ASTNode) insertClause.getChild(1);
 
 int selectExprStart = 0;
 if ( selectClause.getChild(0).getType() == HiveParser.TOK_HINTLIST ) {
@@ -537,7 +540,7 @@ public class QBSubQuery implements ISubQueryJoinInfo {
  * Restriction 17.s :: SubQuery cannot use the same table alias as one 
used in
  * the Outer Query.
  */
-    List<String> sqAliases = SubQueryUtils.getTableAliasesInSubQuery(this);
+    List<String> sqAliases = SubQueryUtils.getTableAliasesInSubQuery(fromClause);
 String sharedAlias = null;
 for(String s : sqAliases ) {
   if ( outerQryAliases.contains(s) ) {
@@ -545,7 +548,7 @@ public class QBSubQuery implements ISubQueryJoinInfo {
   }
 }
 if ( sharedAlias != null) {
-  ASTNode whereClause = SubQueryUtils.subQueryWhere(subQueryAST);
+  ASTNode whereClause = SubQueryUtils.subQueryWhere(insertClause);
 
   if ( whereClause != null ) {
 ASTNode u = SubQueryUtils.hasUnQualifiedColumnReferences(whereClause);
@@ -581,7 +584,7 @@ public class QBSubQuery implements ISubQueryJoinInfo {
   containsAggregationExprs = containsAggregationExprs | ( r == 1 );
 }
 
-rewrite(outerQueryRR, forHavingClause, outerQueryAlias);
+rewrite(outerQueryRR, forHavingClause, outerQueryAlias, insertClause, 
selectClause);
 
 SubQueryUtils.setOriginDeep(subQueryAST, originalSQASTOrigin);
 
@@ -631,6 +634,16 @@ public class QBSubQuery implements ISubQueryJoinInfo {
 
   }
 
+  private ASTNode getChildFromSubqueryAST(String errorMsg, int type) throws 
SemanticException {
+ASTNode childAST = (ASTNode) subQueryAST.getFirstChildWithType(type);
+if (childAST == null && errorMsg != null) {
+  subQueryAST.setOrigin(originalSQASTOrigin);
+  throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(
+  subQueryAST, errorMsg + " clause is missing in SubQuery."));
+}
+return childAST;
+  }
+
   private void setJoinType() {
 if ( operator.getType() == SubQueryType.NOT_IN ||
 operator.getType() == SubQueryType.NOT_EXISTS ) {
@@ -744,7 +757,7 @@ public class QBSubQuery implements ISubQueryJoinInfo {
* R2.x = min(R1.y)
*  Where R1 is an outer table reference, and R2 is a SubQuery table 
reference.
*   b. When hoisting the correlation predicate to a join predicate, we need 
to
-   *  rewrite it to be in the form the Join code allows: so the predicte 
needs
+   *  rewrite it to be in the form the Join code allows: so the predicate 
needs
*  to contain a qualified column references.
*  We hand

[2/7] hive git commit: HIVE-8627: Compute stats on a table from impala caused the table to be corrupted (Na via Xuefu)

2015-08-11 Thread pxiong
HIVE-8627: Compute stats on a table from impala caused the table to be 
corrupted (Na via Xuefu)

git-svn-id: https://svn.apache.org/repos/asf/hive/trunk@1635309 
13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6be7f512
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6be7f512
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6be7f512

Branch: refs/heads/branch-1.0
Commit: 6be7f5127c3b0f5a25256456798c9d98ba2145f3
Parents: 27a3fc2
Author: Xuefu Zhang 
Authored: Wed Oct 29 21:38:01 2014 +
Committer: Pengcheng Xiong 
Committed: Tue Aug 11 13:53:47 2015 -0700

--
 .../java/org/apache/hadoop/hive/metastore/StatObjectConverter.java  | 1 +
 1 file changed, 1 insertion(+)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/6be7f512/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java
--
diff --git 
a/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java 
b/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java
index 5c3bce3..475883b 100644
--- 
a/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java
+++ 
b/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java
@@ -417,6 +417,7 @@ public class StatObjectConverter {
   public static void fillColumnStatisticsData(String colType, 
ColumnStatisticsData data,
   Object llow, Object lhigh, Object dlow, Object dhigh, Object declow, 
Object dechigh,
   Object nulls, Object dist, Object avglen, Object maxlen, Object trues, 
Object falses) throws MetaException {
+colType = colType.toLowerCase();
 if (colType.equals("boolean")) {
   BooleanColumnStatsData boolStats = new BooleanColumnStatsData();
   boolStats.setNumFalses(MetaStoreDirectSql.extractSqlLong(falses));



[4/4] hive git commit: HIVE-10698 : query on view results fails with table not found error if view is created with subquery alias (CTE). (Pengcheng Xiong via Ashutosh Chauhan, John Pullokkaran)

2015-08-11 Thread pxiong
HIVE-10698 : query on view results fails with table not found error if view is 
created with subquery alias (CTE). (Pengcheng Xiong via Ashutosh Chauhan, John 
Pullokkaran)

Signed-off-by: Ashutosh Chauhan 


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1954c908
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1954c908
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1954c908

Branch: refs/heads/branch-1.0
Commit: 1954c90881f0072c385595d01f753e6c3c4317a6
Parents: e720b58
Author: Pengcheng Xiong 
Authored: Thu May 14 20:42:00 2015 -0700
Committer: Pengcheng Xiong 
Committed: Tue Aug 11 14:18:13 2015 -0700

--
 .../apache/hadoop/hive/ql/parse/HiveParser.g|   4 +-
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |  66 ++---
 ql/src/test/queries/clientpositive/cteViews.q   |  41 
 .../test/results/clientpositive/cteViews.q.out  | 242 +++
 4 files changed, 309 insertions(+), 44 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/1954c908/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
index ef6d6f7..0fe1d69 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
@@ -2046,7 +2046,7 @@ queryStatementExpression[boolean topLevel]
 (w=withClause {topLevel}?)?
 queryStatementExpressionBody[topLevel] {
   if ($w.tree != null) {
-  adaptor.addChild($queryStatementExpressionBody.tree, $w.tree);
+  $queryStatementExpressionBody.tree.insertChild(0, $w.tree);
   }
 }
 ->  queryStatementExpressionBody
@@ -2164,7 +2164,7 @@ selectStatementWithCTE
 (w=withClause)?
 selectStatement[true] {
   if ($w.tree != null) {
-  adaptor.addChild($selectStatement.tree, $w.tree);
+  $selectStatement.tree.insertChild(0, $w.tree);
   }
 }
 ->  selectStatement

http://git-wip-us.apache.org/repos/asf/hive/blob/1954c908/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 63d5214..5deda9d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -798,10 +798,13 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
 
 qb.getParseInfo().setSrcForAlias(alias, tableTree);
 
-unparseTranslator.addTableNameTranslation(tableTree, 
SessionState.get().getCurrentDatabase());
-if (aliasIndex != 0) {
-  unparseTranslator.addIdentifierTranslation((ASTNode) tabref
-  .getChild(aliasIndex));
+// if alias to CTE contains the alias, we do not do the translation because
+// cte is actually a subquery.
+if (!this.aliasToCTEs.containsKey(alias)) {
+  unparseTranslator.addTableNameTranslation(tableTree, 
SessionState.get().getCurrentDatabase());
+  if (aliasIndex != 0) {
+unparseTranslator.addIdentifierTranslation((ASTNode) 
tabref.getChild(aliasIndex));
+  }
 }
 
 return alias;
@@ -1019,19 +1022,6 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
*
*/
   private ASTNode findCTEFromName(QB qb, String cteName) {
-
-/*
- * When saving a view definition all table references in the AST are 
qualified; including CTE references.
- * Where as CTE definitions have no DB qualifier; so we strip out the DB 
qualifier before searching in
- * aliasToCTEs map.
- */
-String currDB = SessionState.get().getCurrentDatabase();
-if ( currDB != null && cteName.startsWith(currDB) &&
-cteName.length() > currDB.length() &&
-cteName.charAt(currDB.length()) == '.'   ) {
-  cteName = cteName.substring(currDB.length() + 1);
-}
-
 StringBuffer qId = new StringBuffer();
 if (qb.getId() != null) {
   qId.append(qb.getId());
@@ -1064,14 +1054,6 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
 cteAlias = cteAlias == null ? cteName : cteAlias;
 ASTNode cteQryNode = findCTEFromName(qb, cteName);
 QBExpr cteQBExpr = new QBExpr(cteAlias);
-
-String cteText = ctx.getTokenRewriteStream().toString(
-cteQryNode.getTokenStartIndex(), cteQryNode.getTokenStopIndex());
-final ASTNodeOrigin cteOrigin = new ASTNodeOrigin("CTE", cteName,
-cteText, cteAlias, cteQryNode);
-cteQryNode = (ASTNode) ParseDriver.adaptor.dupTree(cteQryNode);
-SubQueryUtils.setOriginDeep(cteQr

[2/4] hive git commit: HIVE-7351 ANALYZE TABLE statement fails on postgres metastore (Navis via Alan Gates)

2015-08-11 Thread pxiong
HIVE-7351 ANALYZE TABLE statement fails on postgres metastore (Navis via Alan 
Gates)

git-svn-id: https://svn.apache.org/repos/asf/hive/trunk@1672696 
13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/20e1cc84
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/20e1cc84
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/20e1cc84

Branch: refs/heads/branch-1.0
Commit: 20e1cc84527b2135162c66770400dabe5ee05153
Parents: 8b9ba26
Author: Alan Gates 
Authored: Fri Apr 10 16:13:23 2015 +
Committer: Pengcheng Xiong 
Committed: Tue Aug 11 14:16:46 2015 -0700

--
 .../apache/hadoop/hive/ql/exec/Utilities.java| 19 +++
 .../hive/ql/stats/jdbc/JDBCStatsAggregator.java  |  2 +-
 .../hive/ql/stats/jdbc/JDBCStatsPublisher.java   |  4 ++--
 3 files changed, 22 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/20e1cc84/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index b2db584..c696c1d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -47,6 +47,7 @@ import java.sql.Connection;
 import java.sql.DriverManager;
 import java.sql.PreparedStatement;
 import java.sql.SQLException;
+import java.sql.SQLFeatureNotSupportedException;
 import java.sql.SQLTransientException;
 import java.sql.Timestamp;
 import java.text.SimpleDateFormat;
@@ -3016,6 +3017,24 @@ public final class Utilities {
 }
   }
 
+  public static void setQueryTimeout(java.sql.Statement stmt, int timeout) 
throws SQLException {
+if (timeout < 0) {
+  LOG.info("Invalid query timeout " + timeout);
+  return;
+}
+try {
+  stmt.setQueryTimeout(timeout);
+} catch (SQLException e) {
+  String message = e.getMessage() == null ? null : 
e.getMessage().toLowerCase();
+  if (e instanceof SQLFeatureNotSupportedException ||
+          (message != null && (message.contains("implemented") || message.contains("supported")))) {
+LOG.info("setQueryTimeout is not supported");
+return;
+  }
+  throw e;
+}
+  }
+
   /**
* Introducing a random factor to the wait time before another retry.
* The wait time is dependent on # of failures and a random factor.

http://git-wip-us.apache.org/repos/asf/hive/blob/20e1cc84/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsAggregator.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsAggregator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsAggregator.java
index e26031c..cf25bee 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsAggregator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsAggregator.java
@@ -81,7 +81,7 @@ public class JDBCStatsAggregator implements StatsAggregator {
 Utilities.SQLCommand setQueryTimeout = new 
Utilities.SQLCommand() {
   @Override
   public Void run(PreparedStatement stmt) throws SQLException {
-stmt.setQueryTimeout(timeout);
+Utilities.setQueryTimeout(stmt, timeout);
 return null;
   }
 };

http://git-wip-us.apache.org/repos/asf/hive/blob/20e1cc84/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java 
b/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java
index 32826e7..4028a9b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java
@@ -81,7 +81,7 @@ public class JDBCStatsPublisher implements StatsPublisher {
 Utilities.SQLCommand setQueryTimeout = new 
Utilities.SQLCommand() {
   @Override
   public Void run(PreparedStatement stmt) throws SQLException {
-stmt.setQueryTimeout(timeout);
+Utilities.setQueryTimeout(stmt, timeout);
 return null;
   }
 };
@@ -278,7 +278,7 @@ public class JDBCStatsPublisher implements StatsPublisher {
 conn = DriverManager.getConnection(connectionString);
 
 stmt = conn.createStatement();
-stmt.setQueryTimeout(timeout);
+Utilities.setQueryTimeout(stmt, timeout);
 
 // TODO: why is this not done using Hive db scripts?
 // Check if the table exists
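
A minimal sketch of how the wrapper behaves against a driver that rejects query timeouts, as the postgres JDBC driver of the time did (a hypothetical one-method interface standing in for java.sql.Statement):

    import java.sql.SQLException;
    import java.sql.SQLFeatureNotSupportedException;

    public class QueryTimeoutDemo {
      // Minimal stand-in for the part of java.sql.Statement the wrapper touches.
      interface TimeoutSettable {
        void setQueryTimeout(int seconds) throws SQLException;
      }

      // Same shape as Utilities.setQueryTimeout: swallow "not supported"
      // failures, rethrow everything else.
      static void setQueryTimeout(TimeoutSettable stmt, int timeout) throws SQLException {
        if (timeout < 0) {
          System.out.println("Invalid query timeout " + timeout);
          return;
        }
        try {
          stmt.setQueryTimeout(timeout);
        } catch (SQLException e) {
          String m = e.getMessage() == null ? null : e.getMessage().toLowerCase();
          if (e instanceof SQLFeatureNotSupportedException
              || (m != null && (m.contains("implemented") || m.contains("supported")))) {
            System.out.println("setQueryTimeout is not supported");
            return;
          }
          throw e;
        }
      }

      public static void main(String[] args) throws SQLException {
        // A driver that does not implement query timeouts:
        TimeoutSettable pg = t -> { throw new SQLFeatureNotSupportedException("Method not implemented"); };
        setQueryTimeout(pg, 30); // logs and continues instead of failing ANALYZE
      }
    }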



[3/4] hive git commit: HIVE-10140 : Window boundary is not compared correctly (Aihua Xu via Ashutosh Chauhan)

2015-08-11 Thread pxiong
HIVE-10140 : Window boundary is not compared correctly (Aihua Xu via Ashutosh 
Chauhan)

Signed-off-by: Ashutosh Chauhan 


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e720b586
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e720b586
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e720b586

Branch: refs/heads/branch-1.0
Commit: e720b586bf0b6d881a58ddf38fecf132bccf9011
Parents: 20e1cc8
Author: Aihua Xu 
Authored: Thu Apr 30 09:42:00 2015 -0700
Committer: Pengcheng Xiong 
Committed: Tue Aug 11 14:17:41 2015 -0700

--
 .../hadoop/hive/ql/parse/WindowingSpec.java |   7 +-
 .../clientpositive/windowing_windowspec.q   |   2 +
 .../clientpositive/windowing_windowspec.q.out   | 108 +++
 3 files changed, 115 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/e720b586/ql/src/java/org/apache/hadoop/hive/ql/parse/WindowingSpec.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/WindowingSpec.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/WindowingSpec.java
index 28afc6b..83f3513 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/WindowingSpec.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/WindowingSpec.java
@@ -613,8 +613,10 @@ public class WindowingSpec {
   if (c != 0) {
 return c;
   }
+
   RangeBoundarySpec rb = (RangeBoundarySpec) other;
-  return amt - rb.amt;
+  // Valid range is "range/rows between 10 preceding and 2 preceding" for 
preceding case
+  return this.direction == Direction.PRECEDING ? rb.amt - amt : amt - 
rb.amt;
 }
 
   }
@@ -712,7 +714,8 @@ public class WindowingSpec {
 return c;
   }
   ValueBoundarySpec vb = (ValueBoundarySpec) other;
-  return amt - vb.amt;
+  // Valid range is "range/rows between 10 preceding and 2 preceding" for 
preceding case
+  return this.direction == Direction.PRECEDING ? vb.amt - amt : amt - 
vb.amt;
 }
 
   }
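
A worked check of the fixed comparison: in "rows between 10 preceding and 2 preceding" the start boundary (10 preceding) lies before the end boundary (2 preceding), so for PRECEDING boundaries a larger amount must compare as smaller; the old code returned amt - other.amt for both directions. A minimal sketch (hypothetical classes, not the WindowingSpec types):

    public class BoundaryDemo {
      enum Direction { PRECEDING, CURRENT, FOLLOWING }

      static class Boundary implements Comparable<Boundary> {
        final Direction direction;
        final int amt;
        Boundary(Direction d, int amt) { this.direction = d; this.amt = amt; }

        public int compareTo(Boundary other) {
          int c = direction.compareTo(other.direction);
          if (c != 0) {
            return c;
          }
          // For PRECEDING, 10 preceding is earlier than 2 preceding, so the
          // comparison on amounts must be reversed.
          return direction == Direction.PRECEDING ? other.amt - amt : amt - other.amt;
        }
      }

      public static void main(String[] args) {
        Boundary start = new Boundary(Direction.PRECEDING, 10);
        Boundary end = new Boundary(Direction.PRECEDING, 2);
        // Negative result: start sorts before end, so the window is valid.
        System.out.println(start.compareTo(end)); // -8
      }
    }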

http://git-wip-us.apache.org/repos/asf/hive/blob/e720b586/ql/src/test/queries/clientpositive/windowing_windowspec.q
--
diff --git a/ql/src/test/queries/clientpositive/windowing_windowspec.q 
b/ql/src/test/queries/clientpositive/windowing_windowspec.q
index 6d8ce67..2055e9d 100644
--- a/ql/src/test/queries/clientpositive/windowing_windowspec.q
+++ b/ql/src/test/queries/clientpositive/windowing_windowspec.q
@@ -31,6 +31,8 @@ select s, sum(i) over(partition by ts order by s) from 
over10k limit 100;
 
 select f, sum(f) over (partition by ts order by f range between unbounded 
preceding and current row) from over10k limit 100;
 
+select f, sum(f) over (partition by ts order by f rows between 2 preceding and 
1 preceding) from over10k limit 100;
+
 select s, i, round(avg(d) over (partition by s order by i) / 10.0 , 2) from 
over10k limit 7;
 
 select s, i, round((avg(d) over  w1 + 10.0) - (avg(d) over w1 - 10.0),2) from 
over10k window w1 as (partition by s order by i) limit 7;

http://git-wip-us.apache.org/repos/asf/hive/blob/e720b586/ql/src/test/results/clientpositive/windowing_windowspec.q.out
--
diff --git a/ql/src/test/results/clientpositive/windowing_windowspec.q.out 
b/ql/src/test/results/clientpositive/windowing_windowspec.q.out
index 00af6b8..de4ae97 100644
--- a/ql/src/test/results/clientpositive/windowing_windowspec.q.out
+++ b/ql/src/test/results/clientpositive/windowing_windowspec.q.out
@@ -800,6 +800,114 @@ POSTHOOK: Input: default@over10k
 71.68  722.6499947607517
 79.46  802.1099938452244
 80.02  882.1299904882908
+PREHOOK: query: select f, sum(f) over (partition by ts order by f rows between 
2 preceding and 1 preceding) from over10k limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+#### A masked pattern was here ####
+POSTHOOK: query: select f, sum(f) over (partition by ts order by f rows 
between 2 preceding and 1 preceding) from over10k limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+#### A masked pattern was here ####
+3.17   14.064196167
+10.89  28.60381469727
+14.54  43.3811444092
+14.78  58.064196167
+17.85  67.7868664551
+20.61  81.9300012588501
+28.69  96.378392334
+29.22  109.6953405762
+31.17  127.42999839782715
+38.35  137.3499984741211
+38.61  147.60999870300293
+39.48  156.9754223633
+40.54  160.2254223633
+41.6   167.7076293945
+46.08  182.5800018310547
+54.36  198.9754223633
+56.94  222.341525879
+64.96  249.7799949645996
+73.52  273.9618530273
+78.58  298.4700012207031
+81.41  318.2200012207031
+84.71  332.1300048828125
+87.43  344.9100036621094
+91.36  356.4508447266
+

[1/4] hive git commit: HIVE-9619: Uninitialized read of numBitVectors in NumDistinctValueEstimator (Alexander Pivovarov via gopalv)

2015-08-11 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/branch-1.0 84af92e65 -> 1954c9088


HIVE-9619: Uninitialized read of numBitVectors in NumDistinctValueEstimator 
(Alexander Pivovarov via gopalv)

git-svn-id: https://svn.apache.org/repos/asf/hive/trunk@1660464 
13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8b9ba260
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8b9ba260
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8b9ba260

Branch: refs/heads/branch-1.0
Commit: 8b9ba2600e218561e52b8ae38701d801b31ce4da
Parents: 84af92e
Author: Gopal Vijayaraghavan 
Authored: Tue Feb 17 18:32:04 2015 +
Committer: Pengcheng Xiong 
Committed: Tue Aug 11 14:16:22 2015 -0700

--
 .../udf/generic/NumDistinctValueEstimator.java  | 59 +++-
 1 file changed, 33 insertions(+), 26 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/8b9ba260/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java
index 2817044..8212bea 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java
@@ -35,18 +35,18 @@ public class NumDistinctValueEstimator {
* independent. As a consequence, the hash values will not distribute 
uniformly from 0 to 2^p-1
* thus introducing errors in the estimates.
*/
-  private static final int bitVectorSize = 31;
-  private int numBitVectors;
+  private static final int BIT_VECTOR_SIZE = 31;
+  private final int numBitVectors;
 
   // Refer to Flajolet-Martin'86 for the value of phi
-  private final double phi =  0.77351;
+  private static final double PHI = 0.77351;
 
-  private int[] a;
-  private int[] b;
-  private  FastBitSet[] bitVector = new FastBitSet[numBitVectors];
+  private final int[] a;
+  private final int[] b;
+  private final FastBitSet[] bitVector;
 
-  private Random aValue;
-  private Random bValue;
+  private final Random aValue;
+  private final Random bValue;
 
   /* Create a new distinctValueEstimator
*/
@@ -54,7 +54,7 @@ public class NumDistinctValueEstimator {
 this.numBitVectors = numBitVectors;
 bitVector = new FastBitSet[numBitVectors];
 for (int i=0; i< numBitVectors; i++) {
-  bitVector[i] = new FastBitSet(bitVectorSize);
+  bitVector[i] = new FastBitSet(BIT_VECTOR_SIZE);
 }
 
 a = new int[numBitVectors];
@@ -98,23 +98,30 @@ public class NumDistinctValueEstimator {
   b[i] = randVal;
 
   if (a[i] < 0) {
-a[i] = a[i] + (1 << bitVectorSize - 1);
+a[i] = a[i] + (1 << BIT_VECTOR_SIZE - 1);
   }
 
   if (b[i] < 0) {
-b[i] = b[i] + (1 << bitVectorSize - 1);
+b[i] = b[i] + (1 << BIT_VECTOR_SIZE - 1);
   }
 }
   }
 
   public NumDistinctValueEstimator(String s, int numBitVectors) {
-FastBitSet b[] = deserialize(s, numBitVectors);
+this.numBitVectors = numBitVectors;
+FastBitSet bitVectorDeser[] = deserialize(s, numBitVectors);
 bitVector = new FastBitSet[numBitVectors];
 for(int i=0; i > 1;
@@ -277,8 +284,8 @@ public class NumDistinctValueEstimator {
 int index;
 
 // Find the index of the least significant bit that is 1
-for (index=0; index> 1;
@@ -321,13 +328,13 @@ public class NumDistinctValueEstimator {
 
 for (int i=0; i < numBitVectors; i++) {
   int index = 0;
-  while (bitVector[i].get(index) && index < bitVectorSize) {
+  while (bitVector[i].get(index) && index < BIT_VECTOR_SIZE) {
 index = index + 1;
   }
   S = S + index;
 }
 
-numDistinctValues = ((numBitVectors/phi) * Math.pow(2.0, S/numBitVectors));
+numDistinctValues = ((numBitVectors/PHI) * Math.pow(2.0, S/numBitVectors));
 return ((long)numDistinctValues);
   }
 
@@ -345,7 +352,7 @@ public class NumDistinctValueEstimator {
 }
 
 avgLeastSigZero =
-(double)(sumLeastSigZero/(numBitVectors * 1.0)) - 
(Math.log(phi)/Math.log(2.0));
+(double)(sumLeastSigZero/(numBitVectors * 1.0)) - 
(Math.log(PHI)/Math.log(2.0));
 numDistinctValues = Math.pow(2.0, avgLeastSigZero);
 return ((long)(numDistinctValues));
   }
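
For reference, the estimate at the end of this diff is the Flajolet-Martin formula: with m bit vectors and S the sum over vectors of the index of the lowest unset bit, NDV is approximately (m/PHI) * 2^(S/m) with PHI = 0.77351. A minimal sketch using java.util.BitSet (not the FastBitSet code):

    import java.util.BitSet;

    public class FlajoletMartinDemo {
      static final double PHI = 0.77351;

      // Estimate NDV from bit vectors where bit j of vector i is set when some
      // hashed value had its lowest set bit at position j.
      static long estimateNumDistinctValues(BitSet[] bitVector, int bitVectorSize) {
        int m = bitVector.length;
        int s = 0;
        for (BitSet v : bitVector) {
          int index = 0;
          // index of the lowest unset bit, capped at the vector size
          while (v.get(index) && index < bitVectorSize) {
            index++;
          }
          s += index;
        }
        return (long) ((m / PHI) * Math.pow(2.0, (double) s / m));
      }

      public static void main(String[] args) {
        int m = 16, size = 31;
        BitSet[] vectors = new BitSet[m];
        for (int i = 0; i < m; i++) {
          vectors[i] = new BitSet(size);
          vectors[i].set(0, 4); // pretend bits 0..3 are set in every vector
        }
        // S = 64, so the estimate is (16 / 0.77351) * 2^4, about 330.96; prints 330
        System.out.println(estimateNumDistinctValues(vectors, size));
      }
    }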



hive git commit: HIVE-8869 : RowSchema not updated for some ops when columns are pruned (Jesús Camacho Rodríguez via Ashutosh Chauhan)

2015-08-11 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/branch-1.0 822ef79cd -> d3919332a


HIVE-8869 : RowSchema not updated for some ops when columns are pruned (Jesús 
Camacho Rodríguez via Ashutosh Chauhan)

git-svn-id: https://svn.apache.org/repos/asf/hive/trunk@1640227 
13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d3919332
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d3919332
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d3919332

Branch: refs/heads/branch-1.0
Commit: d3919332a0b14d9f8d5ab0d996525cc48f519ce0
Parents: 822ef79
Author: Ashutosh Chauhan 
Authored: Mon Nov 17 21:19:05 2014 +
Committer: Pengcheng Xiong 
Committed: Tue Aug 11 15:20:09 2015 -0700

--
 .../hive/ql/optimizer/ColumnPrunerProcCtx.java  | 14 +++-
 .../ql/optimizer/ColumnPrunerProcFactory.java   | 20 --
 .../clientnegative/udf_assert_true.q.out| 12 ++--
 .../clientnegative/udf_assert_true2.q.out   |  6 +-
 .../results/clientpositive/lateral_view.q.out   | 68 ++--
 .../clientpositive/lateral_view_noalias.q.out   | 22 +++
 .../results/clientpositive/udtf_stack.q.out | 12 ++--
 7 files changed, 87 insertions(+), 67 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/d3919332/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java
index fee9bcf..5d848a1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java
@@ -23,6 +23,7 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.SelectOperator;
@@ -128,9 +129,16 @@ public class ColumnPrunerProcCtx implements 
NodeProcessorCtx {
   public List getColsFromSelectExpr(SelectOperator op) {
 List cols = new ArrayList();
 SelectDesc conf = op.getConf();
-List exprList = conf.getColList();
-for (ExprNodeDesc expr : exprList) {
-  cols = Utilities.mergeUniqElems(cols, expr.getCols());
+if(conf.isSelStarNoCompute()) {
+  for (ColumnInfo colInfo : op.getSchema().getSignature()) {
+cols.add(colInfo.getInternalName());
+  }
+}
+else {
+  List exprList = conf.getColList();
+for (ExprNodeDesc expr : exprList) {
+  cols = Utilities.mergeUniqElems(cols, expr.getCols());
+}
 }
 return cols;
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/d3919332/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
index 2dc66f7..afd1738 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
@@ -386,12 +386,14 @@ public final class ColumnPrunerProcFactory {
 scanOp.setNeededColumnIDs(null);
 return null;
   }
+
   cols = cols == null ? new ArrayList() : cols;
 
   cppCtx.getPrunedColLists().put((Operator) nd,
   cols);
   RowResolver inputRR = 
cppCtx.getOpToParseCtxMap().get(scanOp).getRowResolver();
   setupNeededColumns(scanOp, inputRR, cols);
+
   return null;
 }
   }
@@ -569,7 +571,7 @@ public final class ColumnPrunerProcFactory {
   // following SEL will do CP for columns from UDTF, not adding SEL in here
   newColNames.addAll(outputCols.subList(numSelColumns, outputCols.size()));
   op.getConf().setOutputInternalColNames(newColNames);
-
+  pruneOperator(ctx, op, newColNames);
   cppCtx.getPrunedColLists().put(op, colsAfterReplacement);
   return null;
 }
@@ -607,6 +609,12 @@ public final class ColumnPrunerProcFactory {
 ((SelectDesc)select.getConf()).setSelStarNoCompute(false);
 ((SelectDesc)select.getConf()).setColList(colList);
 ((SelectDesc)select.getConf()).setOutputColumnNames(outputColNames);
+pruneOperator(ctx, select, outputColNames);
+
+Operator udtfPath = 
op.getChildOperators().get(LateralViewJoinOperator.UDTF_TAG);
+List lvFCols = new 
ArrayList(cppCtx.getPrunedColLists().get(udtfPath));
+lvFCols = Utilities.me
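
The substantive change above is the new SELECT * branch: when the select involves no computation, its expression list is empty, so the pruner must read the column set from the operator's row schema instead. A condensed sketch of that control flow, with plain lists standing in for Hive's SelectDesc/RowSchema types:

import java.util.ArrayList;
import java.util.List;

// Illustrative only: schemaCols mimics the RowSchema signature names and
// exprCols mimics the per-expression column references of SelectDesc.
class ColumnPruneSketch {
  static List<String> colsFromSelect(boolean selStarNoCompute,
                                     List<String> schemaCols,
                                     List<List<String>> exprCols) {
    List<String> cols = new ArrayList<>();
    if (selStarNoCompute) {
      cols.addAll(schemaCols);          // SELECT *: take the whole schema
    } else {
      for (List<String> perExpr : exprCols) {
        for (String c : perExpr) {
          if (!cols.contains(c)) {      // mimics Utilities.mergeUniqElems
            cols.add(c);
          }
        }
      }
    }
    return cols;
  }
}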

hive git commit: HIVE-8926:: Projections that only swap input columns are identified incorrectly as identity projections (Jesus via Ashutosh Chauhan)

2015-08-11 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/branch-1.0 d3919332a -> fbcef73cc


HIVE-8926:: Projections that only swap input columns are identified incorrectly 
as identity projections (Jesus via Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/fbcef73c
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/fbcef73c
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/fbcef73c

Branch: refs/heads/branch-1.0
Commit: fbcef73ccd0ff740329d5ecaa94c57ad20212a0f
Parents: d391933
Author: Ashutosh Chauhan 
Authored: Sun Nov 23 06:32:09 2014 +
Committer: Pengcheng Xiong 
Committed: Tue Aug 11 16:40:14 2015 -0700

--
 .../hadoop/hive/ql/exec/SelectOperator.java | 52 +---
 1 file changed, 46 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/fbcef73c/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java
index 95d2d76..93017d3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java
@@ -24,6 +24,7 @@ import java.util.List;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.SelectDesc;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
@@ -133,22 +134,61 @@ public class SelectOperator extends Operator 
implements Serializable
* @return if it is an identity select operator or not
*/
   public boolean isIdentitySelect() {
-//Safety check
+// Safety check
 if(this.getNumParent() != 1) {
   return false;
 }
 
-//Select *
-if(this.getConf().isSelStarNoCompute() ||
-this.getConf().isSelectStar()) {
+if(conf.isSelStarNoCompute()) {
   return true;
 }
 
-//Check whether the have the same schema
-if(!OperatorUtils.sameRowSchema(this, this.getParentOperators().get(0))) {
+// Check whether they have the same schema
+RowSchema orig = this.getSchema();
+RowSchema dest = this.getParentOperators().get(0).getSchema();
+if(orig.getSignature() == null && dest.getSignature() == null) {
+  return true;
+}
+if((orig.getSignature() == null && dest.getSignature() != null) ||
+(orig.getSignature() != null && dest.getSignature() == null) ) {
+  return false;
+}
+
+if(orig.getSignature().size() != dest.getSignature().size() ||
+orig.getSignature().size() != conf.getColList().size()) {
   return false;
 }
 
+for(int i=0; i
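
The point of the rewrite above is that schema equality alone is not enough: a projection that merely swaps two input columns has the same schema as its parent but is not an identity. A toy sketch of the positional check, with illustrative names rather than the Hive API:

import java.util.List;

class IdentitySelectSketch {
  // projectedInput.get(i) is the input column position that output i reads.
  static boolean isIdentity(List<String> parentCols, List<String> childCols,
                            List<Integer> projectedInput) {
    if (parentCols.size() != childCols.size()
        || childCols.size() != projectedInput.size()) {
      return false;
    }
    for (int i = 0; i < projectedInput.size(); i++) {
      if (projectedInput.get(i) != i) {  // a pure column swap fails here
        return false;
      }
    }
    return true;
  }
}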

hive git commit: HIVE-9217: UnionProcessor misses results for multi-insert when hive.optimize.union.remove=true (Pengcheng Xiong via Laljo John Pullokkaran)

2015-08-11 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/branch-1.0 fbcef73cc -> 02a08237d


HIVE-9217: UnionProcessor misses results for multi-insert when 
hive.optimize.union.remove=true (Pengcheng Xiong via Laljo John Pullokkaran)

git-svn-id: https://svn.apache.org/repos/asf/hive/trunk@1650409 
13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/02a08237
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/02a08237
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/02a08237

Branch: refs/heads/branch-1.0
Commit: 02a08237df27bd26584a4604f7d2fff5fd840475
Parents: fbcef73
Author: John Pullokkaran 
Authored: Thu Jan 8 22:41:21 2015 +
Committer: Pengcheng Xiong 
Committed: Tue Aug 11 17:29:21 2015 -0700

--
 .../optimizer/unionproc/UnionProcFactory.java   |   15 +-
 .../clientpositive/union_remove_6_subq.q|   76 ++
 .../clientpositive/union_remove_6_subq.q.out| 1242 ++
 3 files changed, 1329 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/02a08237/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java
index a985c4f..94947d6 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java
@@ -265,13 +265,20 @@ public final class UnionProcFactory {
 Operator operator =
   (Operator)stack.get(pos);
 
+// (1) Because we have operator.supportUnionRemoveOptimization() for
+// true only in SEL and FIL operators,
+// this rule will actually only match UNION%(SEL%|FIL%)*FS%
+// (2) The assumption here is that, if
+// operator.getChildOperators().size() > 1, we are going to have
+// multiple FS operators, i.e., multiple inserts.
+// The current implementation does not support this. For more details,
+// please see HIVE-9217.
+if (operator.getChildOperators() != null && 
operator.getChildOperators().size() > 1) {
+  return null;
+}
 // Break if it encountered a union
 if (operator instanceof UnionOperator) {
   union = (UnionOperator)operator;
-  // No need for this optimization in case of multi-table inserts
-  if (union.getChildOperators().size() > 1) {
-return null;
-  }
   break;
 }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/02a08237/ql/src/test/queries/clientpositive/union_remove_6_subq.q
--
diff --git a/ql/src/test/queries/clientpositive/union_remove_6_subq.q 
b/ql/src/test/queries/clientpositive/union_remove_6_subq.q
new file mode 100644
index 000..8bcac6f
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/union_remove_6_subq.q
@@ -0,0 +1,76 @@
+set hive.stats.autogather=false;
+set hive.optimize.union.remove=true;
+set hive.mapred.supports.subdirectories=true;
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+set mapred.input.dir.recursive=true;
+
+-- This is to test the union->selectstar->filesink optimization
+-- Union of 2 subqueries is performed (all of which are mapred queries)
+-- followed by select star and a file sink in 2 output tables.
+-- The optimization does not take effect since it is a multi-table insert.
+-- It does not matter whether the output is merged or not. In this case,
+-- merging is turned off
+
+create table inputTbl1(key string, val string) stored as textfile;
+create table outputTbl1(key string, values bigint) stored as textfile;
+create table outputTbl2(key string, values bigint) stored as textfile;
+
+load data local inpath '../../data/files/T1.txt' into table inputTbl1;
+
+explain
+FROM (
+  select * from(
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+  )subq
+) a
+insert overwrite table outputTbl1 select *
+insert overwrite table outputTbl2 select *;
+
+FROM (
+  select * from(
+  SELECT key, count(1) as values from inputTbl1 group by key
+  UNION ALL
+  SELECT key, count(1) as values from inputTbl1 group by key
+  )subq
+) a
+insert overwrite table outputTbl1 select *
+insert overwrite table outputTbl2 select *;
+
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+select * from outputTbl1 order by key, values;
+select * from outputTbl2 order by key, values;
+
+-- The following queries guarantee the correctness.
+ex

hive git commit: HIVE-9397 : SELECT max(bar) FROM foo is broken after ANALYZE ... FOR COLUMNS (Navis via Ashutosh Chauhan)

2015-08-11 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/branch-1.0 02a08237d -> 842c527ae


HIVE-9397 : SELECT max(bar) FROM foo is broken after ANALYZE ... FOR COLUMNS 
(Navis via Ashutosh Chauhan)

git-svn-id: https://svn.apache.org/repos/asf/hive/trunk@1657445 
13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/842c527a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/842c527a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/842c527a

Branch: refs/heads/branch-1.0
Commit: 842c527ae124e537932e3fcfb058279fefbc8d14
Parents: 02a0823
Author: Ashutosh Chauhan 
Authored: Thu Feb 5 00:02:54 2015 +
Committer: Pengcheng Xiong 
Committed: Tue Aug 11 17:42:27 2015 -0700

--
 .../hive/ql/optimizer/StatsOptimizer.java   | 186 ---
 .../hive/ql/udf/generic/GenericUDAFSum.java |  23 +
 .../clientpositive/metadata_only_queries.q.out  |   8 +-
 .../metadata_only_queries_with_filters.q.out|   4 +-
 .../spark/metadata_only_queries.q.out   | 498 +++
 .../metadata_only_queries_with_filters.q.out| 224 +
 .../tez/metadata_only_queries.q.out |   8 +-
 7 files changed, 882 insertions(+), 69 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/842c527a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
index 6a43d1c..c312fc3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
@@ -34,7 +34,6 @@ import 
org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
-import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.FetchTask;
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
@@ -73,8 +72,7 @@ import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import 
org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import 
org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
-import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hive.common.util.AnnotationUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.thrift.TException;
 
 import com.google.common.collect.Lists;
@@ -140,6 +138,22 @@ public class StatsOptimizer implements Transform {
   Unsupported
 }
 
+enum LongSubType {
+  BIGINT { Object cast(long longValue) { return longValue; } }, 
+  INT { Object cast(long longValue) { return (int)longValue; } },
+  SMALLINT { Object cast(long longValue) { return (short)longValue; } },
+  TINYINT { Object cast(long longValue) { return (byte)longValue; } };
+
+  abstract Object cast(long longValue);
+}
+
+enum DoubleSubType {
+  DOUBLE { Object cast(double doubleValue) { return doubleValue; } },
+  FLOAT { Object cast(double doubleValue) { return (float) doubleValue; } 
};
+
+  abstract Object cast(double doubleValue);
+}
+
 private StatType getType(String origType) {
   if (serdeConstants.IntegralTypes.contains(origType)) {
 return StatType.Integeral;
@@ -187,54 +201,56 @@ public class StatsOptimizer implements Transform {
 
   try {
 TableScanOperator tsOp = (TableScanOperator) stack.get(0);
-if(tsOp.getParentOperators() != null && 
tsOp.getParentOperators().size() > 0) {
+if (tsOp.getNumParent() > 0) {
   // looks like a subq plan.
   return null;
 }
-SelectOperator selOp = (SelectOperator)tsOp.getChildren().get(0);
-for(ExprNodeDesc desc : selOp.getConf().getColList()) {
+SelectOperator pselOp = (SelectOperator)stack.get(1);
+for(ExprNodeDesc desc : pselOp.getConf().getColList()) {
   if (!((desc instanceof ExprNodeColumnDesc) || (desc instanceof 
ExprNodeConstantDesc))) {
 // Probably an expression, cant handle that
 return null;
   }
 }
-Map exprMap = selOp.getColumnExprMap();
-// Since we have done an exact match on TS-SEL-GBY-RS-GBY-SEL-FS
+Map exprMap = pselOp.getColumnExprMap();
+// Since we have done an exact match on T
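
The LongSubType/DoubleSubType enums above exist because the metastore stores column stats only as long or double, while the query's result type may be a narrower numeric. A hedged sketch of the dispatch, directly mirroring the enum in the patch plus a tiny driver:

// Sketch: each constant narrows the stored long back to the declared type.
enum LongSubTypeSketch {
  BIGINT   { Object cast(long v) { return v; } },
  INT      { Object cast(long v) { return (int) v; } },
  SMALLINT { Object cast(long v) { return (short) v; } },
  TINYINT  { Object cast(long v) { return (byte) v; } };

  abstract Object cast(long v);

  public static void main(String[] args) {
    // A MAX stored as long in the metastore, returned as the column's type:
    System.out.println(INT.cast(42L).getClass());  // class java.lang.Integer
  }
}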

hive git commit: HIVE-9507 : Make "LATERAL VIEW inline(expression) mytable" tolerant to nulls (Navis reviewed by Ashutosh Chauhan)

2015-08-11 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/branch-1.0 842c527ae -> cafd55593


HIVE-9507 : Make "LATERAL VIEW inline(expression) mytable" tolerant to nulls 
(Navis reviewed by Ashutosh Chauhan)

git-svn-id: https://svn.apache.org/repos/asf/hive/trunk@1658625 
13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cafd5559
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cafd5559
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cafd5559

Branch: refs/heads/branch-1.0
Commit: cafd555930dc1cc4edfb3ff1a7210d007092e842
Parents: 842c527
Author: Navis Ryu 
Authored: Tue Feb 10 06:47:09 2015 +
Committer: Pengcheng Xiong 
Committed: Tue Aug 11 23:24:50 2015 -0700

--
 .../apache/hadoop/hive/ql/udf/generic/GenericUDTFInline.java | 8 ++--
 .../hadoop/hive/serde2/lazybinary/LazyBinaryArray.java   | 2 +-
 2 files changed, 7 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/cafd5559/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFInline.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFInline.java 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFInline.java
index 2152d97..ab5a2bf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFInline.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFInline.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.udf.generic;
 
 import java.util.ArrayList;
+import java.util.List;
 
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
@@ -57,8 +58,11 @@ public class GenericUDTFInline extends GenericUDTF {
 
   @Override
   public void process(Object[] os) throws HiveException {
-for (Object row : new ArrayList(li.getList(os[0]))) {
-  forward(row);
+List list = li.getList(os[0]);
+if (list != null && !list.isEmpty()) {
+  for (Object row : list.toArray()) {
+forward(row);
+  }
 }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/cafd5559/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryArray.java
--
diff --git 
a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryArray.java 
b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryArray.java
index 4929f67..fee1472 100644
--- 
a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryArray.java
+++ 
b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryArray.java
@@ -214,7 +214,7 @@ public class LazyBinaryArray extends
 
   /**
* cachedList is reused every time getList is called. Different
-   * LazyBianryArray instances cannot share the same cachedList.
+   * LazyBinaryArray instances cannot share the same cachedList.
*/
   ArrayList cachedList;
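
The new process() body above encodes two defensive moves: tolerate a NULL (or empty) list argument, and iterate over a toArray() snapshot because the lazy list may be reused underneath the forwarded rows. The same pattern in isolation, with a plain Consumer standing in for forward():

import java.util.List;
import java.util.function.Consumer;

class InlineProcessSketch {
  static void process(List<Object> rows, Consumer<Object> forward) {
    if (rows != null && !rows.isEmpty()) {  // NULL-tolerant guard
      for (Object row : rows.toArray()) {   // snapshot: backing list may be reused
        forward.accept(row);
      }
    }
  }
}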
 



hive git commit: HIVE-11348: Support START TRANSACTION/COMMIT/ROLLBACK commands: support SQL2011 reserved keywords (Pengcheng Xiong reviewed by Eugene Koifman)

2015-08-12 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master 2ee30c485 -> df138f2b1


HIVE-11348: Support START TRANSACTION/COMMIT/ROLLBACK commands: support SQL2011 
reserved keywords (Pengcheng Xiong reviewed by Eugene Koifman)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/df138f2b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/df138f2b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/df138f2b

Branch: refs/heads/master
Commit: df138f2b133c62ac31021c179ce2d04cabcf210e
Parents: 2ee30c4
Author: Pengcheng Xiong 
Authored: Wed Aug 12 10:04:54 2015 -0700
Committer: Pengcheng Xiong 
Committed: Wed Aug 12 10:05:49 2015 -0700

--
 .../apache/hadoop/hive/ql/parse/IdentifiersParser.g | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/df138f2b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
index 501287d..64af7d1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
@@ -614,7 +614,13 @@ principalIdentifier
 | QuotedIdentifier
 ;
 
-//the new version of nonReserved + sql11ReservedKeywordsUsedAsIdentifier = old 
version of nonReserved 
+//The new version of nonReserved + sql11ReservedKeywordsUsedAsIdentifier = old 
version of nonReserved
+//Non reserved keywords are basically the keywords that can be used as 
identifiers.
+//All the KW_* are automatically not only keywords, but also reserved keywords.
+//That means, they can NOT be used as identifiers.
+//If you would like to use them as identifiers, put them in the nonReserved 
list below.
+//If you are not sure, please refer to the SQL2011 column in
+//http://www.postgresql.org/docs/9.5/static/sql-keywords-appendix.html
 nonReserved
 :
 KW_ADD | KW_ADMIN | KW_AFTER | KW_ANALYZE | KW_ARCHIVE | KW_ASC | 
KW_BEFORE | KW_BUCKET | KW_BUCKETS
@@ -636,11 +642,7 @@ nonReserved
 | KW_TINYINT | KW_TOUCH | KW_TRANSACTIONS | KW_UNARCHIVE | KW_UNDO | 
KW_UNIONTYPE | KW_UNLOCK | KW_UNSET
 | KW_UNSIGNED | KW_URI | KW_USE | KW_UTC | KW_UTCTIMESTAMP | KW_VALUE_TYPE 
| KW_VIEW | KW_WHILE | KW_YEAR
 | KW_WORK
-| KW_START
 | KW_TRANSACTION
-| KW_COMMIT
-| KW_ROLLBACK
-| KW_ONLY
 | KW_WRITE
 | KW_ISOLATION
 | KW_LEVEL
@@ -648,13 +650,15 @@ nonReserved
 | KW_AUTOCOMMIT
 ;
 
-//The following SQL2011 reserved keywords are used as cast function name only, 
it is a subset of the sql11ReservedKeywordsUsedAsIdentifier.
+//The following SQL2011 reserved keywords are used as cast function name only, 
but not as identifiers.
 sql11ReservedKeywordsUsedAsCastFunctionName
 :
 KW_BIGINT | KW_BINARY | KW_BOOLEAN | KW_CURRENT_DATE | 
KW_CURRENT_TIMESTAMP | KW_DATE | KW_DOUBLE | KW_FLOAT | KW_INT | KW_SMALLINT | 
KW_TIMESTAMP
 ;
 
 //The following SQL2011 reserved keywords are used as identifiers in many q 
tests, they may be added back due to backward compatibility.
+//We are planning to remove the following whole list after several releases.
+//Thus, please do not change the following list unless you know what to do.
 sql11ReservedKeywordsUsedAsIdentifier
 :
 KW_ALL | KW_ALTER | KW_ARRAY | KW_AS | KW_AUTHORIZATION | KW_BETWEEN | 
KW_BIGINT | KW_BINARY | KW_BOOLEAN 



hive git commit: HIVE-11537 : Branch-1.0 build is broken (Pengcheng Xiong reviewed by Swarnim Kulkarni)

2015-08-12 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/branch-1.0 cafd55593 -> a4eb78c82


HIVE-11537 : Branch-1.0 build is broken (Pengcheng Xiong reviewed by Swarnim 
Kulkarni)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a4eb78c8
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a4eb78c8
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a4eb78c8

Branch: refs/heads/branch-1.0
Commit: a4eb78c82e50458701f76cd6ffe6474d4ec6b8c1
Parents: cafd555
Author: Pengcheng Xiong 
Authored: Wed Aug 12 10:33:14 2015 -0700
Committer: Pengcheng Xiong 
Committed: Wed Aug 12 10:33:14 2015 -0700

--
 ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java | 1 +
 1 file changed, 1 insertion(+)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/a4eb78c8/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
index c312fc3..5e398ad 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
@@ -38,6 +38,7 @@ import org.apache.hadoop.hive.ql.exec.FetchTask;
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.exec.SelectOperator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;



hive git commit: HIVE-11480: CBO: Calcite Operator To Hive Operator (Calcite Return Path): char/varchar as input to GenericUDAF (Pengcheng Xiong, reviewed by Jesus Camacho Rodriguez)

2015-08-12 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master c4ceefb4c -> 0140df748


HIVE-11480: CBO: Calcite Operator To Hive Operator (Calcite Return Path): 
char/varchar as input to GenericUDAF (Pengcheng Xiong, reviewed by Jesus 
Camacho Rodriguez)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0140df74
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0140df74
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0140df74

Branch: refs/heads/master
Commit: 0140df748f6714cc327132f008a13f6af5e41397
Parents: c4ceefb
Author: Pengcheng Xiong 
Authored: Wed Aug 12 10:43:35 2015 -0700
Committer: Pengcheng Xiong 
Committed: Wed Aug 12 10:43:35 2015 -0700

--
 .../java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStd.java | 2 ++
 .../org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java | 2 ++
 2 files changed, 4 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/0140df74/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStd.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStd.java 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStd.java
index 159a2fe..071884c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStd.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStd.java
@@ -55,6 +55,8 @@ public class GenericUDAFStd extends GenericUDAFVariance {
 case FLOAT:
 case DOUBLE:
 case STRING:
+case VARCHAR:
+case CHAR:
 case TIMESTAMP:
 case DECIMAL:
   return new GenericUDAFStdEvaluator();

http://git-wip-us.apache.org/repos/asf/hive/blob/0140df74/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java
index 3545390..2950605 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java
@@ -72,6 +72,8 @@ public class GenericUDAFVariance extends 
AbstractGenericUDAFResolver {
 case FLOAT:
 case DOUBLE:
 case STRING:
+case VARCHAR:
+case CHAR:
 case TIMESTAMP:
 case DECIMAL:
   return new GenericUDAFVarianceEvaluator();
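
The two added cases are the whole fix: on the Calcite return path, char/varchar columns reach the resolver where only STRING used to arrive, so they must fall through to the same evaluator. A condensed view of the dispatch, with a stand-in enum for Hive's PrimitiveCategory:

// Illustrative stand-in, not the full PrimitiveCategory dispatch.
class ResolverSketch {
  enum Category { INT, FLOAT, DOUBLE, STRING, VARCHAR, CHAR, TIMESTAMP, DECIMAL, BOOLEAN }

  static boolean accepts(Category c) {
    switch (c) {
      case INT: case FLOAT: case DOUBLE: case STRING:
      case VARCHAR: case CHAR:       // the two cases this patch adds
      case TIMESTAMP: case DECIMAL:
        return true;
      default:
        return false;                // e.g. BOOLEAN is still rejected
    }
  }
}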



hive git commit: HIVE-11348: Support START TRANSACTION/COMMIT/ROLLBACK commands: support SQL2011 reserved keywords (Pengcheng Xiong reviewed by Eugene Koifman)

2015-08-12 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/branch-1 425273eed -> d9b5d5322


HIVE-11348: Support START TRANSACTION/COMMIT/ROLLBACK commands: support SQL2011 
reserved keywords (Pengcheng Xiong reviewed by Eugene Koifman)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d9b5d532
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d9b5d532
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d9b5d532

Branch: refs/heads/branch-1
Commit: d9b5d5322b8c93d56888f25d5bab473d6fe295b7
Parents: 425273e
Author: Pengcheng Xiong 
Authored: Wed Aug 12 10:04:54 2015 -0700
Committer: Pengcheng Xiong 
Committed: Wed Aug 12 11:11:18 2015 -0700

--
 .../apache/hadoop/hive/ql/parse/IdentifiersParser.g | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/d9b5d532/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
index 501287d..64af7d1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
@@ -614,7 +614,13 @@ principalIdentifier
 | QuotedIdentifier
 ;
 
-//the new version of nonReserved + sql11ReservedKeywordsUsedAsIdentifier = old 
version of nonReserved 
+//The new version of nonReserved + sql11ReservedKeywordsUsedAsIdentifier = old 
version of nonReserved
+//Non reserved keywords are basically the keywords that can be used as 
identifiers.
+//All the KW_* are automatically not only keywords, but also reserved keywords.
+//That means, they can NOT be used as identifiers.
+//If you would like to use them as identifiers, put them in the nonReserved 
list below.
+//If you are not sure, please refer to the SQL2011 column in
+//http://www.postgresql.org/docs/9.5/static/sql-keywords-appendix.html
 nonReserved
 :
 KW_ADD | KW_ADMIN | KW_AFTER | KW_ANALYZE | KW_ARCHIVE | KW_ASC | 
KW_BEFORE | KW_BUCKET | KW_BUCKETS
@@ -636,11 +642,7 @@ nonReserved
 | KW_TINYINT | KW_TOUCH | KW_TRANSACTIONS | KW_UNARCHIVE | KW_UNDO | 
KW_UNIONTYPE | KW_UNLOCK | KW_UNSET
 | KW_UNSIGNED | KW_URI | KW_USE | KW_UTC | KW_UTCTIMESTAMP | KW_VALUE_TYPE 
| KW_VIEW | KW_WHILE | KW_YEAR
 | KW_WORK
-| KW_START
 | KW_TRANSACTION
-| KW_COMMIT
-| KW_ROLLBACK
-| KW_ONLY
 | KW_WRITE
 | KW_ISOLATION
 | KW_LEVEL
@@ -648,13 +650,15 @@ nonReserved
 | KW_AUTOCOMMIT
 ;
 
-//The following SQL2011 reserved keywords are used as cast function name only, 
it is a subset of the sql11ReservedKeywordsUsedAsIdentifier.
+//The following SQL2011 reserved keywords are used as cast function name only, 
but not as identifiers.
 sql11ReservedKeywordsUsedAsCastFunctionName
 :
 KW_BIGINT | KW_BINARY | KW_BOOLEAN | KW_CURRENT_DATE | 
KW_CURRENT_TIMESTAMP | KW_DATE | KW_DOUBLE | KW_FLOAT | KW_INT | KW_SMALLINT | 
KW_TIMESTAMP
 ;
 
 //The following SQL2011 reserved keywords are used as identifiers in many q 
tests, they may be added back due to backward compatibility.
+//We are planning to remove the following whole list after several releases.
+//Thus, please do not change the following list unless you know what to do.
 sql11ReservedKeywordsUsedAsIdentifier
 :
 KW_ALL | KW_ALTER | KW_ARRAY | KW_AS | KW_AUTHORIZATION | KW_BETWEEN | 
KW_BIGINT | KW_BINARY | KW_BOOLEAN 



hive git commit: HIVE-10183:: [CBO] self-join failing in a test case (Ashutosh Chauhan via Pengcheng Xiong)

2015-08-12 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/branch-1.0 4e7260ba3 -> d5dcd3e3e


HIVE-10183:: [CBO] self-join failing in a test case (Ashutosh Chauhan via 
Pengcheng Xiong)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d5dcd3e3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d5dcd3e3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d5dcd3e3

Branch: refs/heads/branch-1.0
Commit: d5dcd3e3eb2a5c8d932862ff43a5085cb8805186
Parents: 4e7260b
Author: Pengcheng Xiong 
Authored: Wed Aug 12 13:26:51 2015 -0700
Committer: Pengcheng Xiong 
Committed: Wed Aug 12 13:26:51 2015 -0700

--
 .../translator/PlanModifierForASTConv.java  | 29 ++--
 1 file changed, 26 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/d5dcd3e3/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/PlanModifierForASTConv.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/PlanModifierForASTConv.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/PlanModifierForASTConv.java
index 57f030b..98f3e26 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/PlanModifierForASTConv.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/PlanModifierForASTConv.java
@@ -30,6 +30,7 @@ import 
org.apache.hadoop.hive.ql.optimizer.optiq.OptiqSemanticException;
 import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveAggregateRel;
 import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveProjectRel;
 import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveSortRel;
+import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.eigenbase.rel.AggregateCall;
 import org.eigenbase.rel.AggregateRelBase;
@@ -53,6 +54,7 @@ import org.eigenbase.reltype.RelDataTypeFactory;
 import org.eigenbase.rex.RexNode;
 import org.eigenbase.sql.SqlKind;
 import org.eigenbase.util.Pair;
+import org.apache.hadoop.hive.ql.optimizer.optiq.RelOptHiveTable;
 
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
@@ -94,6 +96,23 @@ public class PlanModifierForASTConv {
 return newTopNode;
   }
 
+  private static String getTblAlias(RelNode rel) {
+
+if (null == rel) {
+  return null;
+}
+if (rel instanceof HiveTableScanRel) {
+  return 
((RelOptHiveTable)((HiveTableScanRel)rel).getTable()).getTableAlias();
+}
+if (rel instanceof HiveProjectRel) {
+  return null;
+}
+if (rel.getInputs().size() == 1) {
+  return getTblAlias(rel.getInput(0));
+}
+return null;
+  }
+
   private static void convertOpTree(RelNode rel, RelNode parent) {
 
 if (rel instanceof EmptyRel) {
@@ -104,10 +123,14 @@ public class PlanModifierForASTConv {
   if (!validJoinParent(rel, parent)) {
 introduceDerivedTable(rel, parent);
   }
+  String leftChild = getTblAlias(((JoinRelBase)rel).getLeft());
+  if (null != leftChild && 
leftChild.equalsIgnoreCase(getTblAlias(((JoinRelBase)rel).getRight( {
+// introduce derived table above one child, if this is self-join
+// since user provided aliases are lost at this point.
+introduceDerivedTable(((JoinRelBase)rel).getLeft(), rel);
+  }
 } else if (rel instanceof MultiJoinRel) {
-  throw new RuntimeException("Found MultiJoinRel");
-} else if (rel instanceof OneRowRelBase) {
-  throw new RuntimeException("Found OneRowRelBase");
+  throw new RuntimeException("Found MultiJoin");
 } else if (rel instanceof RelSubset) {
   throw new RuntimeException("Found RelSubset");
 } else if (rel instanceof SetOpRel) {



hive git commit: HIVE-11551 : Branch 1.0 : test cases regression after cherry-pick HIVE-9397 (Pengcheng Xiong)

2015-08-13 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/branch-1.0 d5dcd3e3e -> f47b0849c


HIVE-11551 : Branch 1.0 : test cases regression after cherry-pick HIVE-9397 
(Pengcheng Xiong)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f47b0849
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f47b0849
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f47b0849

Branch: refs/heads/branch-1.0
Commit: f47b0849ce3a6a1c50d755c1d50b62f17c1e8e47
Parents: d5dcd3e
Author: Pengcheng Xiong 
Authored: Thu Aug 13 17:28:06 2015 -0700
Committer: Pengcheng Xiong 
Committed: Thu Aug 13 17:28:06 2015 -0700

--
 .../org/apache/hadoop/hive/ql/exec/ColumnInfo.java   | 15 +++
 .../apache/hadoop/hive/ql/exec/SelectOperator.java   |  2 +-
 2 files changed, 16 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/f47b0849/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java
index a34a31d..e3cc946 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java
@@ -237,4 +237,19 @@ public class ColumnInfo implements Serializable {
   public void setObjectinspector(ObjectInspector writableObjectInspector) {
 this.objectInspector = writableObjectInspector;
   }
+
+  public boolean internalEquals(ColumnInfo dest) {
+if (dest == null) {
+  return false;
+}
+
+if ((!checkEquals(internalName, dest.getInternalName()))
+|| (!checkEquals(getType(), dest.getType())) || (isSkewedCol != 
dest.isSkewedCol())
+|| (isVirtualCol != dest.getIsVirtualCol())
+|| (isHiddenVirtualCol != dest.isHiddenVirtualCol())) {
+  return false;
+}
+
+return true;
+  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/f47b0849/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java
index 93017d3..82b7cbf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java
@@ -172,7 +172,7 @@ public class SelectOperator extends Operator 
implements Serializable
 return false;
   }
 
-  if(!origColumn.equals(destColumn)) {
+  if(!origColumn.internalEquals(destColumn)) {
 return false;
   }
 



hive git commit: HIVE-10122 : Hive metastore filter-by-expression is broken for non-partition expressions (Sergey Shelukhin, reviewed by Ashutosh Chauhan)

2015-08-13 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/branch-1.0 f47b0849c -> 301de8353


HIVE-10122 : Hive metastore filter-by-expression is broken for non-partition 
expressions (Sergey Shelukhin, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/301de835
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/301de835
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/301de835

Branch: refs/heads/branch-1.0
Commit: 301de8353c2bc1bf47008296851e4f8efe4c9b4c
Parents: f47b084
Author: Pengcheng Xiong 
Authored: Thu Aug 13 18:11:42 2015 -0700
Committer: Pengcheng Xiong 
Committed: Thu Aug 13 18:11:42 2015 -0700

--
 .../hive/ql/optimizer/ppr/PartitionPruner.java  | 101 ++-
 1 file changed, 52 insertions(+), 49 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/301de835/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
index 4b2a81a..7c305b2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
@@ -194,16 +194,13 @@ public class PartitionPruner implements Transform {
 // Remove all parts that are not partition columns. See javadoc for 
details.
 ExprNodeDesc compactExpr = compactExpr(prunerExpr.clone());
 String oldFilter = prunerExpr.getExprString();
-if (isBooleanExpr(compactExpr)) {
-   // For null and true values, return every partition
-   if (!isFalseExpr(compactExpr)) {
-   // Non-strict mode, and all the predicates are on non-partition 
columns - get everything.
-   LOG.debug("Filter " + oldFilter + " was null after compacting");
-   return getAllPartsFromCacheOrServer(tab, key, true, 
prunedPartitionsMap);
-   } else {
-   return new PrunedPartitionList(tab, new 
LinkedHashSet(new ArrayList()),
-   new ArrayList(), false);
-   }
+if (compactExpr == null || isBooleanExpr(compactExpr)) {
+  if (isFalseExpr(compactExpr)) {
+return new PrunedPartitionList(
+tab, new LinkedHashSet(0), new ArrayList(0), 
false);
+  }
+  // For null and true values, return every partition
+  return getAllPartsFromCacheOrServer(tab, key, true, prunedPartitionsMap);
 }
 LOG.debug("Filter w/ compacting: " + compactExpr.getExprString()
 + "; filter w/o compacting: " + oldFilter);
@@ -235,22 +232,22 @@ public class PartitionPruner implements Transform {
 partsCache.put(key, ppList);
 return ppList;
   }
-  
+
   static private boolean isBooleanExpr(ExprNodeDesc expr) {
- return  expr != null && expr instanceof ExprNodeConstantDesc && 
+return  expr != null && expr instanceof ExprNodeConstantDesc &&
   ((ExprNodeConstantDesc)expr).getTypeInfo() instanceof 
PrimitiveTypeInfo &&
   
((PrimitiveTypeInfo)(((ExprNodeConstantDesc)expr).getTypeInfo())).
-  getTypeName().equals(serdeConstants.BOOLEAN_TYPE_NAME);
+  getTypeName().equals(serdeConstants.BOOLEAN_TYPE_NAME);
   }
   static private boolean isTrueExpr(ExprNodeDesc expr) {
-  return  isBooleanExpr(expr) &&  
-  ((ExprNodeConstantDesc)expr).getValue() != null &&
-  ((ExprNodeConstantDesc)expr).getValue().equals(Boolean.TRUE);
+  return  isBooleanExpr(expr) &&
+  ((ExprNodeConstantDesc)expr).getValue() != null &&
+  ((ExprNodeConstantDesc)expr).getValue().equals(Boolean.TRUE);
   }
   static private boolean isFalseExpr(ExprNodeDesc expr) {
-  return  isBooleanExpr(expr) && 
+  return  isBooleanExpr(expr) &&
   ((ExprNodeConstantDesc)expr).getValue() != null &&
-  ((ExprNodeConstantDesc)expr).getValue().equals(Boolean.FALSE);   
  
+  ((ExprNodeConstantDesc)expr).getValue().equals(Boolean.FALSE);
   }
 
   /**
@@ -262,42 +259,48 @@ public class PartitionPruner implements Transform {
*/
   static private ExprNodeDesc compactExpr(ExprNodeDesc expr) {
 // If this is a constant boolean expression, return the value.
-   if (expr == null) {
-   return null;
-   }
-   if (expr instanceof ExprNodeConstantDesc) {
-  if (isBooleanExpr(expr)) {
-return expr;
-  } else {
-throw new IllegalStateException("Unexpected non-null 
ExprNodeConstantDesc: "
-  + expr.getExprString());
+if (expr == null) {
+  return null;
+}
+if (expr instanceof ExprNodeConstantDesc) {
+  if (((Ex
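
The reshuffled branch above normalizes one rule: after compacting away non-partition predicates, a constant FALSE prunes everything, while TRUE or null (nothing left to filter on) returns every partition. The rule in isolation, with an enum standing in for the compacted ExprNodeDesc:

import java.util.Collections;
import java.util.List;

class PrunerSketch {
  enum Compacted { TRUE, FALSE, NULL }  // stand-in for the folded predicate

  static List<String> prune(Compacted expr, List<String> allParts) {
    if (expr == Compacted.FALSE) {
      return Collections.emptyList();   // no partition can ever match
    }
    return allParts;                    // TRUE or NULL: fetch all partitions
  }
}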

hive git commit: HIVE-11493: Predicate with integer column equals double evaluates to false (Pengcheng Xiong, reviewed by Hari Sankar Sivarama Subramaniyan)

2015-08-13 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/master 6e7629193 -> b8f1ae110


HIVE-11493: Predicate with integer column equals double evaluates to false 
(Pengcheng Xiong, reviewed by Hari Sankar Sivarama Subramaniyan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b8f1ae11
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b8f1ae11
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b8f1ae11

Branch: refs/heads/master
Commit: b8f1ae110616a8fe162f79140c785f76be76fc67
Parents: 6e76291
Author: Pengcheng Xiong 
Authored: Thu Aug 13 21:01:54 2015 -0700
Committer: Pengcheng Xiong 
Committed: Thu Aug 13 21:02:40 2015 -0700

--
 .../hive/ql/parse/TypeCheckProcFactory.java |  2 +-
 .../clientpositive/cast_tinyint_to_double.q |  7 
 .../clientpositive/cast_tinyint_to_double.q.out | 38 
 .../clientpositive/infer_const_type.q.out   |  7 ++--
 .../clientpositive/spark/vectorization_0.q.out  |  2 +-
 .../spark/vectorization_short_regress.q.out | 20 +--
 .../clientpositive/tez/vectorization_0.q.out|  2 +-
 .../tez/vectorization_short_regress.q.out   | 20 +--
 .../clientpositive/vectorization_0.q.out|  2 +-
 .../vectorization_short_regress.q.out   | 20 +--
 10 files changed, 84 insertions(+), 36 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/b8f1ae11/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
index cd68f4e..ab5d006 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
@@ -1034,7 +1034,7 @@ public class TypeCheckProcFactory {
   // we'll try again to convert it to double
   // however, if we already tried this, or the column is NUMBER 
type and
   // the operator is EQUAL, return false due to the type mismatch
-  if (triedDouble ||
+  if (triedDouble &&
   (genericUDF instanceof GenericUDFOPEqual
   && !columnType.equals(serdeConstants.STRING_TYPE_NAME))) {
 return new ExprNodeConstantDesc(false);

http://git-wip-us.apache.org/repos/asf/hive/blob/b8f1ae11/ql/src/test/queries/clientpositive/cast_tinyint_to_double.q
--
diff --git a/ql/src/test/queries/clientpositive/cast_tinyint_to_double.q 
b/ql/src/test/queries/clientpositive/cast_tinyint_to_double.q
new file mode 100644
index 000..59c5e89
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/cast_tinyint_to_double.q
@@ -0,0 +1,7 @@
+drop table t;
+CREATE TABLE t(c tinyint);
+insert overwrite table t select 10 from src limit 1;
+
+select * from t where c = 10.0;
+
+select * from t where c = -10.0;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/b8f1ae11/ql/src/test/results/clientpositive/cast_tinyint_to_double.q.out
--
diff --git a/ql/src/test/results/clientpositive/cast_tinyint_to_double.q.out 
b/ql/src/test/results/clientpositive/cast_tinyint_to_double.q.out
new file mode 100644
index 000..c29df65
--- /dev/null
+++ b/ql/src/test/results/clientpositive/cast_tinyint_to_double.q.out
@@ -0,0 +1,38 @@
+PREHOOK: query: drop table t
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table t
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE t(c tinyint)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t
+POSTHOOK: query: CREATE TABLE t(c tinyint)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t
+PREHOOK: query: insert overwrite table t select 10 from src limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@t
+POSTHOOK: query: insert overwrite table t select 10 from src limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.c EXPRESSION []
+PREHOOK: query: select * from t where c = 10.0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+ A masked pattern was here 
+POSTHOOK: query: select * from t where c = 10.0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+ A masked pattern was here 
+10
+PREHOOK: query: select * from t where c = -10.0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+ A masked pattern was here 
+POSTHOOK: query: select * from t where c = -10.0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+ A maske
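
The one-operator fix above ('||' to '&&') means the type-checker only folds a predicate to constant FALSE once the double conversion has already been tried and the comparison is an equality on a non-string column; previously a predicate like "tinyint_col = 10.0" was folded to FALSE before the cast was ever attempted. The corrected condition in isolation:

class TypeCheckFixSketch {
  // Old, buggy condition: triedDouble || (isEqual && !isStringColumn)
  static boolean foldToFalse(boolean triedDouble, boolean isEqual,
                             boolean isStringColumn) {
    return triedDouble && (isEqual && !isStringColumn);
  }
}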

[6/7] hive git commit: backport HIVE-9560: When hive.stats.collect.rawdatasize=true, 'rawDataSize' for an ORC table will result in value '0' after running 'analyze table TABLE_NAME compute statistics;

2015-08-13 Thread pxiong
http://git-wip-us.apache.org/repos/asf/hive/blob/9eb95813/ql/src/test/results/clientpositive/orc_analyze.q.out
--
diff --git a/ql/src/test/results/clientpositive/orc_analyze.q.out 
b/ql/src/test/results/clientpositive/orc_analyze.q.out
index b422db5..858da6c 100644
--- a/ql/src/test/results/clientpositive/orc_analyze.q.out
+++ b/ql/src/test/results/clientpositive/orc_analyze.q.out
@@ -71,6 +71,55 @@ POSTHOOK: Lineage: orc_create_people.last_name SIMPLE 
[(orc_create_people_stagin
 POSTHOOK: Lineage: orc_create_people.salary SIMPLE 
[(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:salary, 
type:decimal(10,0), comment:null), ]
 POSTHOOK: Lineage: orc_create_people.start_date SIMPLE 
[(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:start_date,
 type:timestamp, comment:null), ]
 POSTHOOK: Lineage: orc_create_people.state SIMPLE 
[(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:state, 
type:string, comment:null), ]
+PREHOOK: query: analyze table orc_create_people compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_create_people
+PREHOOK: Output: default@orc_create_people
+POSTHOOK: query: analyze table orc_create_people compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_create_people
+POSTHOOK: Output: default@orc_create_people
+PREHOOK: query: desc formatted orc_create_people
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@orc_create_people
+POSTHOOK: query: desc formatted orc_create_people
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@orc_create_people
+# col_name data_type   comment 
+
+id int 
+first_name string  
+last_name  string  
+addressstring  
+salary decimal(10,0)   
+start_date timestamp   
+state  string  
+
+# Detailed Table Information
+Database:  default  
+ A masked pattern was here 
+Protect Mode:  None 
+Retention: 0
+ A masked pattern was here 
+Table Type:MANAGED_TABLE
+Table Parameters:   
+   COLUMN_STATS_ACCURATE   true
+   numFiles1   
+   numRows 100 
+   rawDataSize 52600   
+   totalSize   3158
+ A masked pattern was here 
+
+# Storage Information   
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat:   org.apache.hadoop.hive.ql.io.orc.OrcInputFormat  
+OutputFormat:  org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
 
+Compressed:No   
+Num Buckets:   -1   
+Bucket Columns:[]   
+Sort Columns:  []   
+Storage Desc Params:
+   serialization.format1   
 PREHOOK: query: analyze table orc_create_people compute statistics partialscan
 PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_create_people
@@ -120,6 +169,55 @@ Bucket Columns:[]
 Sort Columns:  []   
 Storage Desc Params:
serialization.format1   
+PREHOOK: query: analyze table orc_create_people compute statistics noscan
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_create_people
+PREHOOK: Output: default@orc_create_people
+POSTHOOK: query: analyze table orc_create_people compute statistics noscan
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_create_people
+POSTHOOK: Output: default@orc_create_people
+PREHOOK: query: desc formatted orc_create_people
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@orc_create_people
+POSTHOOK: query: desc formatted orc_create_people
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@orc_create_people
+# col_name data_type   comment 
+
+id int 
+first_name string  
+last_name  string  
+addressstring  
+salary decimal(10,0)   
+start_date timestamp   
+state  string  
+   

[3/7] hive git commit: backport HIVE-9560: When hive.stats.collect.rawdatasize=true, 'rawDataSize' for an ORC table will result in value '0' after running 'analyze table TABLE_NAME compute statistics;

2015-08-13 Thread pxiong
http://git-wip-us.apache.org/repos/asf/hive/blob/9eb95813/ql/src/test/results/clientpositive/tez/vectorization_short_regress.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/tez/vectorization_short_regress.q.out 
b/ql/src/test/results/clientpositive/tez/vectorization_short_regress.q.out
index aa477cd..11683d4 100644
--- a/ql/src/test/results/clientpositive/tez/vectorization_short_regress.q.out
+++ b/ql/src/test/results/clientpositive/tez/vectorization_short_regress.q.out
@@ -146,14 +146,14 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
   alias: alltypesorc
-  Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE
+  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
   Filter Operator
 predicate: 762 = cbigint) or ((csmallint < cfloat) and 
((ctimestamp2 > -5) and (cdouble <> cint or (cstring1 = 'a')) or ((cbigint 
<= -1.389) and ((cstring2 <> 'a') and ((79.553 <> cint) and (cboolean2 <> 
cboolean1) (type: boolean)
-Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE
+Statistics: Num rows: 12288 Data size: 2641964 Basic 
stats: COMPLETE Column stats: NONE
 Select Operator
   expressions: cint (type: int), cdouble (type: double), 
csmallint (type: smallint), cfloat (type: float), ctinyint (type: tinyint)
   outputColumnNames: cint, cdouble, csmallint, cfloat, 
ctinyint
-  Statistics: Num rows: 12288 Data size: 377237 Basic 
stats: COMPLETE Column stats: NONE
+  Statistics: Num rows: 12288 Data size: 2641964 Basic 
stats: COMPLETE Column stats: NONE
   Group By Operator
 aggregations: avg(cint), sum(cdouble), 
stddev_pop(cint), stddev_samp(csmallint), var_samp(cint), avg(cfloat), 
stddev_samp(cint), min(ctinyint), count(csmallint)
 mode: hash
@@ -358,14 +358,14 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
   alias: alltypesorc
-  Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE
+  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
   Filter Operator
 predicate: (cbigint <= 197) and (cint < cbigint)) or 
((cdouble >= -26.28) and (csmallint > cdouble))) or ((ctinyint > cfloat) and 
(cstring1 rlike '.*ss.*'))) or ((cfloat > 79.553) and (cstring2 like '10%'))) 
(type: boolean)
-Statistics: Num rows: 6826 Data size: 209555 Basic stats: 
COMPLETE Column stats: NONE
+Statistics: Num rows: 6826 Data size: 1467614 Basic stats: 
COMPLETE Column stats: NONE
 Select Operator
   expressions: cint (type: int), cbigint (type: bigint), 
csmallint (type: smallint), cdouble (type: double), ctinyint (type: tinyint)
   outputColumnNames: cint, cbigint, csmallint, cdouble, 
ctinyint
-  Statistics: Num rows: 6826 Data size: 209555 Basic 
stats: COMPLETE Column stats: NONE
+  Statistics: Num rows: 6826 Data size: 1467614 Basic 
stats: COMPLETE Column stats: NONE
   Group By Operator
 aggregations: max(cint), var_pop(cbigint), 
stddev_pop(csmallint), max(cdouble), avg(ctinyint), min(cint), min(cdouble), 
stddev_samp(csmallint), var_samp(cint)
 mode: hash
@@ -561,14 +561,14 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
   alias: alltypesorc
-  Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE
+  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
   Filter Operator
 predicate: (ctimestamp1 = ctimestamp2) or (762.0 = 
cfloat)) or (cstring1 = 'ss')) or ((csmallint <= cbigint) and (1 = cboolean2))) 
or (cboolean1 is not null and (ctimestamp2 is not null and (cstring2 > 'a' 
(type: boolean)
-Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE
+Statistics: Num rows: 12288 Data size: 2641964 Basic 
stats: COMPLETE Column stats: NONE
 Select Operator
   expressions: cbigint (type: bigint), ctinyint (type: 
tinyint), csmallint (type: smallint), cint (type: int), cdouble (type: double)
   outputColumnNames: cbigint, ctinyint, csmallint, cint, 
cdouble
-  Statistics: Num rows: 12288 Data size: 3

[5/7] hive git commit: backport HIVE-9560: When hive.stats.collect.rawdatasize=true, 'rawDataSize' for an ORC table will result in value '0' after running 'analyze table TABLE_NAME compute statistics;

2015-08-13 Thread pxiong
http://git-wip-us.apache.org/repos/asf/hive/blob/9eb95813/ql/src/test/results/clientpositive/tez/orc_analyze.q.out
--
diff --git a/ql/src/test/results/clientpositive/tez/orc_analyze.q.out 
b/ql/src/test/results/clientpositive/tez/orc_analyze.q.out
index b422db5..858da6c 100644
--- a/ql/src/test/results/clientpositive/tez/orc_analyze.q.out
+++ b/ql/src/test/results/clientpositive/tez/orc_analyze.q.out
@@ -71,6 +71,55 @@ POSTHOOK: Lineage: orc_create_people.last_name SIMPLE 
[(orc_create_people_stagin
 POSTHOOK: Lineage: orc_create_people.salary SIMPLE 
[(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:salary, 
type:decimal(10,0), comment:null), ]
 POSTHOOK: Lineage: orc_create_people.start_date SIMPLE 
[(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:start_date,
 type:timestamp, comment:null), ]
 POSTHOOK: Lineage: orc_create_people.state SIMPLE 
[(orc_create_people_staging)orc_create_people_staging.FieldSchema(name:state, 
type:string, comment:null), ]
+PREHOOK: query: analyze table orc_create_people compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_create_people
+PREHOOK: Output: default@orc_create_people
+POSTHOOK: query: analyze table orc_create_people compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_create_people
+POSTHOOK: Output: default@orc_create_people
+PREHOOK: query: desc formatted orc_create_people
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@orc_create_people
+POSTHOOK: query: desc formatted orc_create_people
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@orc_create_people
+# col_name data_type   comment 
+
+id int 
+first_name string  
+last_name  string  
+addressstring  
+salary decimal(10,0)   
+start_date timestamp   
+state  string  
+
+# Detailed Table Information
+Database:  default  
+ A masked pattern was here 
+Protect Mode:  None 
+Retention: 0
+ A masked pattern was here 
+Table Type:MANAGED_TABLE
+Table Parameters:   
+   COLUMN_STATS_ACCURATE   true
+   numFiles1   
+   numRows 100 
+   rawDataSize 52600   
+   totalSize   3158
+ A masked pattern was here 
+
+# Storage Information   
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat:   org.apache.hadoop.hive.ql.io.orc.OrcInputFormat  
+OutputFormat:  org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
 
+Compressed:No   
+Num Buckets:   -1   
+Bucket Columns:[]   
+Sort Columns:  []   
+Storage Desc Params:
+   serialization.format1   
 PREHOOK: query: analyze table orc_create_people compute statistics partialscan
 PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_create_people
@@ -120,6 +169,55 @@ Bucket Columns:[]
 Sort Columns:  []   
 Storage Desc Params:
serialization.format1   
+PREHOOK: query: analyze table orc_create_people compute statistics noscan
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_create_people
+PREHOOK: Output: default@orc_create_people
+POSTHOOK: query: analyze table orc_create_people compute statistics noscan
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_create_people
+POSTHOOK: Output: default@orc_create_people
+PREHOOK: query: desc formatted orc_create_people
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@orc_create_people
+POSTHOOK: query: desc formatted orc_create_people
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@orc_create_people
+# col_name data_type   comment 
+
+id int 
+first_name string  
+last_name  string  
+addressstring  
+salary decimal(10,0)   
+start_date timestamp   
+state  string   

[1/7] hive git commit: backport HIVE-9560: When hive.stats.collect.rawdatasize=true, 'rawDataSize' for an ORC table will result in value '0' after running 'analyze table TABLE_NAME compute statistics;

2015-08-13 Thread pxiong
Repository: hive
Updated Branches:
  refs/heads/branch-1.0 301de8353 -> 9eb95813a


http://git-wip-us.apache.org/repos/asf/hive/blob/9eb95813/ql/src/test/results/clientpositive/vectorization_short_regress.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/vectorization_short_regress.q.out 
b/ql/src/test/results/clientpositive/vectorization_short_regress.q.out
index 34394c5..ffb1401 100644
--- a/ql/src/test/results/clientpositive/vectorization_short_regress.q.out
+++ b/ql/src/test/results/clientpositive/vectorization_short_regress.q.out
@@ -141,14 +141,14 @@ STAGE PLANS:
   Map Operator Tree:
   TableScan
 alias: alltypesorc
-Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE
+Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
 Filter Operator
  predicate: ((((762 = cbigint) or ((csmallint < cfloat) and ((ctimestamp2 > -5) and (cdouble <> cint)))) or (cstring1 = 'a')) or ((cbigint <= -1.389) and ((cstring2 <> 'a') and ((79.553 <> cint) and (cboolean2 <> cboolean1))))) (type: boolean)
-  Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE
+  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
   Select Operator
 expressions: cint (type: int), cdouble (type: double), 
csmallint (type: smallint), cfloat (type: float), ctinyint (type: tinyint)
 outputColumnNames: cint, cdouble, csmallint, cfloat, ctinyint
-Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE
+Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
 Group By Operator
   aggregations: avg(cint), sum(cdouble), stddev_pop(cint), 
stddev_samp(csmallint), var_samp(cint), avg(cfloat), stddev_samp(cint), 
min(ctinyint), count(csmallint)
   mode: hash
@@ -347,14 +347,14 @@ STAGE PLANS:
   Map Operator Tree:
   TableScan
 alias: alltypesorc
-Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE
+Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
 Filter Operator
  predicate: (((((cbigint <= 197) and (cint < cbigint)) or ((cdouble >= -26.28) and (csmallint > cdouble))) or ((ctinyint > cfloat) and (cstring1 rlike '.*ss.*'))) or ((cfloat > 79.553) and (cstring2 like '10%'))) (type: boolean)
-  Statistics: Num rows: 6826 Data size: 209555 Basic stats: 
COMPLETE Column stats: NONE
+  Statistics: Num rows: 6826 Data size: 1467614 Basic stats: 
COMPLETE Column stats: NONE
   Select Operator
 expressions: cint (type: int), cbigint (type: bigint), 
csmallint (type: smallint), cdouble (type: double), ctinyint (type: tinyint)
 outputColumnNames: cint, cbigint, csmallint, cdouble, ctinyint
-Statistics: Num rows: 6826 Data size: 209555 Basic stats: 
COMPLETE Column stats: NONE
+Statistics: Num rows: 6826 Data size: 1467614 Basic stats: 
COMPLETE Column stats: NONE
 Group By Operator
   aggregations: max(cint), var_pop(cbigint), 
stddev_pop(csmallint), max(cdouble), avg(ctinyint), min(cint), min(cdouble), 
stddev_samp(csmallint), var_samp(cint)
   mode: hash
@@ -544,14 +544,14 @@ STAGE PLANS:
   Map Operator Tree:
   TableScan
 alias: alltypesorc
-Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE
+Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
 Filter Operator
  predicate: (((((ctimestamp1 = ctimestamp2) or (762.0 = cfloat)) or (cstring1 = 'ss')) or ((csmallint <= cbigint) and (1 = cboolean2))) or (cboolean1 is not null and (ctimestamp2 is not null and (cstring2 > 'a')))) (type: boolean)
-  Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE
+  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
   Select Operator
 expressions: cbigint (type: bigint), ctinyint (type: tinyint), 
csmallint (type: smallint), cint (type: int), cdouble (type: double)
 outputColumnNames: cbigint, ctinyint, csmallint, cint, cdouble
-Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE
+Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
 Group By Operator
   aggregations:

[7/7] hive git commit: backport HIVE-9560: When hive.stats.collect.rawdatasize=true, 'rawDataSize' for an ORC table will result in value '0' after running 'analyze table TABLE_NAME compute statistics;

2015-08-13 Thread pxiong
backport HIVE-9560: When hive.stats.collect.rawdatasize=true, 'rawDataSize' for 
an ORC table will result in value '0' after running 'analyze table TABLE_NAME 
compute statistics;' (Prasanth Jayachandran reviewed by Ashutosh Chauhan)
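
A minimal HiveQL sketch of the reported behavior, assuming an ORC table such
as the orc_create_people table from orc_analyze.q:

   SET hive.stats.collect.rawdatasize=true;
   ANALYZE TABLE orc_create_people COMPUTE STATISTICS;
   -- before this patch, Table Parameters reported rawDataSize as 0 for ORC
   -- tables; with it, the value reflects the raw (uncompressed) data size
   DESC FORMATTED orc_create_people;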


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9eb95813
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9eb95813
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9eb95813

Branch: refs/heads/branch-1.0
Commit: 9eb95813a0b58601642fe9293d7cea8cbb0a2215
Parents: 301de83
Author: Pengcheng Xiong 
Authored: Thu Aug 13 23:16:32 2015 -0700
Committer: Pengcheng Xiong 
Committed: Thu Aug 13 23:16:32 2015 -0700

--
 .../hadoop/hive/ql/exec/StatsNoJobTask.java |   8 +-
 .../hive/ql/optimizer/GenMRTableScan1.java  |  17 +-
 .../hive/ql/parse/ProcessAnalyzeTable.java  |  17 +-
 .../hadoop/hive/ql/plan/StatsNoJobWork.java |  10 +
 .../test/queries/clientpositive/orc_analyze.q   |  28 +-
 .../clientpositive/annotate_stats_part.q.out|  52 +-
 .../clientpositive/annotate_stats_table.q.out   |  20 +-
 .../results/clientpositive/limit_pushdown.q.out |  48 +-
 .../results/clientpositive/orc_analyze.q.out| 998 ---
 .../clientpositive/tez/limit_pushdown.q.out |  48 +-
 .../clientpositive/tez/orc_analyze.q.out| 998 ---
 .../clientpositive/tez/vector_char_simple.q.out |  16 +-
 .../tez/vector_left_outer_join.q.out|  16 +-
 .../tez/vector_varchar_simple.q.out |  16 +-
 .../clientpositive/tez/vectorization_0.q.out|  42 +-
 .../clientpositive/tez/vectorization_13.q.out   |  36 +-
 .../clientpositive/tez/vectorization_14.q.out   |  20 +-
 .../clientpositive/tez/vectorization_15.q.out   |  20 +-
 .../clientpositive/tez/vectorization_16.q.out   |  16 +-
 .../clientpositive/tez/vectorization_7.q.out|  20 +-
 .../clientpositive/tez/vectorization_8.q.out|  20 +-
 .../clientpositive/tez/vectorization_9.q.out|  16 +-
 .../clientpositive/tez/vectorization_div0.q.out |  28 +-
 .../tez/vectorization_limit.q.out   |  84 +-
 .../tez/vectorization_pushdown.q.out|   6 +-
 .../tez/vectorization_short_regress.q.out   | 152 +--
 .../tez/vectorized_distinct_gby.q.out   |   8 +-
 .../clientpositive/tez/vectorized_mapjoin.q.out |  14 +-
 .../tez/vectorized_nested_mapjoin.q.out |  26 +-
 .../tez/vectorized_shufflejoin.q.out|  16 +-
 .../clientpositive/vector_char_simple.q.out |  16 +-
 .../clientpositive/vector_coalesce.q.out|  40 +-
 .../clientpositive/vector_decimal_cast.q.out|  10 +-
 .../results/clientpositive/vector_elt.q.out |  12 +-
 .../results/clientpositive/vector_if_expr.q.out |  12 +-
 .../clientpositive/vector_left_outer_join.q.out |  12 +-
 .../clientpositive/vector_varchar_simple.q.out  |  16 +-
 .../clientpositive/vectorization_0.q.out|  42 +-
 .../clientpositive/vectorization_13.q.out   |  36 +-
 .../clientpositive/vectorization_14.q.out   |  20 +-
 .../clientpositive/vectorization_15.q.out   |  20 +-
 .../clientpositive/vectorization_16.q.out   |  16 +-
 .../clientpositive/vectorization_7.q.out|  20 +-
 .../clientpositive/vectorization_8.q.out|  20 +-
 .../clientpositive/vectorization_9.q.out|  16 +-
 .../clientpositive/vectorization_div0.q.out |  36 +-
 .../clientpositive/vectorization_limit.q.out|  94 +-
 .../clientpositive/vectorization_pushdown.q.out |   6 +-
 .../vectorization_short_regress.q.out   | 152 +--
 .../clientpositive/vectorized_case.q.out|   8 +-
 .../clientpositive/vectorized_casts.q.out   |   8 +-
 .../vectorized_distinct_gby.q.out   |   8 +-
 .../clientpositive/vectorized_mapjoin.q.out |  12 +-
 .../clientpositive/vectorized_math_funcs.q.out  |   8 +-
 .../vectorized_nested_mapjoin.q.out |  22 +-
 .../clientpositive/vectorized_shufflejoin.q.out |  16 +-
 .../vectorized_string_funcs.q.out   |   8 +-
 .../clientpositive/windowing_streaming.q.out|  14 +-
 58 files changed, 2499 insertions(+), 1017 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/9eb95813/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java
index f089964..868cf04 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec;
 
 import java.io.Serializable;
+import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 impo
