[hive] branch master updated: HIVE-21863: Improve Vectorizer type casting for WHEN expression (Vineet Garg, reviewed by Gopal V)

2019-07-16 Thread vgarg
This is an automated email from the ASF dual-hosted git repository.

vgarg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new ee0a10d  HIVE-21863: Improve Vectorizer type casting for WHEN expression (Vineet Garg, reviewed by Gopal V)
ee0a10d is described below

commit ee0a10d904bd39ca22c40cd5768bc12eef7ba71d
Author: Vineet Garg 
AuthorDate: Tue Jul 16 15:01:32 2019 -0700

HIVE-21863: Improve Vectorizer type casting for WHEN expression (Vineet Garg, reviewed by Gopal V)
---
 .../hive/ql/exec/vector/VectorizationContext.java  |  18 ++
 .../queries/clientpositive/vector_case_when_2.q|  12 +
 .../clientpositive/llap/vector_case_when_2.q.out   | 268 +
 .../clientpositive/vector_case_when_2.q.out| 210 
 4 files changed, 508 insertions(+)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 51d92a3..d5257c7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -618,6 +618,7 @@ import com.google.common.annotations.VisibleForTesting;
 udfsNeedingImplicitDecimalCast.add(GenericUDFOPGreaterThan.class);
 udfsNeedingImplicitDecimalCast.add(GenericUDFOPEqualOrGreaterThan.class);
 udfsNeedingImplicitDecimalCast.add(GenericUDFBetween.class);
+udfsNeedingImplicitDecimalCast.add(GenericUDFWhen.class);
 udfsNeedingImplicitDecimalCast.add(UDFSqrt.class);
 udfsNeedingImplicitDecimalCast.add(UDFRand.class);
 udfsNeedingImplicitDecimalCast.add(UDFLn.class);
@@ -1194,6 +1195,23 @@ import com.google.common.annotations.VisibleForTesting;
   childrenWithCasts.add(child);
 }
   }
+} else if(genericUDF instanceof GenericUDFWhen) {
+  boolean hasElseClause = children.size() % 2 == 1 ;
+  for (int i=0; i [...]

diff --git a/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out b/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out [...]
+  Select Operator
+expressions: q548284 (type: int), CASE WHEN ((q548284 = 1)) THEN (0.2) WHEN ((q548284 = 2)) THEN (0.4) WHEN ((q548284 = 3)) THEN (0.6) WHEN ((q548284 = 4)) THEN (0.8) WHEN ((q548284 = 5)) THEN (1) ELSE (null) END (type: decimal(11,1))
+outputColumnNames: _col0, _col1
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumnNums: [0, 16]
+selectExpressions: IfExprCondExprCondExpr(col 2:boolean, col 3:decimal(11,1)col 15:decimal(11,1))(children: LongColEqualLongScalar(col 0:int, val 1) -> 2:boolean, ConstantVectorExpression(val 0.2) -> 3:decimal(11,1), IfExprCondExprCondExpr(col 4:boolean, col 5:decimal(11,1)col 14:decimal(11,1))(children: LongColEqualLongScalar(col 0:int, val 2) -> 4:boolean, ConstantVectorExpression(val 0.4) -> 5:decimal(11,1), IfExprCondExprCondExpr(col 6:boolean, col 7:decimal(1 [...]
+Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+Reduce Output Operator
+  key expressions: _col0 (type: int)
+  sort order: +
+  Reduce Sink Vectorization:
+  className: VectorReduceSinkObjectHashOperator
+  keyColumns: 0:int
+  native: true
+  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+  valueColumns: 16:decimal(11,1)
+  Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+  TopN Hash Memory Usage: 0.1
+  value expressions: _col1 (type: decimal(11,1))
+Execution mode: vectorized, llap
+LLAP IO: no inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+inputFormatFeatureSupport: [DECIMAL_64]
+featureSupportInUse: [DECIMAL_64]
+inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+allNative: true
+usesVectorUDFAdaptor: false
+vectorized: true
+rowBatchContext:
+dataColumnCount: 1
+includeColumns: [0]
+dataColumns: q548284:int
+partitionColumnCount: 0
+scratchColumnTypeNames: [bigint, decimal(11,1), bigint, decimal(11,1), bigint, [...]
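
The tail of the VectorizationContext hunk above is cut off by the archive. As a rough standalone sketch of the idea, not the committed code (the type names and the loop below are illustrative assumptions): the new GenericUDFWhen branch walks the alternating condition/value children of a CASE WHEN and marks every value branch whose type differs from the expression's common decimal type for an implicit cast.

    // WhenCastSketch.java -- hypothetical illustration, not Hive source.
    import java.util.Arrays;
    import java.util.List;

    public class WhenCastSketch {
      public static void main(String[] args) {
        // Children of: CASE WHEN c1 THEN 0.2 WHEN c2 THEN 1 ELSE 0.4 END
        // alternate [cond, value, cond, value, ..., elseValue].
        List<String> childTypes = Arrays.asList(
            "boolean", "decimal(2,1)", "boolean", "int", "decimal(11,1)");
        String commonType = "decimal(11,1)"; // assumed to come from type inference
        boolean hasElseClause = childTypes.size() % 2 == 1;
        for (int i = 0; i < childTypes.size(); i++) {
          // Value branches sit at odd indices, plus the trailing ELSE child.
          boolean isValueBranch = (i % 2 == 1)
              || (hasElseClause && i == childTypes.size() - 1);
          if (isValueBranch && !childTypes.get(i).equals(commonType)) {
            System.out.println("cast child " + i + " from "
                + childTypes.get(i) + " to " + commonType);
          }
        }
      }
    }

This is consistent with the q.out above, where every THEN constant is projected as decimal(11,1), the common type of the branches.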

[hive] branch master updated: HIVE-21963: TransactionalValidationListener.validateTableStructure should check the partition directories in the case of partitioned tables (Jason Dere, reviewed by Vaibhav Gumashta)

2019-07-16 Thread jdere
This is an automated email from the ASF dual-hosted git repository.

jdere pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 65cdce1  HIVE-21963: TransactionalValidationListener.validateTableStructure should check the partition directories in the case of partitioned tables (Jason Dere, reviewed by Vaibhav Gumashta)
65cdce1 is described below

commit 65cdce16fe40ef93ecdda763ead67ed130b3dab5
Author: Jason Dere 
AuthorDate: Tue Jul 16 09:42:59 2019 -0700

HIVE-21963: TransactionalValidationListener.validateTableStructure should check the partition directories in the case of partitioned tables (Jason Dere, reviewed by Vaibhav Gumashta)
---
 .../metastore/TransactionalValidationListener.java | 59 --
 1 file changed, 54 insertions(+), 5 deletions(-)

diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java
index afa6e4c..b1a92ef 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.metastore;
 import java.io.IOException;
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.regex.Pattern;
@@ -28,11 +29,12 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.LocatedFileStatus;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.InitializeTableWriteIdsRequest;
 import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
+import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
 import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
@@ -455,16 +457,63 @@ public final class TransactionalValidationListener extends MetaStorePreEventList
*/
   private void validateTableStructure(IHMSHandler hmsHandler, Table table)
 throws MetaException {
-Path tablePath;
+Warehouse wh = hmsHandler.getWh();
+if (isPartitionedTable(table)) {
+  // Validate each partition directory
+  List<Partition> partitions = getTablePartitions(hmsHandler, table);
+  for (Partition partition : partitions) {
+Path partPath = wh.getDnsPath(new Path(partition.getSd().getLocation()));
+validateTableStructureForPath(hmsHandler, wh, table, partPath);
+  }
+} else {
+  // Non-partitioned - only need to worry about the base table directory
+  Path tablePath = getTablePath(hmsHandler, wh, table);
+  validateTableStructureForPath(hmsHandler, wh, table, tablePath);
+}
+  }
+
+  private List<Partition> getTablePartitions(IHMSHandler hmsHandler, Table table) throws MetaException {
+try {
+  RawStore rawStore = hmsHandler.getMS();
+  String catName = getTableCatalog(table);
+  List<Partition> partitions = rawStore.getPartitions(catName, table.getDbName(), table.getTableName(), -1);
+  return partitions;
+} catch (Exception err) {
+  String msg = "Error getting partitions for " + Warehouse.getQualifiedName(table);
+  LOG.error(msg, err);
+  MetaException e1 = new MetaException(msg);
+  e1.initCause(err);
+  throw e1;
+}
+  }
+
+  private Path getTablePath(IHMSHandler hmsHandler, Warehouse wh, Table table) throws MetaException {
+Path tablePath = null;
 try {
-  Warehouse wh = hmsHandler.getWh();
   if (table.getSd().getLocation() == null || table.getSd().getLocation().isEmpty()) {
 String catName = getTableCatalog(table);
 tablePath = wh.getDefaultTablePath(hmsHandler.getMS().getDatabase(
-catName, table.getDbName()), table);
+catName, table.getDbName()), table);
   } else {
 tablePath = wh.getDnsPath(new Path(table.getSd().getLocation()));
   }
+} catch (Exception err) {
+  MetaException e1 = new MetaException("Error getting table path for " + Warehouse.getQualifiedName(table));
+  e1.initCause(err);
+  throw e1;
+}
+return tablePath;
+  }
+
+  private static boolean isPartitionedTable(Table tableObj) {
+List<FieldSchema> partKeys = tableObj.getPartitionKeys();
+if (partKeys != null && partKeys.size() > 0) {
+  return true;
+}
+return false;
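
The archive truncates this message mid-diff. As a standalone sketch of the control flow the patch introduces (plain Java, not Hive code; validatePath stands in for the real per-directory check):

    // ValidateStructureSketch.java -- illustrative only.
    import java.util.Arrays;
    import java.util.Collections;
    import java.util.List;

    public class ValidateStructureSketch {
      static void validate(List<String> partitionLocations, String tableLocation) {
        if (!partitionLocations.isEmpty()) {
          // Partitioned: each partition directory is checked individually.
          for (String loc : partitionLocations) {
            validatePath(loc);
          }
        } else {
          // Non-partitioned: only the base table directory matters.
          validatePath(tableLocation);
        }
      }

      static void validatePath(String location) {
        System.out.println("checking layout under " + location);
      }

      public static void main(String[] args) {
        validate(Arrays.asList("/warehouse/t/p=1", "/warehouse/t/p=2"), "/warehouse/t");
        validate(Collections.emptyList(), "/warehouse/u");
      }
    }

Before this change only the base table location was inspected, so a partitioned table's partition directories escaped validation entirely.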

[hive] branch master updated: HIVE-21998: HIVE-21823 commit message is wrong

2019-07-16 Thread pvary
This is an automated email from the ASF dual-hosted git repository.

pvary pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new e494f1b  HIVE-21998: HIVE-21823 commit message is wrong
e494f1b is described below

commit e494f1bf595ec96016266bb68ce983f3de7607c8
Author: Peter Vary 
AuthorDate: Tue Jul 16 15:36:09 2019 +0200

HIVE-21998: HIVE-21823 commit message is wrong
---
 errata.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/errata.txt b/errata.txt
index 74d4c64..20df1ea 100644
--- a/errata.txt
+++ b/errata.txt
@@ -98,3 +98,4 @@ d16d4f1bcc43d6ebcab0eaf5bc635fb88b60be5f master HIVE-9423 https://issues.apache.org/jira/browse/HIVE-9423
 e7081035bb9768bc014f0aba11417418ececbaf0 master HIVE-17109 https://issues.apache.org/jira/browse/HIVE-17109
 f33db1f68c68b552b9888988f818c03879749461 master HIVE-18617 https://issues.apache.org/jira/browse/HIVE-18617
 1eea5a80ded2df33d57b2296b3bed98cb18383fd master HIVE-19157 https://issues.apache.org/jira/browse/HIVE-19157
+4853a44b2fcfa702d23965ab0d3835b6b57954c4 master HIVE-21823 https://issues.apache.org/jira/browse/HIVE-21823



[hive] branch master updated (5676788 -> 03b92fc)

2019-07-16 Thread kgyrtkirk
This is an automated email from the ASF dual-hosted git repository.

kgyrtkirk pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git.


 from 5676788  HIVE-21988: Do not consider nodes with 0 capacity when calculating host affinity (Peter Vary reviewed by Oliver Draese and Adam Szita)
 new 0f39030  HIVE-21980: Parsing time can be high in case of deeply nested subqueries (Zoltan Haindrich reviewed by Vineet Garg)
 new 03b92fc  HIVE-21972: "show transactions" display the header twice (Rajkumar Singh via Gopal V)

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../ql/ddl/process/ShowTransactionsOperation.java  |  6 +-
 .../apache/hadoop/hive/ql/parse/FromClauseParser.g | 16 ++--
 .../hadoop/hive/ql/parse/TestParseDriver.java  | 94 +-
 .../test/results/clientnegative/subq_insert.q.out  |  2 +-
 4 files changed, 104 insertions(+), 14 deletions(-)



[hive] 02/02: HIVE-21972: "show transactions" display the header twice (Rajkumar Singh via Gopal V)

2019-07-16 Thread kgyrtkirk
This is an automated email from the ASF dual-hosted git repository.

kgyrtkirk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git

commit 03b92fc1aa38985901c8939d717aaf0cd7ee532e
Author: Rajkumar Singh 
AuthorDate: Tue Jul 16 11:14:53 2019 +0200

HIVE-21972: "show transactions" display the header twice (Rajkumar Singh via Gopal V)

Signed-off-by: Zoltan Haindrich 
---
 .../hadoop/hive/ql/ddl/process/ShowTransactionsOperation.java   | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/process/ShowTransactionsOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/process/ShowTransactionsOperation.java
index cf3963b..805272e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/process/ShowTransactionsOperation.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/process/ShowTransactionsOperation.java
@@ -30,6 +30,7 @@ import org.apache.hadoop.hive.metastore.api.GetOpenTxnsInfoResponse;
 import org.apache.hadoop.hive.metastore.api.TxnInfo;
 import org.apache.hadoop.hive.ql.ddl.DDLOperation;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.session.SessionState;
 
 /**
  * Operation process of showing transactions.
@@ -41,12 +42,15 @@ public class ShowTransactionsOperation extends DDLOperation
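
The diff body is truncated here, but the bug title names a familiar shape: the header line is emitted on more than one code path, so it appears twice in the output. A minimal standalone sketch of the fixed shape (illustrative Java only, not the committed ShowTransactionsOperation code, which the truncated hunk would show):

    // ShowHeaderOnceSketch.java -- illustrative only.
    import java.util.Arrays;
    import java.util.List;

    public class ShowHeaderOnceSketch {
      static String render(List<String> txns) {
        StringBuilder out = new StringBuilder();
        out.append("txnid\tstate\tuser\thost\n");  // header exactly once
        for (String txn : txns) {
          out.append(txn).append('\n');            // rows only inside the loop
        }
        return out.toString();
      }

      public static void main(String[] args) {
        System.out.print(render(Arrays.asList(
            "1\tOPEN\thive\thost1",
            "2\tABORTED\thive\thost2")));
      }
    }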

[hive] 01/02: HIVE-21980: Parsing time can be high in case of deeply nested subqueries (Zoltan Haindrich reviewed by Vineet Garg)

2019-07-16 Thread kgyrtkirk
This is an automated email from the ASF dual-hosted git repository.

kgyrtkirk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git

commit 0f39030c3d33b11ae9c14ac81e047b44e8695371
Author: Zoltan Haindrich 
AuthorDate: Tue Jul 16 10:50:09 2019 +0200

HIVE-21980: Parsing time can be high in case of deeply nested subqueries (Zoltan Haindrich reviewed by Vineet Garg)

Signed-off-by: Zoltan Haindrich 
---
 .../apache/hadoop/hive/ql/parse/FromClauseParser.g | 16 ++--
 .../hadoop/hive/ql/parse/TestParseDriver.java  | 94 +-
 .../test/results/clientnegative/subq_insert.q.out  |  2 +-
 3 files changed, 99 insertions(+), 13 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g
index e2309af..e6b6fd6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g
@@ -96,16 +96,12 @@ fromSource
 atomjoinSource
 @init { gParent.pushMsg("joinSource", state); }
 @after { gParent.popMsg(state); }
-:
-tableSource (lateralView^)*
-|
-virtualTableSource (lateralView^)*
-|
-(subQuerySource) => subQuerySource (lateralView^)*
-|
-partitionedTableFunction (lateralView^)*
-|
-LPAREN! joinSource RPAREN!
+:  tableSource (lateralView^)*
+|  virtualTableSource (lateralView^)*
+|  (LPAREN (KW_WITH|KW_SELECT|KW_MAP|KW_REDUCE|KW_FROM)) => subQuerySource (lateralView^)*
+|  (LPAREN LPAREN atomSelectStatement RPAREN setOperator ) => subQuerySource (lateralView^)*
+|  partitionedTableFunction (lateralView^)*
+|  LPAREN! joinSource RPAREN!
 ;
 
 joinSource
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseDriver.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseDriver.java
index c5d099d..f3372af 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseDriver.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseDriver.java
@@ -19,10 +19,14 @@ package org.apache.hadoop.hive.ql.parse;
 
 import static org.junit.Assert.assertEquals;
 
+import java.io.File;
+import java.nio.charset.Charset;
+
 import org.junit.FixMethodOrder;
 import org.junit.Test;
 import org.junit.runners.MethodSorters;
 
+import com.google.common.io.Files;
 
 @FixMethodOrder(MethodSorters.NAME_ASCENDING)
 public class TestParseDriver {
@@ -40,7 +44,7 @@ public class TestParseDriver {
 String whereStr = "field5=1 and field6 in ('a', 'b')";
 String havingStr = "sum(field7) > 11";
 ASTNode tree = parseDriver.parse(selectStr + " from table1 where " + whereStr
-  + " group by field1, field2 having  " + havingStr);
++ " group by field1, field2 having  " + havingStr);
 assertEquals(tree.getType(), 0);
 assertEquals(tree.getChildCount(), 2);
 ASTNode queryTree = (ASTNode) tree.getChild(0);
@@ -106,7 +110,7 @@ public class TestParseDriver {
 assertTree((ASTNode) sumNode.getChild(1), plusNode);
 
 ASTNode tree = parseDriver.parseExpression("case when field1 = 1 then sum(field3 + field4) when field1 != 2 then " +
-  "sum(field3-field4) else sum(field3 * field4) end");
+"sum(field3-field4) else sum(field3 * field4) end");
 assertEquals(tree.getChildCount(), 6);
 assertEquals(tree.getChild(0).getType(), HiveParser.KW_WHEN);
 assertEquals(tree.getChild(1).getType(), HiveParser.EQUAL);
@@ -214,4 +218,90 @@ public class TestParseDriver {
 "AS test_comp_exp");
   }
 
+  static class ExoticQueryBuilder {
+StringBuilder sb = new StringBuilder();
+
+public void recursiveSJS(int depth) {
+  sb.append("select ");
+  addColumns(30);
+  sb.append(" from \n");
+  tablePart(depth);
+  sb.append(" join \n");
+  tablePart(depth);
+  sb.append(" on ( ");
+  wherePart(10);
+  sb.append(" ) ");
+  sb.append(" where ");
+  wherePart(10);
+
+}
+
+private void tablePart(int depth) {
+  if (depth == 0) {
+sb.append(" baseTable ");
+  } else {
+sb.append("(");
+recursiveSJS(depth - 1);
+sb.append(") aa");
+  }
+}
+
+private void wherePart(int num) {
+  for (int i = 0; i < num - 1; i++) {
+sb.append("x = ");
+sb.append(i);
+sb.append(" or ");
+  }
+  sb.append("x = -1");
+
+}
+
+private void addColumns(int num) {
+  for (int i = 0; i < num - 1; i++) {
+sb.append("c");
+sb.append(i);
+sb.append(" + 2*sqrt(11)+");
+sb.append(i);
+sb.append(",");
+  }
+  sb.append("cE");
+}
+
+public String getQuery() {
+  return sb.toString();
+}
+  }
+
+  @Test(timeout = 1)
+  public void testExoticSJSSubQuery() throws Exception {
+ExoticQueryBuilder eqb = new ExoticQueryBuilder();
+eqb.recursiveSJS(10);
+String q = eqb.getQuery();
+

[hive] branch master updated: HIVE-21988: Do not consider nodes with 0 capacity when calculating host affinity (Peter Vary reviewed by Oliver Draese and Adam Szita)

2019-07-16 Thread pvary
This is an automated email from the ASF dual-hosted git repository.

pvary pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 5676788  HIVE-21988: Do not consider nodes with 0 capacity when calculating host affinity (Peter Vary reviewed by Oliver Draese and Adam Szita)
5676788 is described below

commit 5676788893f264e6f42435100f6f25ba2b6d28b7
Author: Peter Vary 
AuthorDate: Tue Jul 16 10:51:28 2019 +0200

HIVE-21988: Do not consider nodes with 0 capacity when calculating host affinity (Peter Vary reviewed by Oliver Draese and Adam Szita)
---
 .../registry/impl/InactiveServiceInstance.java |   3 +-
 .../llap/registry/impl/LlapFixedRegistryImpl.java  |   8 +-
 .../registry/impl/LlapZookeeperRegistryImpl.java   |   8 +-
 .../tez/HostAffinitySplitLocationProvider.java |   4 +-
 .../org/apache/hadoop/hive/ql/exec/tez/Utils.java  |  49 ---
 .../tez/TestHostAffinitySplitLocationProvider.java |   4 +-
 .../apache/hadoop/hive/ql/exec/tez/TestUtils.java  | 157 +
 7 files changed, 210 insertions(+), 23 deletions(-)

diff --git a/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/InactiveServiceInstance.java b/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/InactiveServiceInstance.java
index 1d6b716..d9c2364 100644
--- a/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/InactiveServiceInstance.java
+++ b/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/InactiveServiceInstance.java
@@ -14,6 +14,7 @@
 
 package org.apache.hadoop.hive.llap.registry.impl;
 
+import java.util.Collections;
 import java.util.Map;
 
 import org.apache.hadoop.hive.llap.registry.LlapServiceInstance;
@@ -62,7 +63,7 @@ public class InactiveServiceInstance implements LlapServiceInstance {
 
   @Override
   public Map<String, String> getProperties() {
-throw new UnsupportedOperationException();
+return Collections.emptyMap();
   }
 
   @Override
diff --git a/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapFixedRegistryImpl.java b/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapFixedRegistryImpl.java
index 344eba7..2bedb32 100644
--- a/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapFixedRegistryImpl.java
+++ b/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapFixedRegistryImpl.java
@@ -29,6 +29,8 @@ import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+
+import com.google.common.annotations.VisibleForTesting;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -122,7 +124,11 @@ public class LlapFixedRegistryImpl implements ServiceRegistry [...]

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HostAffinitySplitLocationProvider.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HostAffinitySplitLocationProvider.java [...]
+  @VisibleForTesting
+  final List<String> locations;
 
   public HostAffinitySplitLocationProvider(List<String> knownLocations) {
 Preconditions.checkState(knownLocations != null && !knownLocations.isEmpty(),
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/Utils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/Utils.java
index 1b7321b..db1a0e6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/Utils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/Utils.java
@@ -21,12 +21,12 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
 
+import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 import org.apache.commons.lang.ArrayUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.llap.registry.LlapServiceInstance;
-import org.apache.hadoop.hive.llap.registry.LlapServiceInstanceSet;
 import org.apache.hadoop.hive.llap.registry.impl.LlapRegistryService;
 import org.apache.hadoop.mapred.InputSplit;
 import org.apache.hadoop.mapred.split.SplitLocationProvider;
@@ -50,21 +50,7 @@ public class Utils {
 LOG.info("SplitGenerator using llap affinitized locations: " + 
useCustomLocations);
 if (useCustomLocations) {
   LlapRegistryService serviceRegistry = LlapRegistryService.getClient(conf);
-  LOG.info("Using LLAP instance " + serviceRegistry.getApplicationId());
-
-  Collection<LlapServiceInstance> serviceInstances =
-serviceRegistry.getInstances().getAllInstancesOrdered(true);
-  Preconditions.checkArgument(!serviceInstances.isEmpty(),
-  "No running LLAP daemons! Please check LLAP service status and zookeeper configuration");
-  ArrayList<String> locations = new ArrayList<>(serviceInstances.size());
-  for (LlapServiceInstance serviceInstance : serviceInstances) {
-if (LOG.isDebugEnabled()) {
-  LOG.debug("Adding " + serviceInstance.getWorkerIdentity() + " with hostname=" +
-  serviceInstance.getHost() + " to list for split locations");
-}
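
The rest of this diff is truncated, but the commit summary states the rule plainly: daemons reporting zero capacity should not contribute to host affinity. A standalone sketch of that filter (illustrative Java; Daemon and numExecutors are assumed stand-ins for the LLAP service-instance API):

    // ZeroCapacityFilterSketch.java -- illustrative only.
    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class ZeroCapacityFilterSketch {
      static class Daemon {
        final String host;
        final int numExecutors;
        Daemon(String host, int numExecutors) {
          this.host = host;
          this.numExecutors = numExecutors;
        }
      }

      static List<String> activeLocations(List<Daemon> daemons) {
        List<String> locations = new ArrayList<>();
        for (Daemon d : daemons) {
          if (d.numExecutors > 0) {  // a 0-capacity node gets no affinitized splits
            locations.add(d.host);
          }
        }
        return locations;
      }

      public static void main(String[] args) {
        System.out.println(activeLocations(Arrays.asList(
            new Daemon("n1", 4), new Daemon("n2", 0), new Daemon("n3", 2))));
        // prints [n1, n3]
      }
    }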

[hive] branch master updated: HIVE-21956 : Add the list of table selected by dump in the dump folder. (Mahesh Kumar Behera reviewed by Sankar Hariappan)

2019-07-16 Thread mahesh
This is an automated email from the ASF dual-hosted git repository.

mahesh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new c7340c6  HIVE-21956 : Add the list of table selected by dump in the dump folder.  (Mahesh Kumar Behera reviewed by  Sankar Hariappan)
c7340c6 is described below

commit c7340c6f6e765ef6e499f7a3c399beab843cb6b0
Author: Mahesh Kumar Behera 
AuthorDate: Tue Jul 16 12:04:08 2019 +0530

HIVE-21956 : Add the list of table selected by dump in the dump folder.  (Mahesh Kumar Behera reviewed by  Sankar Hariappan)
---
 .../parse/TestTableLevelReplicationScenarios.java  | 107 -
 .../hadoop/hive/ql/exec/repl/ReplDumpTask.java | 102 +---
 .../events/filesystem/BootstrapEventsIterator.java |   2 +-
 .../events/filesystem/DatabaseEventsIterator.java  |   6 +-
 .../hadoop/hive/ql/exec/repl/util/ReplUtils.java   |  18 +++-
 5 files changed, 218 insertions(+), 17 deletions(-)

diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestTableLevelReplicationScenarios.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestTableLevelReplicationScenarios.java
index 09db38d..270e61a 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestTableLevelReplicationScenarios.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestTableLevelReplicationScenarios.java
@@ -18,6 +18,7 @@
 package org.apache.hadoop.hive.ql.parse;
 
 import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
@@ -36,6 +37,11 @@ import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.HashSet;
+import java.util.Set;
+import java.io.BufferedReader;
+import java.io.InputStream;
+import java.io.InputStreamReader;
 
 import static org.apache.hadoop.hive.ql.exec.repl.ReplExternalTables.FILE_NAME;
 import static org.apache.hadoop.hive.ql.exec.repl.util.ReplUtils.INC_BOOTSTRAP_ROOT_DIR_NAME;
@@ -153,6 +159,9 @@ public class TestTableLevelReplicationScenarios extends BaseReplicationScenarios
   verifyBootstrapDirInIncrementalDump(tuple.dumpLocation, bootstrappedTables);
 }
 
+// If the policy contains ".'" it is a table-level replication policy.
+verifyTableListForPolicy(tuple.dumpLocation, replPolicy.contains(".'") ? expectedTables : null);
+
 replica.load(replicatedDbName, tuple.dumpLocation, loadWithClause)
 .run("use " + replicatedDbName)
 .run("show tables")
@@ -194,6 +203,36 @@ public class TestTableLevelReplicationScenarios extends BaseReplicationScenarios
 }
   }
 
+  private void verifyTableListForPolicy(String dumpLocation, String[] tableList) throws Throwable {
+FileSystem fileSystem = primary.miniDFSCluster.getFileSystem();
+Path tableListFile = new Path(dumpLocation, ReplUtils.REPL_TABLE_LIST_DIR_NAME);
+tableListFile = new Path(tableListFile, primaryDbName.toLowerCase());
+
+if (tableList == null) {
+  Assert.assertFalse(fileSystem.exists(tableListFile));
+  return;
+} else {
+  Assert.assertTrue(fileSystem.exists(tableListFile));
+}
+
+BufferedReader reader = null;
+try {
+  InputStream inputStream = fileSystem.open(tableListFile);
+  reader = new BufferedReader(new InputStreamReader(inputStream));
+  Set<String> tableNames = new HashSet<>(Arrays.asList(tableList));
+  int numTable = 0;
+  for (String line = reader.readLine(); line != null; line = reader.readLine()) {
+numTable++;
+Assert.assertTrue(tableNames.contains(line));
+  }
+  Assert.assertEquals(numTable, tableList.length);
+} finally {
+  if (reader != null) {
+reader.close();
+  }
+}
+  }
+
   @Test
   public void testBasicBootstrapWithIncludeList() throws Throwable {
 String[] originalNonAcidTables = new String[] {"t1", "t2"};
@@ -660,7 +699,7 @@ public class TestTableLevelReplicationScenarios extends BaseReplicationScenarios
 .run("alter table out100 rename to in100") // this will add the 
bootstrap
 .run("drop table in100");  // table in100 is dropped, so no 
bootstrap should happen.
 
-replicatedTables = new String[] {"in200", "in12", "in12", "in14"};
+replicatedTables = new String[] {"in200", "in12", "in11", "in14"};
 bootstrapTables = new String[] {"in14", "in200"};
 replicateAndVerify(replPolicy, null, lastReplId, null,
 null, bootstrapTables, replicatedTables);
@@ -907,4 +946,70 @@ public class TestTableLevelReplicationScenarios extends BaseReplicationScenarios
 replicateAndVerify(newPolicy, replPolicy, lastReplId, null,
 null, bootstrapTables,
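
The archive cuts the final test off here. As a closing sketch of what verifyTableListForPolicy (shown in full above) checks, rewritten over java.nio with try-with-resources instead of HDFS and a manual finally block (illustrative only; one table name per line, as the test reads it):

    // TableListCheckSketch.java -- illustrative only.
    import java.io.BufferedReader;
    import java.io.IOException;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.Set;

    public class TableListCheckSketch {
      // True when the dumped list holds exactly the expected table names.
      static boolean matches(Path tableListFile, String[] expected) throws IOException {
        Set<String> tableNames = new HashSet<>(Arrays.asList(expected));
        int numTable = 0;
        try (BufferedReader reader = Files.newBufferedReader(tableListFile)) {
          for (String line = reader.readLine(); line != null; line = reader.readLine()) {
            if (!tableNames.contains(line)) {
              return false;  // an unexpected table was dumped
            }
            numTable++;
          }
        }
        return numTable == expected.length;
      }

      public static void main(String[] args) throws IOException {
        Path f = Files.createTempFile("tablelist", ".txt");
        Files.write(f, Arrays.asList("t1", "t2"));
        System.out.println(matches(f, new String[] {"t1", "t2"}));  // true
        System.out.println(matches(f, new String[] {"t1", "t3"}));  // false
      }
    }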