(hive) branch master updated: HIVE-27833: Hive Acid Replication Support for Dell Powerscale (#4841) (Harshal Patel, reviewed by Teddy Choi)

2023-11-21 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new ae64e6de113 HIVE-27833: Hive Acid Replication Support for Dell Powerscale (#4841) (Harshal Patel, reviewed by Teddy Choi)
ae64e6de113 is described below

commit ae64e6de113d8eb7b7a350d1b4430f16c824244d
Author: harshal-16 <109334642+harshal...@users.noreply.github.com>
AuthorDate: Wed Nov 22 06:44:27 2023 +0530

HIVE-27833: Hive Acid Replication Support for Dell Powerscale (#4841) (Harshal Patel, reviewed by Teddy Choi)
---
 .../org/apache/hadoop/hive/common/FileUtils.java   | 24 +-
 .../java/org/apache/hadoop/hive/conf/HiveConf.java |  2 ++
 .../apache/hadoop/hive/common/TestFileUtils.java   | 15 ++
 .../hadoop/hive/ql/parse/repl/CopyUtils.java   |  2 +-
 .../apache/hadoop/hive/shims/Hadoop23Shims.java| 18 +++-
 .../java/org/apache/hadoop/hive/shims/Utils.java   |  8 ++--
 6 files changed, 56 insertions(+), 13 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
index 18efe167a63..be994461f31 100644
--- a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
+++ b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.common;
 
+import static org.apache.hadoop.hive.shims.Utils.RAW_RESERVED_VIRTUAL_PATH;
+
 import java.io.EOFException;
 import java.io.File;
 import java.io.FileNotFoundException;
@@ -61,11 +63,13 @@ import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.PathExistsException;
 import org.apache.hadoop.fs.PathIsDirectoryException;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.shims.HadoopShims;
 import org.apache.hadoop.hive.shims.ShimLoader;
 import org.apache.hadoop.hive.shims.Utils;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.security.UserGroupInformation;
+import com.google.common.base.Preconditions;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.functional.RemoteIterators;
 import org.apache.hive.common.util.ShutdownHookManager;
@@ -767,7 +771,7 @@ public final class FileUtils {
       // is tried and it fails. We depend upon that behaviour in cases like replication,
       // wherein if distcp fails, there is good reason to not plod along with a trivial
       // implementation, and fail instead.
-      copied = doIOUtilsCopyBytes(srcFS, srcFS.getFileStatus(src), dstFS, dst, deleteSource, overwrite, shouldPreserveXAttrs(conf, srcFS, dstFS), conf, copyStatistics);
+      copied = doIOUtilsCopyBytes(srcFS, srcFS.getFileStatus(src), dstFS, dst, deleteSource, overwrite, shouldPreserveXAttrs(conf, srcFS, dstFS, src), conf, copyStatistics);
     }
     return copied;
   }
@@ -895,11 +899,21 @@ public final class FileUtils {
     }
   }
 
-  public static boolean shouldPreserveXAttrs(HiveConf conf, FileSystem srcFS, FileSystem dstFS) throws IOException {
-    if (!Utils.checkFileSystemXAttrSupport(srcFS) || !Utils.checkFileSystemXAttrSupport(dstFS)){
-      return false;
+  public static boolean shouldPreserveXAttrs(HiveConf conf, FileSystem srcFS, FileSystem dstFS, Path path) throws IOException {
+    Preconditions.checkNotNull(path);
+    if (conf.getBoolVar(ConfVars.DFS_XATTR_ONLY_SUPPORTED_ON_RESERVED_NAMESPACE)) {
+
+      if (!(path.toUri().getPath().startsWith(RAW_RESERVED_VIRTUAL_PATH)
+            && Utils.checkFileSystemXAttrSupport(srcFS, new Path(RAW_RESERVED_VIRTUAL_PATH))
+            && Utils.checkFileSystemXAttrSupport(dstFS, new Path(RAW_RESERVED_VIRTUAL_PATH)))) {
+        return false;
+      }
+    } else {
+      if (!Utils.checkFileSystemXAttrSupport(srcFS) || !Utils.checkFileSystemXAttrSupport(dstFS)) {
+        return false;
+      }
     }
-    for (Map.Entry<String, String> entry : conf.getPropsWithPrefix(Utils.DISTCP_OPTIONS_PREFIX).entrySet()) {
+    for (Map.Entry<String, String> entry : conf.getPropsWithPrefix(Utils.DISTCP_OPTIONS_PREFIX).entrySet()) {
       String distCpOption = entry.getKey();
       if (distCpOption.startsWith("p")) {
         return distCpOption.contains("x");
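
For context on the hunk above: with ConfVars.DFS_XATTR_ONLY_SUPPORTED_ON_RESERVED_NAMESPACE enabled, shouldPreserveXAttrs() now only considers preserving XAttrs for paths under HDFS's reserved raw namespace (RAW_RESERVED_VIRTUAL_PATH, i.e. /.reserved/raw), which is the behavior storage such as Dell PowerScale requires. A minimal sketch of calling the new four-argument overload (the paths are illustrative, and the result still depends on the XAttr support probe and any distcp -p options in the conf):

    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.common.FileUtils;
    import org.apache.hadoop.hive.conf.HiveConf;

    public class XAttrGateExample {
      public static void main(String[] args) throws Exception {
        HiveConf conf = new HiveConf();
        conf.setBoolVar(HiveConf.ConfVars.DFS_XATTR_ONLY_SUPPORTED_ON_RESERVED_NAMESPACE, true);
        FileSystem fs = FileSystem.get(conf);
        // Under /.reserved/raw the gate stays open, subject to the XAttr support probe.
        System.out.println(FileUtils.shouldPreserveXAttrs(conf, fs, fs, new Path("/.reserved/raw/warehouse/t1")));
        // Outside the reserved namespace the method now short-circuits to false.
        System.out.println(FileUtils.shouldPreserveXAttrs(conf, fs, fs, new Path("/warehouse/t1")));
      }
    }
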
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index ec463178912..027bab6eb53 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -487,6 +487,8 @@ public class HiveConf extends Configuration {
 MSC_CACHE_RECORD_STATS("hive.metastore.client.cache.v2.recordStats", false,
 "This property enables recording metastore client

[hive] branch master updated: HIVE-27669: Hive Acid CTAS fails incremental if no of rows inserted is > INT_MAX (#4665) (Harshal Patel, reviewed by Teddy Choi)

2023-09-11 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new e49fe08b157 HIVE-27669: Hive Acid CTAS fails incremental if no of rows inserted is > INT_MAX (#4665) (Harshal Patel, reviewed by Teddy Choi)
e49fe08b157 is described below

commit e49fe08b1579cf03d4bb69455535d688fa5bd667
Author: harshal-16 <109334642+harshal...@users.noreply.github.com>
AuthorDate: Mon Sep 11 12:27:34 2023 +0530

HIVE-27669: Hive Acid CTAS fails incremental if no of rows inserted is > INT_MAX (#4665) (Harshal Patel, reviewed by Teddy Choi)
---
 ql/src/java/org/apache/hadoop/hive/ql/ddl/DDLTask.java        |  4 ++++
 .../org/apache/hadoop/hive/ql/exec/repl/util/ReplUtils.java   | 10 ++++++++++
 2 files changed, 14 insertions(+)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/DDLTask.java
index 11b7af6e1de..73966a28f5c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/DDLTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/DDLTask.java
@@ -86,6 +86,10 @@ public final class DDLTask extends Task<DDLWork> implements Serializable {
         throw new IllegalArgumentException("Unknown DDL request: " + ddlDesc.getClass());
       }
     } catch (Throwable e) {
+      if(work.isReplication() && ReplUtils.shouldIgnoreOnError(ddlOperation, e)) {
+        LOG.warn("Error while table creation: ", e);
+        return 0;
+      }
       failed(e);
       if(ddlOperation != null) {
         LOG.error("DDLTask failed, DDL Operation: " + ddlOperation.getClass().toString(), e);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/util/ReplUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/util/ReplUtils.java
index abc3e6627c9..61516a8604d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/util/ReplUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/util/ReplUtils.java
@@ -45,6 +45,8 @@ import org.apache.hadoop.hive.metastore.messaging.MessageFactory;
 import org.apache.hadoop.hive.metastore.utils.StringUtils;
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.ddl.DDLWork;
+import org.apache.hadoop.hive.ql.ddl.DDLOperation;
+import org.apache.hadoop.hive.ql.ddl.table.create.CreateTableOperation;
 import org.apache.hadoop.hive.ql.ddl.table.misc.properties.AlterTableSetPropertiesDesc;
 import org.apache.hadoop.hive.ql.ddl.table.partition.PartitionUtils;
 import org.apache.hadoop.hive.ql.exec.Task;
@@ -351,6 +353,14 @@ public class ReplUtils {
     return errorCode;
   }
 
+  public static boolean shouldIgnoreOnError(DDLOperation ddlOperation, Throwable e) {
+    return ReplUtils.isCreateOperation(ddlOperation) && e.getMessage().contains("java.lang.NumberFormatException");
+  }
+
+  public static boolean isCreateOperation(DDLOperation ddlOperation) {
+    return ddlOperation instanceof CreateTableOperation;
+  }
+
   private static String getMetricStageName(String stageName, ReplicationMetricCollector metricCollector) {
     if( stageName == "REPL_DUMP" || stageName == "REPL_LOAD" || stageName == "ATLAS_DUMP" || stageName == "ATLAS_LOAD"
         || stageName == "RANGER_DUMP" || stageName == "RANGER_LOAD" || stageName == "RANGER_DENY"){
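
The guard above tolerates a specific failure: per the commit subject, a CTAS that inserted more rows than an int can hold produced a java.lang.NumberFormatException during table creation on the replica, and replication should not abort for it. A one-line illustration of the underlying overflow (hypothetical value, not from the patch):

    long rowsInserted = (long) Integer.MAX_VALUE + 1;            // CTAS inserted > INT_MAX rows
    int parsed = Integer.parseInt(Long.toString(rowsInserted));  // throws java.lang.NumberFormatException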



[hive] branch master updated: HIVE-21213: Update integration test to mark compactions as complete instead of aborted. (#4671)

2023-09-07 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 1fb439f8be6 HIVE-21213: Update integration test to mark compactions as complete instead of aborted. (#4671)
1fb439f8be6 is described below

commit 1fb439f8be6d0b248835e4e137e8e9adcc0f4e9b
Author: Rakshith C <56068841+rakshith...@users.noreply.github.com>
AuthorDate: Fri Sep 8 07:29:04 2023 +0530

HIVE-21213: Update integration test to mark compactions as complete instead of aborted. (#4671)
---
 .../parse/TestReplicationScenariosAcidTables.java  | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java
index d63d8d07649..93fd5f0cbc9 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java
@@ -29,16 +29,18 @@ import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.TableType;
 import org.apache.hadoop.hive.metastore.ReplChangeManager;
 import org.apache.hadoop.hive.metastore.Warehouse;
-import org.apache.hadoop.hive.metastore.api.AbortCompactionRequest;
+import org.apache.hadoop.hive.metastore.api.CompactionInfoStruct;
 import org.apache.hadoop.hive.metastore.api.CompactionRequest;
 import org.apache.hadoop.hive.metastore.api.CompactionType;
 import org.apache.hadoop.hive.metastore.api.Database;
 import org.apache.hadoop.hive.metastore.api.AbortTxnRequest;
 import org.apache.hadoop.hive.metastore.api.AbortTxnsRequest;
+import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
 import org.apache.hadoop.hive.metastore.messaging.json.gzip.GzipJSONMessageEncoder;
+import org.apache.hadoop.hive.metastore.txn.CompactionInfo;
 import org.apache.hadoop.hive.metastore.txn.TxnStore;
 import org.apache.hadoop.hive.metastore.txn.TxnUtils;
 import org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore;
@@ -4037,7 +4039,7 @@ public class TestReplicationScenariosAcidTables extends BaseReplicationScenarios
       throws Throwable {
     HiveConf hiveConf = new HiveConf(primary.getConf());
     TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf);
-    abortPreviousCompactions(txnHandler, hiveConf);
+    markPreviousCompactionsAsComplete(txnHandler, hiveConf);
     CompactionRequest rqst = new CompactionRequest(dbName, tblName, compactionType);
     rqst.setPartitionname(partName);
     txnHandler.compact(rqst);
@@ -4046,19 +4048,24 @@ public class TestReplicationScenariosAcidTables extends BaseReplicationScenarios
     runCleaner(hiveConf);
   }
 
-  private void abortPreviousCompactions(TxnStore txnHandler, HiveConf conf) throws Throwable {
+  private void markPreviousCompactionsAsComplete(TxnStore txnHandler, HiveConf conf) throws Throwable {
     Connection conn = TestTxnDbUtil.getConnection(conf);
     Statement stmt = conn.createStatement();
-    ResultSet rs = stmt.executeQuery("select * from COMPACTION_QUEUE");
-    List<Long> compactionsToAbort = new ArrayList<>();
+    ResultSet rs = stmt.executeQuery("select CQ_ID from COMPACTION_QUEUE");
+    List<Long> openCompactionIds = new ArrayList<>();
     while (rs.next()) {
-      compactionsToAbort.add(rs.getLong("CQ_ID"));
-    }
-    if (!compactionsToAbort.isEmpty()) {
-      AbortCompactionRequest rqst = new AbortCompactionRequest();
-      rqst.setCompactionIds(compactionsToAbort);
-      txnHandler.abortCompactions(rqst);
+      openCompactionIds.add(rs.getLong(1));
     }
+    openCompactionIds.forEach(id -> {
+      CompactionInfoStruct compactionInfoStruct = new CompactionInfoStruct();
+      compactionInfoStruct.setId(id);
+      CompactionInfo compactionInfo = CompactionInfo.compactionStructToInfo(compactionInfoStruct);
+      try {
+        txnHandler.markCompacted(compactionInfo);
+      } catch (MetaException e) {
+        throw new RuntimeException(e);
+      }
+    });
   }
 
   private FileStatus[] getDirsInTableLoc(WarehouseInstance wh, String db, String table) throws Throwable {



[hive] branch master updated: HIVE-21213: Acid table bootstrap replication needs to handle directory created by compaction with txn id (#4631) (Rakshith C, reviewed by Teddy Choi)

2023-09-01 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new e84f792f730 HIVE-21213: Acid table bootstrap replication needs to handle directory created by compaction with txn id (#4631) (Rakshith C, reviewed by Teddy Choi)
e84f792f730 is described below

commit e84f792f730df7df941fca98fdf5164940d45953
Author: Rakshith C <56068841+rakshith...@users.noreply.github.com>
AuthorDate: Sat Sep 2 00:23:10 2023 +0530

HIVE-21213: Acid table bootstrap replication needs to handle directory created by compaction with txn id (#4631) (Rakshith C, reviewed by Teddy Choi)
---
 .../parse/TestReplicationScenariosAcidTables.java  | 189 +
 .../hadoop/hive/ql/parse/repl/CopyUtils.java   |  11 ++
 2 files changed, 200 insertions(+)

diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java
index d0bee699a19..d63d8d07649 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java
@@ -28,10 +28,15 @@ import org.apache.hadoop.hive.common.repl.ReplConst;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.TableType;
 import org.apache.hadoop.hive.metastore.ReplChangeManager;
+import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.metastore.api.AbortCompactionRequest;
+import org.apache.hadoop.hive.metastore.api.CompactionRequest;
+import org.apache.hadoop.hive.metastore.api.CompactionType;
 import org.apache.hadoop.hive.metastore.api.Database;
 import org.apache.hadoop.hive.metastore.api.AbortTxnRequest;
 import org.apache.hadoop.hive.metastore.api.AbortTxnsRequest;
 import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
 import org.apache.hadoop.hive.metastore.messaging.json.gzip.GzipJSONMessageEncoder;
 import org.apache.hadoop.hive.metastore.txn.TxnStore;
@@ -40,6 +45,7 @@ import org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore;
 import org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.CallerArguments;
 import org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.BehaviourInjection;
 import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
+import org.apache.hadoop.hive.metastore.utils.TestTxnDbUtil;
 import org.apache.hadoop.hive.ql.DriverFactory;
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.IDriver;
@@ -72,6 +78,9 @@ import org.junit.Test;
 import org.junit.BeforeClass;
 
 import javax.annotation.Nullable;
+import java.sql.Connection;
+import java.sql.ResultSet;
+import java.sql.Statement;
 import java.util.concurrent.TimeUnit;
 import java.io.File;
 import java.io.IOException;
@@ -94,6 +103,8 @@ import static org.apache.hadoop.hive.common.repl.ReplConst.REPL_TARGET_DATABASE_
 import static org.apache.hadoop.hive.common.repl.ReplConst.REPL_TARGET_TABLE_PROPERTY;
 import static org.apache.hadoop.hive.common.repl.ReplConst.SOURCE_OF_REPLICATION;
 import static org.apache.hadoop.hive.common.repl.ReplConst.REPL_ENABLE_BACKGROUND_THREAD;
+import static org.apache.hadoop.hive.ql.TxnCommandsBaseForTests.runCleaner;
+import static org.apache.hadoop.hive.ql.TxnCommandsBaseForTests.runWorker;
 import static org.apache.hadoop.hive.ql.exec.repl.ReplAck.DUMP_ACKNOWLEDGEMENT;
 import static org.apache.hadoop.hive.ql.exec.repl.ReplAck.LOAD_ACKNOWLEDGEMENT;
 
@@ -4021,4 +4032,182 @@ public class TestReplicationScenariosAcidTables extends BaseReplicationScenarios
         TimeUnit.MINUTES);
     isMetricsEnabledForTests(false);
   }
+
+  private void runCompaction(String dbName, String tblName, String partName, CompactionType compactionType)
+      throws Throwable {
+    HiveConf hiveConf = new HiveConf(primary.getConf());
+    TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf);
+    abortPreviousCompactions(txnHandler, hiveConf);
+    CompactionRequest rqst = new CompactionRequest(dbName, tblName, compactionType);
+    rqst.setPartitionname(partName);
+    txnHandler.compact(rqst);
+    hiveConf.setBoolVar(HiveConf.ConfVars.COMPACTOR_CRUD_QUERY_BASED, false);
+    runWorker(hiveConf);
+    runCleaner(hiveConf);
+  }
+
+  private void abortPreviousCompactions(TxnStore txnHandler, HiveConf conf) throws Throwable {
+    Connection conn = TestTxnDbUtil.getConnection(conf);
+    Statement stmt = conn.createStatement();
+    ResultSet rs = stmt.executeQuery("select * from COMPACTION_QUEUE");
+    List<Long> compactionsToAbort
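
For context on the commit subject: the compactor appends a visibility transaction id to the directories it writes (e.g. base_0000007_v0000012 rather than base_0000007), and bootstrap replication's copy path has to recognize that form. A hedged sketch of the naming convention involved (illustrative only; the actual parsing lives in AcidUtils/CopyUtils and may differ):

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    public class CompactionDirNames {
      // "base_<writeId>" optionally followed by "_v<visibilityTxnId>" appended by the compactor.
      private static final Pattern BASE = Pattern.compile("base_(\\d+)(?:_v(\\d+))?");

      public static void main(String[] args) {
        Matcher m = BASE.matcher("base_0000007_v0000012");
        if (m.matches()) {
          System.out.println("writeId=" + Long.parseLong(m.group(1)));  // 7
          System.out.println("visibilityTxnId=" + m.group(2));          // 0000012 (null when absent)
        }
      }
    }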

[hive] branch master updated (1c5d9b90d96 -> 7d69a8ce8ce)

2023-03-15 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


from 1c5d9b90d96 HIVE-27101: Support incremental materialized view rebuild when Iceberg source tables have insert operation only. (Krisztian Kasa, reviewed by Denys Kuzmenko, Aman Sinha)
 add 7d69a8ce8ce HIVE-27130: Add metrics to report the size of data replicated/copied to target (#4105) (Amit Saonerkar, reviewed by Teddy Choi)

No new revisions were added by this update.

Summary of changes:
 ...erruptCallback.java => DataCopyStatistics.java} |  16 ++-
 .../org/apache/hadoop/hive/common/FileUtils.java   |  27 +++--
 .../apache/hadoop/hive/common/TestFileUtils.java   |   3 +-
 .../apache/hadoop/hive/common/TestFileUtils.java   |  23 ++--
 .../parse/TestReplicationScenariosAcidTables.java  | 119 -
 ...licationScenariosIncrementalLoadAcidTables.java |   7 +-
 .../hadoop/hive/ql/parse/WarehouseInstance.java|   4 +-
 .../org/apache/hadoop/hive/ql/exec/CopyTask.java   |  20 +++-
 .../apache/hadoop/hive/ql/exec/ReplCopyTask.java   |  22 +++-
 .../hadoop/hive/ql/exec/repl/DirCopyTask.java  |  27 -
 .../org/apache/hadoop/hive/ql/metadata/Hive.java   |   7 +-
 .../hadoop/hive/ql/parse/repl/CopyUtils.java   |  66 
 .../repl/metric/ReplicationMetricCollector.java|  10 ++
 .../hive/ql/parse/repl/metric/event/Metadata.java  |  10 ++
 .../hadoop/hive/ql/parse/repl/TestCopyUtils.java   |   2 +-
 .../llap/replication_metrics_ingest.q.out  |   4 +-
 16 files changed, 307 insertions(+), 60 deletions(-)
 copy common/src/java/org/apache/hadoop/hive/common/{HiveInterruptCallback.java => DataCopyStatistics.java} (78%)
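
The summary shows DataCopyStatistics was seeded from the HiveInterruptCallback skeleton and threaded through CopyTask, ReplCopyTask, DirCopyTask and CopyUtils so ReplicationMetricCollector can report the size of data copied. A hedged sketch of what such an accumulator plausibly looks like (the method names here are assumptions, not the committed API):

    import java.util.concurrent.atomic.AtomicLong;

    // Hypothetical sketch; the real DataCopyStatistics in this commit may differ.
    public class DataCopyStatistics {
      private final AtomicLong bytesCopied = new AtomicLong();

      public void incrementBytesCopiedCounter(long bytes) {
        bytesCopied.addAndGet(bytes);   // safe to call from concurrent copy tasks
      }

      public long getBytesCopied() {
        return bytesCopied.get();       // read by the metric collector at reporting time
      }
    }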



[hive] branch master updated (2b9d290c04c -> dfb1dd9edba)

2023-03-08 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


from 2b9d290c04c HIVE-27107 : Fix improper metrics count for RESUME/RESET workflow (#4084) (Shreenidhi Saigaonkar, reviewed by Teddy Choi)
 add dfb1dd9edba HIVE-27123 : In PRE_OPTIMIZED_BOOTSTRAP operation, Load side stores dump_execution_id as -1 (#4100) (Harshal Patel, reviewed by Teddy Choi)

No new revisions were added by this update.

Summary of changes:
 .../apache/hadoop/hive/ql/exec/repl/OptimisedBootstrapUtils.java| 6 --
 ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplLoadWork.java   | 6 +++---
 .../java/org/apache/hadoop/hive/ql/exec/repl/ReplLoadWorkMBean.java | 3 ++-
 .../apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java| 3 ++-
 .../org/apache/hadoop/hive/ql/parse/repl/load/DumpMetaData.java | 4 ++--
 5 files changed, 13 insertions(+), 9 deletions(-)



[hive] branch master updated (abbabdd86b3 -> 2b9d290c04c)

2023-03-08 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


from abbabdd86b3 HIVE-26735: Ability to sort the data during rebalancing compaction (Laszlo Vegh, reviewed by Krisztian Kasa, Denys Kuzmenko, Sourabh Badhya)
 add 2b9d290c04c HIVE-27107 : Fix improper metrics count for RESUME/RESET workflow (#4084) (Shreenidhi Saigaonkar, reviewed by Teddy Choi)

No new revisions were added by this update.

Summary of changes:
 .../parse/TestReplicationScenariosAcidTables.java   | 21 +
 .../hadoop/hive/ql/exec/repl/ReplDumpTask.java  |  9 -
 2 files changed, 29 insertions(+), 1 deletion(-)



[hive] branch master updated: HIVE-26962 : Expose resume/reset ready state through replication metrics when first cycle of resume/reset completes (#4016) (Shreenidhi Saigaonkar, reviewed by Teddy Choi)

2023-02-23 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 1d0fcea3adb HIVE-26962 : Expose resume/reset ready state through replication metrics when first cycle of resume/reset completes (#4016) (Shreenidhi Saigaonkar, reviewed by Teddy Choi)
1d0fcea3adb is described below

commit 1d0fcea3adbec9ea9ee3f999b080a6f98ed89264
Author: shreenidhiSaigaonkar <98075562+shreenidhisaigaon...@users.noreply.github.com>
AuthorDate: Fri Feb 24 06:16:42 2023 +0530

HIVE-26962 : Expose resume/reset ready state through replication metrics when first cycle of resume/reset completes (#4016) (Shreenidhi Saigaonkar, reviewed by Teddy Choi)
---
 .../parse/TestReplicationScenariosAcidTables.java  | 88 +-
 .../hadoop/hive/ql/exec/repl/ReplDumpTask.java | 12 ++-
 .../hadoop/hive/ql/exec/repl/ReplLoadTask.java |  6 +-
 .../hive/ql/parse/repl/metric/event/Status.java|  1 +
 4 files changed, 104 insertions(+), 3 deletions(-)

diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java
index 0dfb07f2282..5c69443e73f 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java
@@ -59,6 +59,7 @@ import org.apache.hadoop.hive.ql.parse.repl.metric.MetricCollector;
 import org.apache.hadoop.hive.ql.parse.repl.metric.event.Metric;
 import org.apache.hadoop.hive.ql.parse.repl.metric.event.ReplicationMetric;
 import org.apache.hadoop.hive.ql.parse.repl.metric.event.Stage;
+import org.apache.hadoop.hive.ql.parse.repl.metric.event.Status;
 import org.apache.hadoop.hive.ql.processors.CommandProcessorException;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.shims.Utils;
@@ -71,7 +72,7 @@ import org.junit.Test;
 import org.junit.BeforeClass;
 
 import javax.annotation.Nullable;
-
+import java.util.concurrent.TimeUnit;
 import java.io.File;
 import java.io.IOException;
 import java.io.BufferedReader;
@@ -103,6 +104,8 @@ import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.fail;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
 /**
  * TestReplicationScenariosAcidTables - test replication for ACID tables.
  */
@@ -3797,4 +3800,87 @@ public class TestReplicationScenariosAcidTables extends BaseReplicationScenarios
 // ensure event count is captured appropriately in EventsDumpMetadata.
 assertEquals(eventsCountInAckFile, eventCountFromStagingDir);
   }
+
+  @Test
+  public void testResumeWorkFlow() throws Throwable {
+isMetricsEnabledForTests(true);
+
+MetricCollector.getInstance().getMetrics().clear();
+
+// Do bootstrap
+primary.run("use " + primaryDbName)
+  .run("create table tb1(id int)")
+  .run("insert into tb1 values(10)")
+  .dump(primaryDbName);
+replica.load(replicatedDbName, primaryDbName);
+
+// incremental
+primary.run("use " + primaryDbName)
+  .run("insert into tb1 values(20)")
+  .dump(primaryDbName);
+replica.load(replicatedDbName, primaryDbName);
+
+// suppose this is the point of failover
+    List<String> failoverConfigs = Arrays.asList("'" + HiveConf.ConfVars.HIVE_REPL_FAILOVER_START + "'='true'");
+primary.dump(primaryDbName, failoverConfigs);
+replica.load(replicatedDbName, primaryDbName, failoverConfigs);
+
+// let's modify replica/target after failover
+replica.run("use " + replicatedDbName)
+  .run("insert into tb1 values(30),(40)")
+  .run("create table tb2(id int)")
+  .run("insert into tb2 values(10),(20)");
+
+// orchestrator will do the swapping and setting correct db params
+    Map<String, String> dbParams = replica.getDatabase(replicatedDbName).getParameters();
+String lastId = dbParams.get("repl.last.id");
+String targetLastId = dbParams.get("repl.target.last.id");
+
+    primary.run("alter database " + primaryDbName
+        + " set dbproperties('repl.resume.started'='true', 'repl.source.for'='', 'repl.target" +
+        ".for'='true', 'repl.last.id'='" + targetLastId + "' ,'repl.target.last.id'='" + lastId +
+        "')");
+
+    replica.run("alter database " + replicatedDbName
+        + " set dbproperties('repl.target.for'='', 'repl.source.for'='p1','repl.resume" +
+        "

[hive] branch master updated: HIVE-26961: Fix improper replication metric count when open transactions are filtered. (#4041) (Rakshith Chandraiah, reviewed by Teddy Choi)

2023-02-08 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 11f7ebbcad5 HIVE-26961: Fix improper replication metric count when open transactions are filtered. (#4041) (Rakshith Chandraiah, reviewed by Teddy Choi)
11f7ebbcad5 is described below

commit 11f7ebbcad590fe569ce8f8588f667a6274d657b
Author: Rakshith C <56068841+rakshith...@users.noreply.github.com>
AuthorDate: Wed Feb 8 13:36:42 2023 +0530

HIVE-26961: Fix improper replication metric count when open transactions are filtered. (#4041) (Rakshith Chandraiah, reviewed by Teddy Choi)
---
 .../parse/TestReplicationScenariosAcidTables.java  | 61 ++
 .../hadoop/hive/ql/exec/repl/ReplDumpTask.java |  6 ++-
 .../ql/parse/repl/dump/events/EventHandler.java|  5 ++
 3 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java
index fda11c127e4..0dfb07f2282 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java
@@ -55,6 +55,10 @@ import org.apache.hadoop.hive.ql.parse.repl.load.EventDumpDirComparator;
 import org.apache.hadoop.hive.ql.parse.repl.load.FailoverMetaData;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.HiveUtils;
+import org.apache.hadoop.hive.ql.parse.repl.metric.MetricCollector;
+import org.apache.hadoop.hive.ql.parse.repl.metric.event.Metric;
+import org.apache.hadoop.hive.ql.parse.repl.metric.event.ReplicationMetric;
+import org.apache.hadoop.hive.ql.parse.repl.metric.event.Stage;
 import org.apache.hadoop.hive.ql.processors.CommandProcessorException;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.shims.Utils;
@@ -92,6 +96,7 @@ import static org.apache.hadoop.hive.common.repl.ReplConst.REPL_ENABLE_BACKGROUN
 import static org.apache.hadoop.hive.ql.exec.repl.ReplAck.DUMP_ACKNOWLEDGEMENT;
 import static org.apache.hadoop.hive.ql.exec.repl.ReplAck.LOAD_ACKNOWLEDGEMENT;
 
+import static org.apache.hadoop.hive.ql.parse.repl.metric.ReplicationMetricCollector.isMetricsEnabledForTests;
 import static org.junit.Assert.assertArrayEquals;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
@@ -3736,4 +3741,60 @@ public class TestReplicationScenariosAcidTables extends BaseReplicationScenarios
 
 ReplDumpWork.testDeletePreviousDumpMetaPath(false);
   }
+  @Test
+  public void testEventsDumpedCountWithFilteringOfOpenTransactions() throws Throwable {
+    final int REPL_MAX_LOAD_TASKS = 5;
+    List<String> incrementalBatchConfigs = Arrays.asList(
+        String.format("'%s'='%s'", HiveConf.ConfVars.REPL_BATCH_INCREMENTAL_EVENTS, "true"),
+        String.format("'%s'='%d'", HiveConf.ConfVars.REPL_APPROX_MAX_LOAD_TASKS, REPL_MAX_LOAD_TASKS),
+        String.format("'%s'='%s'", HiveConf.ConfVars.REPL_FILTER_TRANSACTIONS, "true")
+    );
+
+WarehouseInstance.Tuple bootstrapDump = primary.run("use " + primaryDbName)
+.run("create table t1 (id int)")
+.run("insert into table t1 values (1)")
+.dump(primaryDbName, incrementalBatchConfigs);
+
+FileSystem fs = new Path(bootstrapDump.dumpLocation).getFileSystem(conf);
+
+replica.load(replicatedDbName, primaryDbName, incrementalBatchConfigs)
+.run("use " + replicatedDbName)
+.run("select * from t1")
+.verifyResults(new String[]{"1"});
+
+isMetricsEnabledForTests(true);
+MetricCollector collector = MetricCollector.getInstance();
+//incremental run
+    WarehouseInstance.Tuple incrementalDump = primary.run("use " + primaryDbName)
+        .run("insert into t1 values(2)")
+        .run("insert into t1 values(3)")
+        .run("select * from t1")  // will open a read only transaction which should be filtered.
+        .run("insert into t1 values(4)")
+        .run("insert into t1 values(5)")
+        .dump(primaryDbName, incrementalBatchConfigs);
+
+    ReplicationMetric metric = collector.getMetrics().getLast();
+    Stage stage = metric.getProgress().getStageByName("REPL_DUMP");
+    Metric eventMetric = stage.getMetricByName(ReplUtils.MetricName.EVENTS.name());
+    long eventCountFromMetrics = eventMetric.getTotalCount();
+
+    Path dumpPath = new Pa

[hive] branch master updated: HIVE-26920: Add new view 'replication_failover_failback_metrics' in sys db to capture failover and failback metrics (#4023) (Amit Saonerkar, reviewed by Teddy Choi)

2023-02-08 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 0c059c6e9bb HIVE-26920: Add new view 'replication_failover_failback_metrics' in sys db to capture failover and failback metrics  (#4023) (Amit Saonerkar, reviewed by Teddy Choi)
0c059c6e9bb is described below

commit 0c059c6e9bba35f353cd3212c5c01e86eba51125
Author: atsaonerk <106254618+atsaon...@users.noreply.github.com>
AuthorDate: Wed Feb 8 13:32:16 2023 +0530

HIVE-26920: Add new view 'replication_failover_failback_metrics' in sys db to capture failover and failback metrics  (#4023) (Amit Saonerkar, reviewed by Teddy Choi)
---
 .../parse/TestReplicationOptimisedBootstrap.java   |  94 +++---
 .../parse/TestReplicationScenariosAcidTables.java  |  88 -
 .../parse/TestScheduledReplicationScenarios.java   |  16 ++--
 .../upgrade/hive/hive-schema-4.0.0.hive.sql|  17 
 ...upgrade-4.0.0-alpha-1-to-4.0.0-alpha-2.hive.sql |  16 
 .../hadoop/hive/ql/exec/repl/ReplDumpTask.java | 105 ++---
 .../hadoop/hive/ql/exec/repl/ReplLoadTask.java |  55 +--
 .../hive/ql/parse/ReplicationSemanticAnalyzer.java |   2 +-
 .../hadoop/hive/ql/stats/StatsUpdaterThread.java   |   2 +-
 .../results/clientpositive/llap/resourceplan.q.out |   4 +
 .../llap/strict_managed_tables_sysdb.q.out |   6 ++
 .../test/results/clientpositive/llap/sysdb.q.out   |  11 ++-
 .../apache/hadoop/hive/common/repl/ReplConst.java  |  10 ++
 .../hive/metastore/utils/MetaStoreUtils.java   |   6 +-
 .../hive/metastore/PartitionManagementTask.java|   2 +-
 15 files changed, 316 insertions(+), 118 deletions(-)

diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
index 1068fe4ecba..91f8f76e1ff 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hive.ql.parse;
 
+import org.apache.commons.lang3.math.NumberUtils;
 import org.apache.hadoop.fs.ContentSummary;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
@@ -67,6 +68,11 @@ import static org.apache.hadoop.hdfs.protocol.HdfsConstants.QUOTA_RESET;
 import static org.apache.hadoop.hive.common.repl.ReplConst.REPL_ENABLE_BACKGROUND_THREAD;
 import static org.apache.hadoop.hive.common.repl.ReplConst.REPL_TARGET_DB_PROPERTY;
 import static org.apache.hadoop.hive.common.repl.ReplConst.REPL_FAILOVER_ENDPOINT;
+import static org.apache.hadoop.hive.common.repl.ReplConst.REPL_METRICS_FAILBACK_COUNT;
+import static org.apache.hadoop.hive.common.repl.ReplConst.REPL_METRICS_FAILOVER_COUNT;
+import static org.apache.hadoop.hive.common.repl.ReplConst.REPL_METRICS_LAST_FAILBACK_ENDTIME;
+import static org.apache.hadoop.hive.common.repl.ReplConst.REPL_METRICS_LAST_FAILBACK_STARTTIME;
+import static org.apache.hadoop.hive.common.repl.ReplConst.REPL_METRICS_LAST_FAILOVER_TYPE;
 import static org.apache.hadoop.hive.common.repl.ReplConst.TARGET_OF_REPLICATION;
 import static org.apache.hadoop.hive.metastore.ReplChangeManager.SOURCE_OF_REPLICATION;
 import static org.apache.hadoop.hive.ql.exec.repl.OptimisedBootstrapUtils.EVENT_ACK_FILE;
@@ -616,6 +622,10 @@ public class TestReplicationOptimisedBootstrap extends BaseReplicationScenariosA
 
     assertTrue(tablesBootstrapped.containsAll(Arrays.asList("t1", "t2", "t3")));
 
+    // Get source or replica database properties and verify replication metrics properties
+    Map<String, String> sourceParams = replica.getDatabase(replicatedDbName).getParameters();
+    verifyReplicationMetricsStatistics(sourceParams, 1, 1, ReplConst.FailoverType.UNPLANNED.toString());
+
     // Do a reverse load, this should do a bootstrap load for the tables in table_diff and incremental for the rest.
     primary.load(primaryDbName, replicatedDbName, withClause);
 
@@ -635,10 +645,9 @@ public class TestReplicationOptimisedBootstrap extends BaseReplicationScenariosA
         .run("select place from t5 where country = 'china'")
         .verifyResults(new String[] { "beejing" });
 
-    // Check for correct db Properties set.
-
+    // Get target or primary database properties and verify replication metrics properties
     Map<String, String> targetParams = primary.getDatabase(primaryDbName).getParameters();
-    Map<String, String> sourceParams = replica.getDatabase(replicatedDbName).getParameters();
+    verifyReplicationMetricsStatistics(targetParams, 1, 1, ReplConst.FailoverType.UNPLANNED.toString());
 
     // Check th

[hive] branch master updated: HIVE-26937: Batching incremental events to avoid O.O.M during repl load (#4019) (Rakshith Chandraiah, reviewed by Teddy Choi)

2023-02-06 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new dda3e7190dd HIVE-26937: Batching incremental events to avoid O.O.M during repl load (#4019) (Rakshith Chandraiah, reviewed by Teddy Choi)
dda3e7190dd is described below

commit dda3e7190dddfec676394d4af12afa330e00b4cf
Author: Rakshith C <56068841+rakshith...@users.noreply.github.com>
AuthorDate: Tue Feb 7 10:51:25 2023 +0530

HIVE-26937: Batching incremental events to avoid O.O.M during repl load (#4019) (Rakshith Chandraiah, reviewed by Teddy Choi)
---
 .../java/org/apache/hadoop/hive/conf/HiveConf.java |   2 +
 ...stReplAcrossInstancesWithJsonMessageFormat.java |   1 +
 .../hive/ql/parse/TestReplicationScenarios.java|   1 +
 .../parse/TestReplicationScenariosAcidTables.java  | 156 -
 .../TestReplicationScenariosAcrossInstances.java   |   1 +
 .../TestReplicationScenariosExternalTables.java|   8 +-
 .../parse/TestTableLevelReplicationScenarios.java  |  17 ++-
 .../hadoop/hive/ql/parse/WarehouseInstance.java|   2 +-
 .../hadoop/hive/ql/exec/repl/ReplDumpTask.java |  86 ++--
 .../hadoop/hive/ql/exec/repl/ReplLoadWork.java |  11 +-
 .../incremental/IncrementalLoadEventsIterator.java |  54 ++-
 .../incremental/IncrementalLoadTasksBuilder.java   |   4 +-
 .../hadoop/hive/ql/exec/repl/util/ReplUtils.java   |   2 +
 .../ql/parse/repl/dump/EventsDumpMetadata.java | 113 +++
 14 files changed, 396 insertions(+), 62 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index ea7c56d10f9..b1b441dce7b 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -527,6 +527,8 @@ public class HiveConf extends Configuration {
         + "dynamically generating the next set of tasks. The number is approximate as Hive \n"
         + "will stop at a slightly higher number, the reason being some events might lead to a \n"
         + "task increment that would cross the specified limit."),
+    REPL_BATCH_INCREMENTAL_EVENTS("hive.repl.batch.incremental.events", true,
+        "Dump events in batches during incremental phase of repl dump"),
     REPL_PARTITIONS_DUMP_PARALLELISM("hive.repl.partitions.dump.parallelism",100,
         "Number of threads that will be used to dump partition data information during repl dump."),
     REPL_TABLE_DUMP_PARALLELISM("hive.repl.table.dump.parallelism", 15,
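
The new hive.repl.batch.incremental.events flag (default true, per the hunk above) makes the incremental phase of REPL DUMP write events out in bounded batches instead of materializing the whole event list, which is what avoids the O.O.M. named in the subject. A generic sketch of the batching idea (illustrative only, not the actual IncrementalLoadEventsIterator):

    import java.util.ArrayList;
    import java.util.Iterator;
    import java.util.List;
    import java.util.function.Consumer;

    public class EventBatcher {
      // Drain an event iterator in fixed-size batches so only one batch is resident in memory.
      public static <T> void processInBatches(Iterator<T> events, int batchSize, Consumer<List<T>> sink) {
        List<T> batch = new ArrayList<>(batchSize);
        while (events.hasNext()) {
          batch.add(events.next());
          if (batch.size() == batchSize) {
            sink.accept(batch);
            batch = new ArrayList<>(batchSize);  // drop the old batch so it can be GC'd
          }
        }
        if (!batch.isEmpty()) {
          sink.accept(batch);                    // flush the trailing partial batch
        }
      }
    }
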
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplAcrossInstancesWithJsonMessageFormat.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplAcrossInstancesWithJsonMessageFormat.java
index 3d8e3984c6e..4e2d322c194 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplAcrossInstancesWithJsonMessageFormat.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplAcrossInstancesWithJsonMessageFormat.java
@@ -48,6 +48,7 @@ public class TestReplAcrossInstancesWithJsonMessageFormat
         "true");
     overrides.put(HiveConf.ConfVars.HIVE_DISTCP_DOAS_USER.varname,
         UserGroupInformation.getCurrentUser().getUserName());
+    overrides.put(HiveConf.ConfVars.REPL_BATCH_INCREMENTAL_EVENTS.varname, "false");
     internalBeforeClassSetup(overrides, TestReplicationScenarios.class);
   }
 
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java
index 910dda67658..9345d34bc09 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java
@@ -225,6 +225,7 @@ public class TestReplicationScenarios {
     hconf.setBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER, true);
     hconf.setBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE, true);
     hconf.setBoolVar(HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET, false);
+    hconf.setBoolVar(HiveConf.ConfVars.REPL_BATCH_INCREMENTAL_EVENTS, false);
     System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " ");
     System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " ");
 
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java
index 22c86d3a75f..f3d086eafd9 100644
--- a/itests/hive-un

[hive] branch master updated: HIVE-26921: Add failover_type and failover_endpoint as new member to metadata column in replication_metrics table (#3999) (Amit Saonerkar, reviewed by Teddy Choi)

2023-02-03 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 0d8ca3ca0bd HIVE-26921: Add failover_type and failover_endpoint as new member to metadata column in replication_metrics table (#3999) (Amit Saonerkar, reviewed by Teddy Choi)
0d8ca3ca0bd is described below

commit 0d8ca3ca0bd5d3e8ed53cf1bc4e6dfd7efa0fd3f
Author: atsaonerk <106254618+atsaon...@users.noreply.github.com>
AuthorDate: Fri Feb 3 13:47:52 2023 +0530

HIVE-26921: Add failover_type and failover_endpoint as new member to metadata column in replication_metrics table (#3999) (Amit Saonerkar, reviewed by Teddy Choi)
---
 .../parse/TestReplicationOptimisedBootstrap.java   |  2 +-
 .../hive/ql/exec/repl/OptimisedBootstrapUtils.java |  2 +-
 .../hadoop/hive/ql/exec/repl/ReplDumpTask.java | 67 +++---
 .../hadoop/hive/ql/exec/repl/ReplLoadTask.java |  5 --
 .../incremental/IncrementalLoadTasksBuilder.java   | 17 +-
 .../hadoop/hive/ql/exec/repl/util/ReplUtils.java   | 15 +++--
 .../hive/ql/parse/ReplicationSemanticAnalyzer.java | 38 +---
 .../OptimizedBootstrapDumpMetricCollector.java |  6 +-
 .../PreOptimizedBootstrapDumpMetricCollector.java  |  5 +-
 .../OptimizedBootstrapLoadMetricCollector.java |  5 +-
 .../PreOptimizedBootstrapLoadMetricCollector.java  |  5 +-
 .../repl/metric/ReplicationMetricCollector.java| 24 +++-
 .../hive/ql/parse/repl/metric/event/Metadata.java  | 12 
 .../metric/TestReplicationMetricCollector.java | 23 +---
 .../repl/metric/TestReplicationMetricSink.java |  8 ++-
 .../llap/replication_metrics_ingest.q.out  |  4 +-
 .../apache/hadoop/hive/common/repl/ReplConst.java  | 10 
 17 files changed, 186 insertions(+), 62 deletions(-)

diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
index a55b7c8a5b4..1068fe4ecba 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
@@ -547,7 +547,7 @@ public class TestReplicationOptimisedBootstrap extends BaseReplicationScenariosA
 
     // this load should throw exception
     List<String> finalWithClause = withClause;
-    assertThrows("Should fail with db doesn't exist exception", HiveException.class, () -> {
+    assertThrows("Should fail with db doesn't exist exception", SemanticException.class, () -> {
       primary.load(primaryDbName, replicatedDbName, finalWithClause);
     });
   }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/OptimisedBootstrapUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/OptimisedBootstrapUtils.java
index 9ff0d244bbd..13ecf255718 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/OptimisedBootstrapUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/OptimisedBootstrapUtils.java
@@ -88,7 +88,7 @@ public class OptimisedBootstrapUtils {
    * @return true, if the database has repl.target.for property set.
    * @throws HiveException
    */
-  public static boolean isFailover(String dbName, Hive hive) throws HiveException {
+  public static boolean isDbTargetOfFailover(String dbName, Hive hive) throws HiveException {
     Database database = hive.getDatabase(dbName);
     return database != null ? MetaStoreUtils.isTargetOfReplication(database) : false;
   }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java
index a2b1a900ae9..edbe52a2038 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java
@@ -145,7 +145,7 @@ import static org.apache.hadoop.hive.ql.exec.repl.OptimisedBootstrapUtils.getEve
 import static org.apache.hadoop.hive.ql.exec.repl.OptimisedBootstrapUtils.getReplEventIdFromDatabase;
 import static org.apache.hadoop.hive.ql.exec.repl.OptimisedBootstrapUtils.getTablesFromTableDiffFile;
 import static org.apache.hadoop.hive.ql.exec.repl.OptimisedBootstrapUtils.getTargetEventId;
-import static org.apache.hadoop.hive.ql.exec.repl.OptimisedBootstrapUtils.isFailover;
+import static org.apache.hadoop.hive.ql.exec.repl.OptimisedBootstrapUtils.isDbTargetOfFailover;
 import static org.apache.hadoop.hive.ql.exec.repl.OptimisedBootstrapUtils.isFirstIncrementalPending;
 import static org.apache.hadoop.hive.ql.exec.repl.ReplAck.LOAD_ACKNOWLEDGEMENT;
 import static org.apache.hadoop.hive.ql.exec.repl.ReplAck.NON_RECOVERABLE_MARKER;
@@ -208,28 +208,42 @@ public class ReplDumpTask exten

[hive] branch master updated: HIVE-26601: Registering table metric during second load cycle of optimized bootstrap (#3992) (Vinit Patni, reviewed by Teddy Choi)

2023-02-02 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 870713ce031 HIVE-26601: Registering table metric during second load cycle of optimized bootstrap (#3992) (Vinit Patni, reviewed by Teddy Choi)
870713ce031 is described below

commit 870713ce031b346cdd9008a3217d8cc806ea9f7a
Author: vinitpatni 
AuthorDate: Fri Feb 3 13:13:30 2023 +0530

HIVE-26601: Registering table metric during second load cycle of optimized bootstrap (#3992) (Vinit Patni, reviewed by Teddy Choi)
---
 .../parse/TestReplicationOptimisedBootstrap.java   | 81 ++
 .../hadoop/hive/ql/exec/repl/ReplLoadWork.java |  6 +-
 .../incremental/IncrementalLoadTasksBuilder.java   |  7 +-
 3 files changed, 90 insertions(+), 4 deletions(-)

diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
index 4959bacf5ad..a55b7c8a5b4 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
@@ -992,6 +992,87 @@ public class TestReplicationOptimisedBootstrap extends BaseReplicationScenariosA
     assertEquals(tableMetric.getTotalCount(), tableDiffEntries.size());
   }
 
+  @Test
+  public void testTblMetricRegisterDuringSecondLoadCycleOfOptimizedBootstrap() throws Throwable {
+    List<String> withClause = ReplicationTestUtils.includeExternalTableClause(false);
+    withClause.add("'" + HiveConf.ConfVars.REPLDIR.varname + "'='" + primary.repldDir + "'");
+    WarehouseInstance.Tuple tuple = primary.run("use " + primaryDbName)
+        .run("create table t1_managed (id int) clustered by(id) into 3 buckets stored as orc " +
+            "tblproperties (\"transactional\"=\"true\")")
+        .run("insert into table t1_managed values (10)")
+        .run("insert into table t1_managed values (20),(31),(42)")
+        .dump(primaryDbName, withClause);
+
+    // Do the bootstrap load and check all the external & managed tables are present.
+    replica.load(replicatedDbName, primaryDbName, withClause)
+        .run("repl status " + replicatedDbName)
+        .verifyResult(tuple.lastReplicationId)
+        .run("use " + replicatedDbName)
+        .run("show tables")
+        .verifyResults(new String[]{"t1_managed"})
+        .verifyReplTargetProperty(replicatedDbName);
+
+    // Do an incremental dump & load, Add one table which we can drop & an empty table as well.
+    tuple = primary.run("use " + primaryDbName)
+        .run("create table t2_managed (id int) clustered by(id) into 3 buckets stored as orc " +
+            "tblproperties (\"transactional\"=\"true\")")
+        .run("insert into table t2_managed values (10)")
+        .run("insert into table t2_managed values (20),(31),(42)")
+        .dump(primaryDbName, withClause);
+
+    replica.load(replicatedDbName, primaryDbName, withClause)
+        .run("use " + replicatedDbName)
+        .run("show tables")
+        .verifyResults(new String[]{"t1_managed", "t2_managed"})
+        .verifyReplTargetProperty(replicatedDbName);
+
+    primary.run("use " + primaryDbName)
+        .run("insert into table t1_managed values (30)")
+        .run("insert into table t1_managed values (50),(51),(52)");
+
+    // Prepare for reverse replication.
+    DistributedFileSystem replicaFs = replica.miniDFSCluster.getFileSystem();
+    Path newReplDir = new Path(replica.repldDir + "1");
+    replicaFs.mkdirs(newReplDir);
+    withClause = ReplicationTestUtils.includeExternalTableClause(false);
+    withClause.add("'" + HiveConf.ConfVars.REPLDIR.varname + "'='" + newReplDir + "'");
+
+
+    // Do a reverse dump
+    tuple = replica.dump(replicatedDbName, withClause);
+
+    // Check the event ack file got created.
+    assertTrue(new Path(tuple.dumpLocation, EVENT_ACK_FILE).toString() + " doesn't exist",
+        replicaFs.exists(new Path(tuple.dumpLocation, EVENT_ACK_FILE)));
+
+
+    // Do a load, this should create a table_diff_complete directory
+    primary.load(primaryDbName,replicatedDbName, withClause);
+
+    // Check the table diff directory exist.
+    assertTrue(new Path(tuple.dumpLocation, TABLE_DIFF_COMPLETE_DIRECTORY).toStr

[hive] branch master updated: HIVE-26963: Unset repl.faliover.endpoint during second cycle of optimized bootstrap (#4006) (Rakshith Chandraiah, reviewed by Teddy Choi)

2023-02-02 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new a8151681965 HIVE-26963: Unset repl.faliover.endpoint during second cycle of optimized bootstrap (#4006) (Rakshith Chandraiah, reviewed by Teddy Choi)
a8151681965 is described below

commit a8151681965ceab430b3d778ad996dd0af560934
Author: Rakshith C <56068841+rakshith...@users.noreply.github.com>
AuthorDate: Fri Feb 3 10:04:56 2023 +0530

HIVE-26963: Unset repl.faliover.endpoint during second cycle of optimized bootstrap (#4006) (Rakshith Chandraiah, reviewed by Teddy Choi)
---
 .../parse/TestReplicationOptimisedBootstrap.java   | 63 ++
 .../hadoop/hive/ql/exec/repl/ReplDumpTask.java |  5 +-
 .../hadoop/hive/ql/exec/repl/ReplLoadTask.java |  4 ++
 3 files changed, 71 insertions(+), 1 deletion(-)

diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
index 182cb966dfc..4959bacf5ad 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
@@ -66,6 +66,7 @@ import static org.apache.hadoop.hdfs.protocol.HdfsConstants.QUOTA_DONT_SET;
 import static org.apache.hadoop.hdfs.protocol.HdfsConstants.QUOTA_RESET;
 import static org.apache.hadoop.hive.common.repl.ReplConst.REPL_ENABLE_BACKGROUND_THREAD;
 import static org.apache.hadoop.hive.common.repl.ReplConst.REPL_TARGET_DB_PROPERTY;
+import static org.apache.hadoop.hive.common.repl.ReplConst.REPL_FAILOVER_ENDPOINT;
 import static org.apache.hadoop.hive.common.repl.ReplConst.TARGET_OF_REPLICATION;
 import static org.apache.hadoop.hive.metastore.ReplChangeManager.SOURCE_OF_REPLICATION;
 import static org.apache.hadoop.hive.ql.exec.repl.OptimisedBootstrapUtils.EVENT_ACK_FILE;
@@ -1330,4 +1331,66 @@ public class TestReplicationOptimisedBootstrap extends BaseReplicationScenariosA
     assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(replica.getDatabase(replicatedDbName),
         MetaStoreUtils.FailoverEndpoint.TARGET));
   }
+  @Test
+  public void testOptimizedBootstrapWithControlledFailover() throws Throwable {
+    primary.run("use " + primaryDbName)
+        .run("create  table t1 (id string)")
+        .run("insert into table t1 values ('A')")
+        .dump(primaryDbName);
+    replica.load(replicatedDbName, primaryDbName);
+
+    primary.dump(primaryDbName);
+    replica.load(replicatedDbName, primaryDbName);
+    //initiate a controlled failover from primary to replica.
+    List<String> failoverConfigs = Arrays.asList("'" + HiveConf.ConfVars.HIVE_REPL_FAILOVER_START + "'='true'");
+    primary.dump(primaryDbName, failoverConfigs);
+    replica.load(replicatedDbName, primaryDbName, failoverConfigs);
+
+    primary.run("use " + primaryDbName)
+        .run("create  table t3 (id int)")
+        .run("insert into t3 values(1),(2),(3)")
+        .run("insert into t1 values('B')"); //modify primary after failover.
+
+    // initiate first cycle of optimized bootstrap
+    WarehouseInstance.Tuple reverseDump = replica.run("use " + replicatedDbName)
+        .run("create table t2 (col int)")
+        .run("insert into t2 values(1),(2)")
+        .dump(replicatedDbName);
+
+    FileSystem fs = new Path(reverseDump.dumpLocation).getFileSystem(conf);
+    assertTrue(fs.exists(new Path(reverseDump.dumpLocation, EVENT_ACK_FILE)));
+
+    primary.load(primaryDbName, replicatedDbName);
+
+    assertEquals(MetaStoreUtils.FailoverEndpoint.SOURCE.toString(),
+        primary.getDatabase(primaryDbName).getParameters().get(REPL_FAILOVER_ENDPOINT));
+
+    assertEquals(MetaStoreUtils.FailoverEndpoint.TARGET.toString(),
+        replica.getDatabase(replicatedDbName).getParameters().get(REPL_FAILOVER_ENDPOINT));
+
+    assertTrue(fs.exists(new Path(reverseDump.dumpLocation, TABLE_DIFF_COMPLETE_DIRECTORY)));
+    HashSet<String> tableDiffEntries = getTablesFromTableDiffFile(new Path(reverseDump.dumpLocation), conf);
+    assertTrue(!tableDiffEntries.isEmpty());
+
+    assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(primary.getDatabase(primaryDbName),
+        MetaStoreUtils.FailoverEndpoint.SOURCE));
+    assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(replica.getDatabase(replicatedDbName),
+        MetaStoreUtils.FailoverEndpoint.TARGET));
+
+    // second cycle of optimized bootstrap
+    reverseDump = replica.dump(replicatedDbName);
+    assertTrue(fs.exists(new Path(re

[hive] branch master updated: HIVE-26931: REPL LOAD command does not throw any error for incorrect syntax (Subhasis Gorai, reviewed by Teddy Choi)

2023-01-31 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new ff38313c15d HIVE-26931: REPL LOAD command does not throw any error for incorrect syntax (Subhasis Gorai, reviewed by Teddy Choi)
ff38313c15d is described below

commit ff38313c15d804db4ac63384498b02598e60fb57
Author: Subhasis Gorai 
AuthorDate: Wed Feb 1 10:10:00 2023 +0530

HIVE-26931: REPL LOAD command does not throw any error for incorrect syntax (Subhasis Gorai, reviewed by Teddy Choi)
---
 .../hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java | 2 +-
 .../hive/ql/parse/TestReplicationScenariosAcrossInstances.java   | 2 +-
 .../org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java | 5 +++--
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java
index e23e9d4bb00..22c86d3a75f 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java
@@ -2098,7 +2098,7 @@ public class TestReplicationScenariosAcidTables extends BaseReplicationScenarios
       replica.loadWithoutExplain("", "`*`");
       fail();
     } catch (HiveException e) {
-      assertEquals("MetaException(message:Database name cannot be null.)", e.getMessage());
+      assertEquals("REPL LOAD Target database name shouldn't be null", e.getMessage());
     }
   }
 
diff --git 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java
 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java
index 20e189e9f6b..77e7390e8b1 100644
--- 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java
+++ 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java
@@ -701,7 +701,7 @@ public class TestReplicationScenariosAcrossInstances 
extends BaseReplicationAcro
   replica.load("", "`*`");
   Assert.fail();
 } catch (HiveException e) {
-  assertEquals("MetaException(message:Database name cannot be null.)", 
e.getMessage());
+  assertEquals("REPL LOAD Target database name shouldn't be null", 
e.getMessage());
 }
   }
 
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java
index f32c38be282..aad3a8aac4c 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java
@@ -320,7 +320,8 @@ public class ReplicationSemanticAnalyzer extends 
BaseSemanticAnalyzer {
 // looking at each db, and then each table, and then setting up the 
appropriate
 // import job in its place.
 try {
-  assert (sourceDbNameOrPattern != null);
+  Objects.requireNonNull(sourceDbNameOrPattern, "REPL LOAD Source database 
name shouldn't be null");
+  Objects.requireNonNull(replScope.getDbName(), "REPL LOAD Target database 
name shouldn't be null");
   Path loadPath = getCurrentLoadPath();
 
   // Now, the dumped path can be one of three things:
@@ -370,7 +371,7 @@ public class ReplicationSemanticAnalyzer extends 
BaseSemanticAnalyzer {
 }
   } else {
 ReplUtils.reportStatusInReplicationMetrics("REPL_LOAD", 
Status.SKIPPED, null, conf,  sourceDbNameOrPattern, null);
-LOG.warn("Previous Dump Already Loaded");
+LOG.warn("No dump to load or the previous dump already loaded");
   }
 } catch (Exception e) {
   // TODO : simple wrap & rethrow for now, clean up with error codes
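
The heart of the fix is the hunk above: a bare assert is replaced with Objects.requireNonNull. JVM assertions are disabled unless the process runs with -ea, so the old check silently did nothing and a malformed REPL LOAD sailed through to fail later with an unrelated error. A minimal standalone sketch of the same pattern (class and method names here are illustrative, not from the patch):

import java.util.Objects;

public class RequireNonNullDemo {
  static void load(String sourceDb, String targetDb) {
    // Always enforced, regardless of -ea, and carries a readable message.
    Objects.requireNonNull(sourceDb, "REPL LOAD Source database name shouldn't be null");
    Objects.requireNonNull(targetDb, "REPL LOAD Target database name shouldn't be null");
    // ... proceed with the load ...
  }

  public static void main(String[] args) {
    load("srcdb", null); // fails fast with the descriptive message above
  }
}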



[hive] branch master updated: HIVE-26960: Fix optimized bootstrap when primary is modified only by addition of new tables (Rakshith Chandraiah, reviewed by Teddy Choi)

2023-01-31 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new e20102c6201 HIVE-26960: Fix optimized bootstrap when primary is 
modified only by addition of new tables (Rakshith Chandraiah, reviewed by Teddy 
Choi)
e20102c6201 is described below

commit e20102c62015bb74b453a3460352fd789bf9edc7
Author: Rakshith C <56068841+rakshith...@users.noreply.github.com>
AuthorDate: Wed Feb 1 06:52:11 2023 +0530

HIVE-26960: Fix optimized bootstrap when primary is modified only by 
addition of new tables (Rakshith Chandraiah, reviewed by Teddy Choi)
---
 .../parse/TestReplicationOptimisedBootstrap.java   |  7 --
 .../hadoop/hive/ql/exec/repl/ReplLoadWork.java | 27 +-
 2 files changed, 21 insertions(+), 13 deletions(-)

diff --git 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
index 8d9429e33b8..182cb966dfc 100644
--- 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
+++ 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
@@ -1202,7 +1202,8 @@ public class TestReplicationOptimisedBootstrap extends 
BaseReplicationScenariosA
 
 // make some changes on primary
 primary.run("use " + primaryDbName)
-.run("insert into table t1 values (4)");
+.run("create table t2(name string) stored as orc 
tblproperties(\"transactional\"=\"true\")")
+.run("insert into t2 values('a')");
 
 withClause = Arrays.asList(
 String.format("'%s'='%s'", 
HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname, "false")
@@ -1232,7 +1233,9 @@ public class TestReplicationOptimisedBootstrap extends 
BaseReplicationScenariosA
 }
 // ensure optimized bootstrap was successful.
 primary.run(String.format("select * from %s.t1", primaryDbName))
-.verifyResults(new String[]{"1", "2", "3"});
+.verifyResults(new String[]{"1", "2", "3"})
+.run("show tables in "+primaryDbName)
+.verifyResults(new String[]{"t1"});
   }
   @Test
   public void testReverseFailoverBeforeOptimizedBootstrap() throws Throwable {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplLoadWork.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplLoadWork.java
index b6072912c93..2c379472d3a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplLoadWork.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplLoadWork.java
@@ -159,20 +159,25 @@ public class ReplLoadWork implements Serializable, 
ReplLoadWorkMBean {
* for the same.
*/
   Path incBootstrapDir = new Path(dumpDirectory, 
ReplUtils.INC_BOOTSTRAP_ROOT_DIR_NAME);
-  if (fs.exists(incBootstrapDir)) {
-if (isSecondFailover) {
-  String[] bootstrappedTables = getBootstrapTableList(new 
Path(dumpDirectory).getParent(), hiveConf);
-  LOG.info("Optimised bootstrap load for database {} with initial 
bootstrapped table list as {}",
-  dbNameToLoadIn, tablesToBootstrap);
-  // Get list of tables bootstrapped.
+  if (isSecondFailover) {
+String[] bootstrappedTables = getBootstrapTableList(new 
Path(dumpDirectory).getParent(), hiveConf);
+LOG.info("Optimised bootstrap load for database {} with initial 
bootstrapped table list as {}",
+dbNameToLoadIn, tablesToBootstrap);
+// Get list of tables bootstrapped.
+if (fs.exists(incBootstrapDir)) {
   Path tableMetaPath = new Path(incBootstrapDir, 
EximUtil.METADATA_PATH_NAME + "/" + sourceDbName);
   tablesToBootstrap =
-  Stream.of(fs.listStatus(tableMetaPath)).map(st -> 
st.getPath().getName()).collect(Collectors.toList());
-  List<String> tableList = Arrays.asList(bootstrappedTables);
-  tablesToDrop = ListUtils.subtract(tableList, tablesToBootstrap);
-  LOG.info("Optimised bootstrap for database {} with drop table list 
as {} and bootstrap table list as {}",
-  dbNameToLoadIn, tablesToDrop, tablesToBootstrap);
+  Stream.of(fs.listStatus(tableMetaPath)).map(st -> 
st.getPath().getName()).collect(Collectors.toList());
+}
+else {
+  tablesToBootstrap = Collections.emptyList();
 }
+List<String> tableList = Arrays.asList(bootstrappedTables);
+tablesToDrop = ListUtils.subtract(tableList, tablesToBootstrap);
+ 
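
The tables-to-drop computation above is a plain set difference between what the first cycle bootstrapped and what the current dump contains. A small illustration of the ListUtils.subtract call (table names invented; assumes commons-collections4, whose 3.x package behaves the same):

import java.util.Arrays;
import java.util.List;
import org.apache.commons.collections4.ListUtils;

public class TableDiffDemo {
  public static void main(String[] args) {
    // Tables bootstrapped during the first optimized-bootstrap cycle.
    List<String> bootstrapped = Arrays.asList("t1", "t2");
    // Tables found in the current dump's metadata dir; empty when the
    // primary was only modified by adding new tables (this bug's trigger).
    List<String> tablesToBootstrap = Arrays.asList("t1");
    // t2 must be dropped on the target before replication can resume.
    List<String> tablesToDrop = ListUtils.subtract(bootstrapped, tablesToBootstrap);
    System.out.println(tablesToDrop); // prints [t2]
  }
}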

[hive] branch master updated: HIVE-26606: Expose failover states in replication metrics (Harshal Patel, reviewed by Teddy Choi)

2023-01-30 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 0a5558d71c4 HIVE-26606: Expose failover states in replication metrics 
(Harshal Patel, reviewed by Teddy Choi)
0a5558d71c4 is described below

commit 0a5558d71c474f285c1e6d338a4be93bd1c6ce13
Author: harshal-16 <109334642+harshal...@users.noreply.github.com>
AuthorDate: Tue Jan 31 10:23:45 2023 +0530

HIVE-26606: Expose failover states in replication metrics (Harshal Patel, 
reviewed by Teddy Choi)
---
 .../parse/TestReplicationScenariosAcidTables.java  |  8 +-
 .../hive/ql/exec/repl/OptimisedBootstrapUtils.java |  2 +-
 .../hadoop/hive/ql/exec/repl/ReplDumpTask.java | 40 ++---
 .../hadoop/hive/ql/exec/repl/ReplLoadTask.java |  5 ++
 .../hadoop/hive/ql/exec/repl/util/ReplUtils.java   | 10 ++-
 .../hive/ql/parse/ReplicationSemanticAnalyzer.java | 40 +
 .../apache/hadoop/hive/ql/parse/repl/DumpType.java | 12 +++
 .../dump/metric/BootstrapDumpMetricCollector.java  |  4 +-
 .../metric/IncrementalDumpMetricCollector.java |  4 +-
 ... => OptimizedBootstrapDumpMetricCollector.java} | 11 +--
 ... PreOptimizedBootstrapDumpMetricCollector.java} | 11 +--
 .../hive/ql/parse/repl/load/DumpMetaData.java  | 23 +
 .../OptimizedBootstrapLoadMetricCollector.java}| 12 +--
 .../PreOptimizedBootstrapLoadMetricCollector.java} | 13 ++-
 .../hive/ql/parse/repl/metric/event/Metadata.java  |  4 +-
 .../metric/TestReplicationMetricCollector.java | 99 --
 .../repl/metric/TestReplicationMetricSink.java |  8 +-
 .../TestReplicationMetricUpdateOnFailure.java  |  6 +-
 .../llap/replication_metrics_ingest.q.out  |  2 +-
 19 files changed, 232 insertions(+), 82 deletions(-)

diff --git 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java
 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java
index d09120bcc8c..e23e9d4bb00 100644
--- 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java
+++ 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java
@@ -397,7 +397,7 @@ public class TestReplicationScenariosAcidTables extends 
BaseReplicationScenarios
 assertTrue(fs.exists(new Path(dumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
 dumpPath = new Path(reverseDumpData.dumpLocation, ReplUtils.REPL_HIVE_BASE_DIR);
 assertFalse(fs.exists(new Path(dumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
-assertTrue(new DumpMetaData(dumpPath, conf).getDumpType() == DumpType.INCREMENTAL);
+assertTrue(new DumpMetaData(dumpPath, conf).getDumpType() == DumpType.PRE_OPTIMIZED_BOOTSTRAP);
 assertTrue(fs.exists(new Path(dumpPath, DUMP_ACKNOWLEDGEMENT.toString())));
 db = replica.getDatabase(replicatedDbName);
 assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(db, 
MetaStoreUtils.FailoverEndpoint.TARGET));
@@ -625,7 +625,7 @@ public class TestReplicationScenariosAcidTables extends 
BaseReplicationScenarios
 dumpAckFile = new Path(dumpPath, DUMP_ACKNOWLEDGEMENT.toString());
 assertTrue(fs.exists(dumpAckFile));
 assertFalse(fs.exists(new Path(dumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
-assertTrue(new DumpMetaData(dumpPath, conf).getDumpType() == 
DumpType.INCREMENTAL);
+assertTrue(new DumpMetaData(dumpPath, conf).getDumpType() == 
DumpType.PRE_OPTIMIZED_BOOTSTRAP);
 db = replica.getDatabase(replicatedDbName);
 assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(db, 
MetaStoreUtils.FailoverEndpoint.TARGET));
 assertTrue(MetaStoreUtils.isTargetOfReplication(db));
@@ -734,7 +734,7 @@ public class TestReplicationScenariosAcidTables extends 
BaseReplicationScenarios
 Path dumpAckFile = new Path(dumpPath, DUMP_ACKNOWLEDGEMENT.toString());
 assertTrue(fs.exists(dumpAckFile));
 assertFalse(fs.exists(new Path(dumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
-assertTrue(new DumpMetaData(dumpPath, conf).getDumpType() == 
DumpType.INCREMENTAL);
+assertTrue(new DumpMetaData(dumpPath, conf).getDumpType() == 
DumpType.PRE_OPTIMIZED_BOOTSTRAP);
 db = replica.getDatabase(replicatedDbName);
 assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(db, 
MetaStoreUtils.FailoverEndpoint.TARGET));
 assertTrue(MetaStoreUtils.isTargetOfReplication(db));
@@ -748,7 +748,7 @@ public class TestReplicationScenariosAcidTables extends 
BaseReplicationScenarios
 assertTrue(fs.exists(new Path(preFailoverDumpData.dumpLocation)));
 assertNotEquals(reverseDumpData.dumpLocation, dumpData.dumpLocation);
 assertFalse(fs.exists(new Path(dumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
-assertTrue(new DumpMetaData

[hive] branch master updated: HIVE-26600: Handle failover during optimized bootstrap (Rakshith Chandraiah, reviewed by Teddy Choi)

2023-01-29 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new d6b11487b88 HIVE-26600: Handle failover during optimized bootstrap 
(Rakshith Chandraiah, reviewed by Teddy Choi)
d6b11487b88 is described below

commit d6b11487b8816e1a87eb1fa66b7c375952feeb1f
Author: Rakshith C <56068841+rakshith...@users.noreply.github.com>
AuthorDate: Mon Jan 30 07:17:48 2023 +0530

HIVE-26600: Handle failover during optimized bootstrap (Rakshith 
Chandraiah, reviewed by Teddy Choi)
---
 .../parse/TestReplicationOptimisedBootstrap.java   | 96 ++
 .../hadoop/hive/ql/exec/repl/ReplDumpTask.java | 13 ++-
 2 files changed, 102 insertions(+), 7 deletions(-)

diff --git 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
index 42ef25756ae..8d9429e33b8 100644
--- 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
+++ 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.parse;
 
 import org.apache.hadoop.fs.ContentSummary;
 import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.QuotaUsage;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
@@ -32,6 +33,8 @@ import 
org.apache.hadoop.hive.metastore.messaging.json.gzip.GzipJSONMessageEncod
 import org.apache.hadoop.hive.metastore.txn.TxnStore;
 import org.apache.hadoop.hive.metastore.txn.TxnUtils;
 import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
+import org.apache.hadoop.hive.ql.exec.repl.ReplAck;
+import org.apache.hadoop.hive.ql.parse.repl.load.FailoverMetaData;
 import org.apache.hadoop.hive.ql.parse.repl.metric.MetricCollector;
 import org.apache.hadoop.hive.ql.exec.repl.OptimisedBootstrapUtils;
 import org.apache.hadoop.hive.ql.exec.repl.util.ReplUtils;
@@ -1231,4 +1234,97 @@ public class TestReplicationOptimisedBootstrap extends 
BaseReplicationScenariosA
 primary.run(String.format("select * from %s.t1", primaryDbName))
 .verifyResults(new String[]{"1", "2", "3"});
   }
+  @Test
+  public void testReverseFailoverBeforeOptimizedBootstrap() throws Throwable {
+primary.run("use " + primaryDbName)
+.run("create  table t1 (id string)")
+.run("insert into table t1 values ('A')")
+.dump(primaryDbName);
+replica.load(replicatedDbName, primaryDbName);
+
+primary.dump(primaryDbName);
+replica.load(replicatedDbName, primaryDbName);
+//initiate a controlled failover from primary to replica.
+List<String> failoverConfigs = Arrays.asList("'" + HiveConf.ConfVars.HIVE_REPL_FAILOVER_START + "'='true'");
+primary.dump(primaryDbName, failoverConfigs);
+replica.load(replicatedDbName, primaryDbName, failoverConfigs);
+primary.run("use " + primaryDbName)
+.run("insert into t1 values('B')"); //modify primary after 
failover.
+//initiate a controlled failover from replica to primary before the first 
cycle of optimized bootstrap is run.
+WarehouseInstance.Tuple reverseDump = replica.run("use " + 
replicatedDbName)
+.run("create table t2 (col int)")
+.run("insert into t2 values(1),(2)")
+.dump(replicatedDbName, failoverConfigs);
+
+// the first reverse dump should NOT be failover ready.
+FileSystem fs = new Path(reverseDump.dumpLocation).getFileSystem(conf);
+assertTrue(fs.exists(new Path(reverseDump.dumpLocation, EVENT_ACK_FILE)));
+Path dumpPath = new Path(reverseDump.dumpLocation, 
ReplUtils.REPL_HIVE_BASE_DIR);
+assertFalse(fs.exists(new Path(dumpPath, 
FailoverMetaData.FAILOVER_METADATA)));
+assertFalse(fs.exists(new Path(dumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
+// ensure load was successful.
+primary.load(primaryDbName, replicatedDbName, failoverConfigs);
+assertTrue(fs.exists(new Path(reverseDump.dumpLocation, 
TABLE_DIFF_COMPLETE_DIRECTORY)));
+assertTrue(fs.exists(new Path(dumpPath, ReplAck.LOAD_ACKNOWLEDGEMENT.toString())));
+HashSet<String> tableDiffEntries = getTablesFromTableDiffFile(new Path(reverseDump.dumpLocation), conf);
+assertTrue(!tableDiffEntries.isEmpty()); // we have modified a table t1 at 
source
+
+
assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(primary.getDatabase(primaryDbName),
+MetaStoreUtils.FailoverEndpoint.SOURCE));
+
assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(replica
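
For reference, the controlled-failover switch used throughout this test is an ordinary dump-time configuration rather than a separate command. A sketch of building the same WITH clause outside the test harness, assuming HIVE_REPL_FAILOVER_START resolves to hive.repl.failover.start:

import java.util.Arrays;
import java.util.List;

public class FailoverClauseDemo {
  public static void main(String[] args) {
    // Equivalent SQL: REPL DUMP dbname WITH ('hive.repl.failover.start'='true')
    List<String> failoverConfigs = Arrays.asList("'hive.repl.failover.start'='true'");
    System.out.println(failoverConfigs);
  }
}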

[hive] branch master updated (ebca56ce581 -> cd171401b39)

2023-01-29 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


from ebca56ce581 HIVE-26939 : Changed placement policy from NODE to node 
for Hadoop 3.3.4 to parse correctly using Jackson 2.12.7 (#3941) (Aman Raj 
reviewed by Laszlo Bodor)
 add cd171401b39 HIVE-26933: Cleanup dump directory for eventId which was 
failed in previous dump cycle (Harshal Patel, reviewed by Teddy Choi)

No new revisions were added by this update.

Summary of changes:
 .../apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java   | 17 -
 1 file changed, 12 insertions(+), 5 deletions(-)



[hive] branch master updated: HIVE-26599: Registering Tables metric during second cycle of optimised bootstrap (Vinit Patni, reviewed by Teddy Choi)

2023-01-26 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new afd2722310c HIVE-26599: Registering Tables metric during second cycle 
of optimised bootstrap (Vinit Patni, reviewed by Teddy Choi)
afd2722310c is described below

commit afd2722310c712b504dff74082f9865c31d5a187
Author: vinitpatni 
AuthorDate: Fri Jan 27 13:06:15 2023 +0530

HIVE-26599: Registering Tables metric during second cycle of optimised 
bootstrap (Vinit Patni, reviewed by Teddy Choi)
---
 .../parse/TestReplicationOptimisedBootstrap.java   | 82 ++
 .../hadoop/hive/ql/exec/repl/ReplDumpTask.java |  4 ++
 2 files changed, 86 insertions(+)

diff --git 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
index 396abd24b47..42ef25756ae 100644
--- 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
+++ 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
@@ -32,11 +32,17 @@ import 
org.apache.hadoop.hive.metastore.messaging.json.gzip.GzipJSONMessageEncod
 import org.apache.hadoop.hive.metastore.txn.TxnStore;
 import org.apache.hadoop.hive.metastore.txn.TxnUtils;
 import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
+import org.apache.hadoop.hive.ql.parse.repl.metric.MetricCollector;
 import org.apache.hadoop.hive.ql.exec.repl.OptimisedBootstrapUtils;
 import org.apache.hadoop.hive.ql.exec.repl.util.ReplUtils;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.HiveUtils;
 import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.hive.ql.parse.repl.metric.event.Status;
+import org.apache.hadoop.hive.ql.parse.repl.metric.event.ReplicationMetric;
+import org.apache.hadoop.hive.ql.parse.repl.metric.event.Stage;
+import org.apache.hadoop.hive.ql.parse.repl.metric.event.Metric;
+import static 
org.apache.hadoop.hive.ql.parse.repl.metric.ReplicationMetricCollector.isMetricsEnabledForTests;
 
 import org.jetbrains.annotations.NotNull;
 import org.junit.After;
@@ -906,6 +912,82 @@ public class TestReplicationOptimisedBootstrap extends 
BaseReplicationScenariosA
 .verifyFailure(new String[]{"tnew_managed"});
   }
 
+  @Test
+  public void testTblMetricRegisterDuringSecondCycleOfOptimizedBootstrap() 
throws Throwable {
+List<String> withClause = ReplicationTestUtils.includeExternalTableClause(false);
+withClause.add("'" + HiveConf.ConfVars.REPLDIR.varname + "'='" + 
primary.repldDir + "'");
+WarehouseInstance.Tuple tuple = primary.run("use " + primaryDbName)
+.run("create table t1_managed (id int) clustered by(id) into 3 
buckets stored as orc " +
+"tblproperties (\"transactional\"=\"true\")")
+.run("insert into table t1_managed values (10)")
+.run("insert into table t1_managed values (20),(31),(42)")
+.dump(primaryDbName, withClause);
+
+// Do the bootstrap load and check all the external & managed tables are 
present.
+replica.load(replicatedDbName, primaryDbName, withClause)
+.run("repl status " + replicatedDbName)
+.verifyResult(tuple.lastReplicationId)
+.run("use " + replicatedDbName)
+.run("show tables")
+.verifyResults(new String[]{"t1_managed"})
+.verifyReplTargetProperty(replicatedDbName);
+
+// Do an incremental dump & load, Add one table which we can drop & an 
empty table as well.
+tuple = primary.run("use " + primaryDbName)
+.run("create table t2_managed (id int) clustered by(id) into 3 
buckets stored as orc " +
+"tblproperties (\"transactional\"=\"true\")")
+.run("insert into table t2_managed values (10)")
+.run("insert into table t2_managed values (20),(31),(42)")
+.dump(primaryDbName, withClause);
+
+replica.load(replicatedDbName, primaryDbName, withClause)
+.run("use " + replicatedDbName)
+.run("show tables")
+.verifyResults(new String[]{"t1_managed", "t2_managed"})
+.verifyReplTargetProperty(replicatedDbName);
+
+primary.run("use " + primaryDbName)
+.run("insert into table t1_managed values (30)")
+.run("insert into table t1_managed values (50),(51),(52)");
+
+  

[hive] branch master updated (ed3261f3a4e -> c1294a68d4e)

2023-01-19 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


from ed3261f3a4e HIVE-26925: MV with iceberg storage format fails when 
contains 'PARTITIONED ON' clause due to column number/types difference. 
(Krisztian Kasa, reviewed by Aman Sinha, Stamatis Zampetakis)
 add c1294a68d4e HIVE-26942 : Prevent update of "repl.target.last.id" 
when RESUME action initiated (Shreenidhi Saigaonkar, reviewed by Teddy Choi)

No new revisions were added by this update.

Summary of changes:
 .../parse/TestReplicationScenariosAcidTables.java  | 27 ++
 .../hadoop/hive/ql/exec/repl/ReplDumpTask.java |  2 ++
 .../hadoop/hive/ql/exec/repl/ReplLoadTask.java |  2 ++
 .../apache/hadoop/hive/common/repl/ReplConst.java  |  7 ++
 .../apache/hadoop/hive/metastore/HMSHandler.java   |  4 +++-
 5 files changed, 41 insertions(+), 1 deletion(-)



[hive] branch master updated: HIVE-26598: Fix unsetting of db params for optimized bootstrap when repl dump initiates data copy (Rakshith Chandraiah, reviewed by Teddy Choi)

2023-01-18 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new d480aa575f0 HIVE-26598: Fix unsetting of db params for optimized 
bootstrap when repl dump initiates data copy (Rakshith Chandraiah, reviewed by 
Teddy Choi)
d480aa575f0 is described below

commit d480aa575f09e815bd169c4e2cff0f337eea6371
Author: Rakshith C <56068841+rakshith...@users.noreply.github.com>
AuthorDate: Thu Jan 19 10:46:28 2023 +0530

HIVE-26598: Fix unsetting of db params for optimized bootstrap when repl 
dump initiates data copy (Rakshith Chandraiah, reviewed by Teddy Choi)

Co-authored-by: rakshithc 
---
 .../parse/TestReplicationOptimisedBootstrap.java   | 57 ++
 .../hadoop/hive/ql/exec/repl/ReplDumpTask.java |  6 +--
 .../hadoop/hive/ql/exec/repl/ReplDumpWork.java |  9 
 3 files changed, 68 insertions(+), 4 deletions(-)

diff --git 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
index 165b4d438e2..396abd24b47 100644
--- 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
+++ 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
@@ -55,6 +55,7 @@ import java.util.Map;
 
 import static org.apache.hadoop.hdfs.protocol.HdfsConstants.QUOTA_DONT_SET;
 import static org.apache.hadoop.hdfs.protocol.HdfsConstants.QUOTA_RESET;
+import static 
org.apache.hadoop.hive.common.repl.ReplConst.REPL_ENABLE_BACKGROUND_THREAD;
 import static 
org.apache.hadoop.hive.common.repl.ReplConst.REPL_TARGET_DB_PROPERTY;
 import static 
org.apache.hadoop.hive.common.repl.ReplConst.TARGET_OF_REPLICATION;
 import static 
org.apache.hadoop.hive.metastore.ReplChangeManager.SOURCE_OF_REPLICATION;
@@ -1092,4 +1093,60 @@ public class TestReplicationOptimisedBootstrap extends 
BaseReplicationScenariosA
 }
 return txnHandler.getOpenTxns(txnListExcludingReplCreated).getOpen_txns();
   }
+
+  @Test
+  public void testDbParametersAfterOptimizedBootstrap() throws Throwable {
+List<String> withClause = Arrays.asList(
+String.format("'%s'='%s'", 
HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname, "false"),
+String.format("'%s'='%s'", 
HiveConf.ConfVars.HIVE_REPL_FAILOVER_START.varname, "true")
+);
+
+// bootstrap
+primary.run("use " + primaryDbName)
+.run("create table t1 (id int) clustered by(id) into 3 buckets 
stored as orc " +
+"tblproperties (\"transactional\"=\"true\")")
+.run("insert into table t1 values (1),(2)")
+.dump(primaryDbName, withClause);
+replica.load(replicatedDbName, primaryDbName, withClause);
+
+// incremental
+primary.run("use " + primaryDbName)
+.run("insert into table t1 values (3)")
+.dump(primaryDbName, withClause);
+replica.load(replicatedDbName, primaryDbName, withClause);
+
+// make some changes on primary
+primary.run("use " + primaryDbName)
+.run("insert into table t1 values (4)");
+
+withClause = Arrays.asList(
+String.format("'%s'='%s'", 
HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname, "false")
+);
+// 1st cycle of optimized bootstrap
+replica.dump(replicatedDbName, withClause);
+primary.load(primaryDbName, replicatedDbName, withClause);
+
+String[] dbParams = new String[]{
+TARGET_OF_REPLICATION,
+CURR_STATE_ID_SOURCE.toString(),
+CURR_STATE_ID_TARGET.toString(),
+REPL_TARGET_DB_PROPERTY,
+REPL_ENABLE_BACKGROUND_THREAD
+};
+//verify if all db parameters are set
+for (String paramKey : dbParams) {
+  
assertTrue(replica.getDatabase(replicatedDbName).getParameters().containsKey(paramKey));
+}
+
+// 2nd cycle of optimized bootstrap
+replica.dump(replicatedDbName, withClause);
+primary.load(primaryDbName, replicatedDbName, withClause);
+
+for (String paramKey : dbParams) {
+  
assertFalse(replica.getDatabase(replicatedDbName).getParameters().containsKey(paramKey));
+}
+// ensure optimized bootstrap was successful.
+primary.run(String.format("select * from %s.t1", primaryDbName))
+.verifyResults(new String[]{"1", "2", "3"});
+  }
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java
index e7c329a5f25..02815334fc5 100

[hive] branch master updated (61b9683524f -> ad9b463575c)

2023-01-18 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


from 61b9683524f HIVE-26922: Deadlock when rebuilding Materialized view 
stored by Iceberg (Krisztian Kasa, reviewed by Aman Sinha, Stamatis Zampetakis, 
Steve Carlin)
 add ad9b463575c HIVE-26711 The very first REPL Load should make the Target 
Database read-only. (Shreenidhi Saigaonkar, reviewed by Teddy Choi)

No new revisions were added by this update.

Summary of changes:
 .../hive/ql/parse/TestReplWithReadOnlyHook.java| 129 +
 .../hadoop/hive/ql/exec/repl/ReplLoadTask.java |  43 +++
 .../apache/hadoop/hive/common/repl/ReplConst.java  |   2 +
 3 files changed, 174 insertions(+)
 create mode 100644 
itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplWithReadOnlyHook.java



[hive] branch master updated (f2e908c562c -> 55471330426)

2023-01-18 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


from f2e908c562c HIVE-26717: Query based Rebalance compaction on 
insert-only tables (Laszlo Vegh, reviewed by Denys Kuzmenko, Krisztian Kasa)
 add 55471330426 HIVE-26943 : Fix NPE during Optimised Bootstrap when db is 
dropped (Shreenidhi Saigaonkar, reviewed by Ayush Saxena and Teddy Choi)

No new revisions were added by this update.

Summary of changes:
 .../parse/TestReplicationOptimisedBootstrap.java   | 57 ++
 .../hadoop/hive/ql/exec/repl/ReplLoadTask.java |  4 ++
 2 files changed, 61 insertions(+)



[hive] branch master updated: HIVE-26597: Fix unsetting of db property repl.target.for in ReplicationSemanticAnalyzer (Rakshith Chandraiah, reviewed by Teddy Choi)

2023-01-17 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new c92a478e514 HIVE-26597: Fix unsetting of db property repl.target.for 
in ReplicationSemanticAnalyzer (Rakshith Chandraiah, reviewed by Teddy Choi)
c92a478e514 is described below

commit c92a478e514a28a53009fe5fbf08ce6fa35b58b9
Author: Rakshith C <56068841+rakshith...@users.noreply.github.com>
AuthorDate: Wed Jan 18 10:05:44 2023 +0530

HIVE-26597: Fix unsetting of db property repl.target.for in 
ReplicationSemanticAnalyzer (Rakshith Chandraiah, reviewed by Teddy Choi)
---
 .../parse/TestReplicationScenariosAcidTables.java  | 65 +-
 .../hive/ql/parse/ReplicationSemanticAnalyzer.java | 20 ---
 2 files changed, 49 insertions(+), 36 deletions(-)

diff --git 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java
 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java
index 13bb9ad1c64..4c82a42badc 100644
--- 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java
+++ 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java
@@ -383,10 +383,6 @@ public class TestReplicationScenariosAcidTables extends 
BaseReplicationScenarios
 
assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(primary.getDatabase(primaryDbName),
 MetaStoreUtils.FailoverEndpoint.SOURCE));
 
-primary.run("drop database if exists " + primaryDbName + " cascade");
-
-assertTrue(primary.getDatabase(primaryDbName) == null);
-
 
assertFalse(ReplChangeManager.isSourceOfReplication(replica.getDatabase(replicatedDbName)));
 
 WarehouseInstance.Tuple reverseDumpData = replica.run("create table t3 (id 
int)")
@@ -398,11 +394,15 @@ public class TestReplicationScenariosAcidTables extends 
BaseReplicationScenarios
 assertTrue(fs.exists(new Path(dumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
 dumpPath = new Path(reverseDumpData.dumpLocation, ReplUtils.REPL_HIVE_BASE_DIR);
 assertFalse(fs.exists(new Path(dumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
-assertTrue(new DumpMetaData(dumpPath, conf).getDumpType() == DumpType.BOOTSTRAP);
+assertTrue(new DumpMetaData(dumpPath, conf).getDumpType() == DumpType.INCREMENTAL);
 assertTrue(fs.exists(new Path(dumpPath, DUMP_ACKNOWLEDGEMENT.toString())));
 db = replica.getDatabase(replicatedDbName);
 assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(db, 
MetaStoreUtils.FailoverEndpoint.TARGET));
-assertFalse(MetaStoreUtils.isTargetOfReplication(db));
+assertTrue(MetaStoreUtils.isTargetOfReplication(db));
+//do a second reverse dump.
+primary.load(primaryDbName, replicatedDbName);
+reverseDumpData = replica.dump(replicatedDbName);
+dumpPath = new Path(reverseDumpData.dumpLocation, 
ReplUtils.REPL_HIVE_BASE_DIR);
 
 primary.load(primaryDbName, replicatedDbName)
 .run("use " + primaryDbName)
@@ -419,7 +419,7 @@ public class TestReplicationScenariosAcidTables extends 
BaseReplicationScenarios
 
 Database primaryDb = primary.getDatabase(primaryDbName);
 assertFalse(primaryDb == null);
-assertTrue(ReplUtils.isFirstIncPending(primaryDb.getParameters()));
+assertFalse(ReplUtils.isFirstIncPending(primaryDb.getParameters()));
 assertTrue(MetaStoreUtils.isTargetOfReplication(primaryDb));
 assertFalse(MetaStoreUtils.isDbBeingFailedOver(primaryDb));
 assertTrue(fs.exists(new Path(dumpPath, LOAD_ACKNOWLEDGEMENT.toString())));
@@ -531,7 +531,9 @@ public class TestReplicationScenariosAcidTables extends 
BaseReplicationScenarios
 "'" + HiveConf.ConfVars.REPL_RETAIN_PREV_DUMP_DIR + "'='true'",
 "'" + HiveConf.ConfVars.REPL_RETAIN_PREV_DUMP_DIR_COUNT + "'='1'");
 List<String> retainPrevDumpDir = Arrays.asList("'" + HiveConf.ConfVars.REPL_RETAIN_PREV_DUMP_DIR + "'='true'",
-"'" + HiveConf.ConfVars.REPL_RETAIN_PREV_DUMP_DIR_COUNT + "'='1'");
+"'" + HiveConf.ConfVars.REPL_RETAIN_PREV_DUMP_DIR_COUNT + "'='1'",
+"'" + HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET + 
"'='true'"
+);
 WarehouseInstance.Tuple dumpData = primary.run("use " + primaryDbName)
 .run("create table t1 (id int) clustered by(id) into 3 buckets 
stored as orc " +
 "tblproperties (\"transactional\"=\"true\")")
@@ -608,8 +610,6 @@ public class TestReplicationScenariosAcidTables extends 
BaseReplicationScenarios
 assertTrue(MetaStore

[hive] branch master updated: HIVE-26558 Small test fix in TestReplicationOptimisedBootstrap to make test method independent of TxnType enums (Amit Saonerkar, reviewed by Teddy Choi)

2023-01-11 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new d8a218003f4 HIVE-26558 Small test fix in 
TestReplicationOptimisedBootstrap to make test method independent of TxnType 
enums (Amit Saonerkar, reviewed by Teddy Choi)
d8a218003f4 is described below

commit d8a218003f402b368a2c01c8f91a3a684ef8ec7f
Author: atsaonerk <106254618+atsaon...@users.noreply.github.com>
AuthorDate: Wed Jan 11 14:17:10 2023 +0530

HIVE-26558 Small test fix in TestReplicationOptimisedBootstrap to make test 
method independent of TxnType enums (Amit Saonerkar, reviewed by Teddy Choi)
---
 .../hive/ql/parse/TestReplicationOptimisedBootstrap.java  | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
index 359c48d6605..f11adc462f3 100644
--- 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
+++ 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
@@ -1026,8 +1026,13 @@ public class TestReplicationOptimisedBootstrap extends 
BaseReplicationScenariosA
   }
 
  List<Long> getReplCreatedTxns() throws MetaException {
-List<TxnType> excludedTxns = Arrays.asList(TxnType.DEFAULT, TxnType.READ_ONLY, TxnType.COMPACTION,
-TxnType.MATER_VIEW_REBUILD, TxnType.SOFT_DELETE);
-return txnHandler.getOpenTxns(excludedTxns).getOpen_txns();
+List<TxnType> txnListExcludingReplCreated = new ArrayList<>();
+for (TxnType type : TxnType.values()) {
+  // exclude REPL_CREATED txn
+  if (type != TxnType.REPL_CREATED) {
+txnListExcludingReplCreated.add(type);
+  }
+}
+return txnHandler.getOpenTxns(txnListExcludingReplCreated).getOpen_txns();
   }
 }
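
The loop above derives the exclusion list from TxnType.values() instead of hardcoding every non-REPL_CREATED constant, so the test keeps passing when new transaction types are added to the enum. An equivalent stream-based formulation, shown purely as an illustration of the same idea:

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.hadoop.hive.metastore.api.TxnType;

class TxnTypeFilter {
  // All transaction types except the one to exclude, e.g. TxnType.REPL_CREATED.
  static List<TxnType> allExcept(TxnType excluded) {
    return Arrays.stream(TxnType.values())
        .filter(t -> t != excluded)
        .collect(Collectors.toList());
  }
}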



[hive] branch master updated: HIVE-26555 Read-only mode for Hive database (#3614) (Teddy Choi, reviewed by Laszlo Bodor)

2023-01-03 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new ac1cfe2294d HIVE-26555 Read-only mode for Hive database (#3614) (Teddy 
Choi, reviewed by Laszlo Bodor)
ac1cfe2294d is described below

commit ac1cfe2294d1b7b8c02b60899d69868920ede0f5
Author: Teddy Choi 
AuthorDate: Wed Jan 4 13:35:57 2023 +0900

HIVE-26555 Read-only mode for Hive database (#3614) (Teddy Choi, reviewed 
by Laszlo Bodor)
---
 .../java/org/apache/hadoop/hive/ql/ErrorMsg.java   |   1 +
 .../hive/ql/hooks/EnforceReadOnlyDatabaseHook.java | 196 +
 .../test/queries/clientnegative/read_only_delete.q |  23 +++
 .../test/queries/clientnegative/read_only_insert.q |  23 +++
 .../test/queries/clientpositive/read_only_hook.q   |  27 +++
 .../results/clientnegative/read_only_delete.q.out  |  34 
 .../results/clientnegative/read_only_insert.q.out  |  35 
 .../clientpositive/llap/read_only_hook.q.out   |  71 
 8 files changed, 410 insertions(+)

diff --git a/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java 
b/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index 1a3ad38acef..5e4fbf8e642 100644
--- a/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ b/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -485,6 +485,7 @@ public enum ErrorMsg {
   CTLF_UNSUPPORTED_FORMAT(10434, "CREATE TABLE LIKE FILE is not supported by 
the ''{0}'' file format", true),
   NON_NATIVE_ACID_UPDATE(10435, "Update and Merge into non-native ACID table 
is only supported when " +
   HiveConf.ConfVars.SPLIT_UPDATE.varname + " is true."),
+  READ_ONLY_DATABASE(10436, "Database {0} is read-only", true),
 
   //== 2 range starts here 
//
 
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/hooks/EnforceReadOnlyDatabaseHook.java 
b/ql/src/java/org/apache/hadoop/hive/ql/hooks/EnforceReadOnlyDatabaseHook.java
new file mode 100644
index 000..c343fc7b14f
--- /dev/null
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/hooks/EnforceReadOnlyDatabaseHook.java
@@ -0,0 +1,196 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.hooks;
+
+import com.google.common.collect.Sets;
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.ql.ErrorMsg;
+import org.apache.hadoop.hive.ql.QueryState;
+import org.apache.hadoop.hive.ql.ddl.DDLDesc;
+import org.apache.hadoop.hive.ql.ddl.DDLWork;
+import 
org.apache.hadoop.hive.ql.ddl.database.alter.poperties.AlterDatabaseSetPropertiesDesc;
+import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.HiveOperation;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.Serializable;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * EnforceReadOnlyDatabaseHook is a hook that disallows write operations on 
read-only databases.
+ * It's enforced when "hive.exec.pre.hooks" has 
"org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyDatabaseHook" and
+ * a database has 'readonly'='true' property.
+ */
+public class EnforceReadOnlyDatabaseHook implements ExecuteWithHookContext {
+  public static final String READONLY = "readonly";
+  private static final Logger LOG = 
LoggerFactory.getLogger(EnforceReadOnlyDatabaseHook.class);
+
+  @Override
+  public void run(HookContext hookContext) throws Exception {
+assert(hookContext.getHookType() == HookContext.HookType.PRE_EXEC_HOOK);
+final QueryState queryState = hookContext.getQueryState();
+final HiveOperation hiveOperation = queryState.getHiveOperation();
+
+// Allow read-only type operations, excluding query.
+// A query can be EXPLAIN, SELECT, INSERT, UPDATE, or DE
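
Putting the javadoc above into practice takes two settings: register the hook in hive.exec.pre.hooks and tag the database with 'readonly'='true'. A hedged sketch of the wiring (database name and the SQL in the comments are placeholders, not from the patch):

import org.apache.hadoop.hive.conf.HiveConf;

public class ReadOnlyHookSetup {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    conf.set("hive.exec.pre.hooks",
        "org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyDatabaseHook");
    // A database then opts in with:
    //   ALTER DATABASE sales_db SET DBPROPERTIES ('readonly'='true');
    // After that, reads succeed while writes fail with error 10436,
    // "Database sales_db is read-only", per the ErrorMsg entry above.
    System.out.println(conf.get("hive.exec.pre.hooks"));
  }
}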

[hive] branch master updated: HIVE-26624: Set repl.background.enable on target after failover completion (Vinit Patni, reviewed by László Pintér, Teddy Choi)

2022-11-17 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 497553016d8 HIVE-26624: Set repl.background.enable on target after 
failover completion (Vinit Patni, reviewed by László Pintér, Teddy Choi)
497553016d8 is described below

commit 497553016d804576cdf2262e5869541c71e2efbd
Author: vinitpatni 
AuthorDate: Fri Nov 18 10:22:42 2022 +0530

HIVE-26624: Set repl.background.enable on target after failover completion 
(Vinit Patni, reviewed by László Pintér, Teddy Choi)

Co-authored-by: vpatni 
---
 .../parse/TestReplicationScenariosAcidTables.java  | 60 ++
 .../hadoop/hive/ql/exec/repl/ReplLoadTask.java |  2 +
 2 files changed, 62 insertions(+)

diff --git 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java
 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java
index 7315f3565b3..13bb9ad1c64 100644
--- 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java
+++ 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java
@@ -80,6 +80,7 @@ import java.util.Map;
 
 
 import static 
org.apache.hadoop.hive.common.repl.ReplConst.SOURCE_OF_REPLICATION;
+import static 
org.apache.hadoop.hive.common.repl.ReplConst.REPL_ENABLE_BACKGROUND_THREAD;
 import static org.apache.hadoop.hive.ql.exec.repl.ReplAck.DUMP_ACKNOWLEDGEMENT;
 import static org.apache.hadoop.hive.ql.exec.repl.ReplAck.LOAD_ACKNOWLEDGEMENT;
 import static org.junit.Assert.assertEquals;
@@ -855,6 +856,65 @@ public class TestReplicationScenariosAcidTables extends 
BaseReplicationScenarios
 
assertFalse(MetaStoreUtils.isDbBeingFailedOver(replica.getDatabase(replicatedDbName)));
   }
 
+  @Test
+  public void testEnablementOfReplBackgroundThreadDuringFailover() throws 
Throwable{
+List<String> failoverConfigs = Arrays.asList("'" + HiveConf.ConfVars.HIVE_REPL_FAILOVER_START + "'='true'");
+
+WarehouseInstance.Tuple dumpData = primary.run("use " + primaryDbName)
+.run("create table t1 (id int) clustered by(id) into 3 buckets 
stored as orc " +
+"tblproperties (\"transactional\"=\"true\")")
+.run("create table t2 (rank int) partitioned by (name string) 
tblproperties(\"transactional\"=\"true\", " +
+"\"transactional_properties\"=\"insert_only\")")
+.dump(primaryDbName, failoverConfigs);
+
+FileSystem fs = new Path(dumpData.dumpLocation).getFileSystem(conf);
+Path dumpPath = new Path(dumpData.dumpLocation, 
ReplUtils.REPL_HIVE_BASE_DIR);
+assertFalse(fs.exists(new Path(dumpPath, ReplAck.FAILOVER_READY_MARKER.toString())));
+
assertFalse(MetaStoreUtils.isDbBeingFailedOver(primary.getDatabase(primaryDbName)));
+
+replica.load(replicatedDbName, primaryDbName, failoverConfigs)
+.run("use " + replicatedDbName)
+.run("show tables")
+.verifyResults(new String[]{"t1", "t2"})
+.run("repl status " + replicatedDbName)
+.verifyResult(dumpData.lastReplicationId);
+
+Database db = replica.getDatabase(replicatedDbName);
+assertTrue(MetaStoreUtils.isTargetOfReplication(db));
+assertFalse(MetaStoreUtils.isDbBeingFailedOver(db));
+
+dumpData = primary.run("use " + primaryDbName)
+.run("insert into t1 values(1)")
+.run("insert into t2 partition(name='Bob') values(11)")
+.run("insert into t2 partition(name='Carl') values(10)")
+.dump(primaryDbName, failoverConfigs);
+
+dumpPath = new Path(dumpData.dumpLocation, ReplUtils.REPL_HIVE_BASE_DIR);
+Path failoverReadyMarker = new Path(dumpPath, 
ReplAck.FAILOVER_READY_MARKER.toString());
+assertTrue(fs.exists(new Path(dumpPath, DUMP_ACKNOWLEDGEMENT.toString())));
+assertTrue(fs.exists(new Path(dumpPath, 
FailoverMetaData.FAILOVER_METADATA)));
+assertTrue(fs.exists(failoverReadyMarker));
+
assertTrue(MetaStoreUtils.isDbBeingFailedOverAtEndpoint(primary.getDatabase(primaryDbName),
+MetaStoreUtils.FailoverEndpoint.SOURCE));
+
+replica.load(replicatedDbName, primaryDbName, failoverConfigs)
+.run("use " + replicatedDbName)
+.run("show tables")
+.verifyResults(new String[]{"t1", "t2"})
+.run("repl status " + replicatedDbName)
+.verifyResult(dumpData.lastReplicationId)
+.run("select id from t1")
+ 

[hive] branch master updated (7a238e67ad4 -> f698642b517)

2022-11-10 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


from 7a238e67ad4 HIVE-26667: Incompatible expression deserialization 
against latest HMS (Zhihua Deng reviewed by Sai Hemanth)
 add f698642b517 HIVE-26437: Dump unpartitioned tables in parallel. (Amit 
Saonerkar reviewed by Teddy Choi, Rajesh Balamohan)

No new revisions were added by this update.

Summary of changes:
 .../java/org/apache/hadoop/hive/conf/HiveConf.java |   2 +
 .../hive/benchmark/ql/exec/DumbTableExport.java|  21 ++--
 .../ql/exec/TableAndPartitionExportBench.java  |  93 ++
 .../TestReplicationScenariosExternalTables.java|  90 +
 .../org/apache/hadoop/hive/ql/exec/ExportTask.java |   2 +-
 .../hadoop/hive/ql/exec/repl/ReplDumpTask.java |  47 ++-
 .../org/apache/hadoop/hive/ql/metadata/Hive.java   |   4 +-
 .../hadoop/hive/ql/parse/repl/dump/ExportJob.java  |   7 +-
 .../hive/ql/parse/repl/dump/ExportService.java | 139 +
 .../hive/ql/parse/repl/dump/PartitionExport.java   |  58 +++--
 .../hive/ql/parse/repl/dump/TableExport.java   |  64 +-
 .../hadoop/hive/ql/exec/repl/TestReplDumpTask.java |   3 +-
 .../hive/ql/parse/repl/dump/TestExportService.java |  94 ++
 .../apache/hive/service/server/HiveServer2.java|   1 +
 14 files changed, 557 insertions(+), 68 deletions(-)
 copy 
ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/DefaultHandler.java
 => 
itests/hive-jmh/src/main/java/org/apache/hive/benchmark/ql/exec/DumbTableExport.java
 (66%)
 create mode 100644 
itests/hive-jmh/src/main/java/org/apache/hive/benchmark/ql/exec/TableAndPartitionExportBench.java
 copy common/src/java/org/apache/hadoop/hive/ql/lib/NodeProcessorCtx.java => 
ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/ExportJob.java (86%)
 create mode 100644 
ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/ExportService.java
 create mode 100644 
ql/src/test/org/apache/hadoop/hive/ql/parse/repl/dump/TestExportService.java
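
The new ExportService parallelizes what was previously a sequential per-table dump. Its exact API is not visible in this summary, so the following is only a generic sketch of the pattern under that assumption:

import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

public class ParallelExportSketch {
  public static void main(String[] args) throws InterruptedException {
    List<String> tables = List.of("t1", "t2", "t3"); // unpartitioned tables
    ExecutorService pool = Executors.newFixedThreadPool(4);
    for (String table : tables) {
      pool.submit(() -> exportTable(table)); // each table is dumped independently
    }
    pool.shutdown();
    pool.awaitTermination(1, TimeUnit.HOURS);
  }

  static void exportTable(String table) {
    // Placeholder for writing the table's metadata and data to the dump dir.
    System.out.println("exported " + table);
  }
}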



[hive] branch master updated: HIVE-26573: Fix ClassCastException issues for Decimal64 vectorization. (#3630)

2022-10-20 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new d3c35ff43d0 HIVE-26573: Fix ClassCastException issues for Decimal64 
vectorization. (#3630)
d3c35ff43d0 is described below

commit d3c35ff43d068b46252e0cbd1eafd75c33420183
Author: scarlin-cloudera <55709772+scarlin-cloud...@users.noreply.github.com>
AuthorDate: Thu Oct 20 17:40:38 2022 -0700

HIVE-26573: Fix ClassCastException issues for Decimal64 vectorization. 
(#3630)
---
 .../hive/ql/exec/vector/VectorDeserializeRow.java  | 61 --
 .../hive/ql/exec/vector/TestVectorSerDeRow.java| 52 +-
 .../hive/ql/exec/vector/VectorRandomRowSource.java |  5 +-
 .../lazy/fast/LazySimpleDeserializeRead.java   |  3 --
 .../lazybinary/fast/LazyBinaryDeserializeRead.java |  8 +--
 .../hive/ql/exec/vector/Decimal64ColumnVector.java |  2 +-
 .../hive/ql/exec/vector/DecimalColumnVector.java   |  2 +-
 .../hive/ql/exec/vector/IDecimalColumnVector.java  | 25 +
 8 files changed, 104 insertions(+), 54 deletions(-)

diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
index 61ed0ce37da..c1f5d8ca32c 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
@@ -26,7 +26,6 @@ import java.util.List;
 import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.VectorPartitionConversion;
@@ -87,7 +86,6 @@ public final class VectorDeserializeRow {
   private T deserializeRead;
 
   private TypeInfo[] sourceTypeInfos;
-  protected DataTypePhysicalVariation[] dataTypePhysicalVariations;
 
   private byte[] inputBytes;
 
@@ -99,7 +97,6 @@ public final class VectorDeserializeRow {
 this();
 this.deserializeRead = deserializeRead;
 sourceTypeInfos = deserializeRead.typeInfos();
-dataTypePhysicalVariations = 
deserializeRead.getDataTypePhysicalVariations();
   }
 
   // Not public since we must have the deserialize read object.
@@ -115,8 +112,6 @@ public final class VectorDeserializeRow {
 private PrimitiveCategory primitiveCategory;
   //The data type primitive category of the column being 
deserialized.
 
-private DataTypePhysicalVariation dataTypePhysicalVariation;
-
 private int maxLength;
   // For the CHAR and VARCHAR data types, the maximum 
character length of
   // the column.  Otherwise, 0.
@@ -139,12 +134,11 @@ public final class VectorDeserializeRow {
 
 private VectorBatchDeserializer deserializer;
 
-public Field(PrimitiveCategory primitiveCategory, 
DataTypePhysicalVariation dataTypePhysicalVariation,
+public Field(PrimitiveCategory primitiveCategory,
 int maxLength, VectorBatchDeserializer deserializer) {
   isPrimitive = true;
   this.category = Category.PRIMITIVE;
   this.primitiveCategory = primitiveCategory;
-  this.dataTypePhysicalVariation = dataTypePhysicalVariation;
   this.maxLength = maxLength;
   this.isConvert = false;
   this.conversionWritable = null;
@@ -160,7 +154,6 @@ public final class VectorDeserializeRow {
   this.category = category;
   this.objectInspector = 
TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo);
   this.primitiveCategory = null;
-  this.dataTypePhysicalVariation = null;
   this.maxLength = 0;
   this.isConvert = false;
   this.conversionWritable = null;
@@ -180,10 +173,6 @@ public final class VectorDeserializeRow {
   return primitiveCategory;
 }
 
-public DataTypePhysicalVariation getDataTypePhysicalVariation() {
-  return dataTypePhysicalVariation;
-}
-
 public void setMaxLength(int maxLength) {
   this.maxLength = maxLength;
 }
@@ -253,8 +242,7 @@ public final class VectorDeserializeRow {
 topLevelFields = new Field[count];
   }
 
-  private Field allocatePrimitiveField(TypeInfo sourceTypeInfo,
-  DataTypePhysicalVariation dataTypePhysicalVariation) {
+  private Field allocatePrimitiveField(TypeInfo sourceTypeInfo) {
 final PrimitiveTypeInfo sourcePrimitiveTypeInfo = (PrimitiveTypeInfo) 
sourceTypeInfo;
 final PrimitiveCategory sourcePrimitiveCategory = 
sourcePrimitiveTypeInfo.getPrimitiveCategory();
 int maxLength = 0;
@@ -318,7 +306,7 @@ public final class VectorDeserializeRow {
 throw new RuntimeException("Primiti

[hive] branch master updated: HIVE-23745: Avoid copying userpayload in task communicator (Dmitriy Fingerman, reviewed by Laszlo Bodor)

2022-10-04 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 1be7135f649 HIVE-23745: Avoid copying userpayload in task communicator 
(Dmitriy Fingerman, reviewed by Laszlo Bodor)
1be7135f649 is described below

commit 1be7135f649138fcb8523944545364070d8490f6
Author: Dmitriy Fingerman 
AuthorDate: Tue Oct 4 04:24:34 2022 -0400

HIVE-23745: Avoid copying userpayload in task communicator (Dmitriy 
Fingerman, reviewed by Laszlo Bodor)

Co-authored-by: Dmitriy Fingerman 
---
 llap-common/src/java/org/apache/hadoop/hive/llap/tez/Converters.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git 
a/llap-common/src/java/org/apache/hadoop/hive/llap/tez/Converters.java 
b/llap-common/src/java/org/apache/hadoop/hive/llap/tez/Converters.java
index 83e5246b233..ba7664e7bf5 100644
--- a/llap-common/src/java/org/apache/hadoop/hive/llap/tez/Converters.java
+++ b/llap-common/src/java/org/apache/hadoop/hive/llap/tez/Converters.java
@@ -18,7 +18,7 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 
-import com.google.protobuf.ByteString;
+import com.google.protobuf.UnsafeByteOperations;
 import 
org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.EntityDescriptorProto;
 import 
org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.GroupInputSpecProto;
 import 
org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.IOSpecProto;
@@ -179,7 +179,7 @@ public class Converters {
 if (userPayload != null) {
   UserPayloadProto.Builder payloadBuilder = UserPayloadProto.newBuilder();
   if (userPayload.hasPayload()) {
-
payloadBuilder.setUserPayload(ByteString.copyFrom(userPayload.getPayload()));
+
payloadBuilder.setUserPayload(UnsafeByteOperations.unsafeWrap(userPayload.getPayload()));
 payloadBuilder.setVersion(userPayload.getVersion());
   }
   builder.setUserPayload(payloadBuilder.build());
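
The one-line change trades a defensive copy for a zero-copy wrap of the payload buffer, which is safe only because the buffer is not mutated afterwards. A small contrast of the two protobuf calls:

import java.nio.ByteBuffer;
import com.google.protobuf.ByteString;
import com.google.protobuf.UnsafeByteOperations;

public class PayloadWrapDemo {
  public static void main(String[] args) {
    ByteBuffer payload = ByteBuffer.wrap(new byte[] {1, 2, 3});
    // O(n): allocates and copies the payload bytes.
    ByteString copied = ByteString.copyFrom(payload.duplicate());
    // O(1): aliases the caller's buffer; any later mutation would leak through.
    ByteString wrapped = UnsafeByteOperations.unsafeWrap(payload.duplicate());
    System.out.println(copied.size() + " " + wrapped.size()); // 3 3
  }
}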



[hive] branch master updated: HIVE-24933: Replication fails for transactional tables having same name as dropped external tables. (Janardhan Hungund, reviewed by Teddy Choi)

2022-09-12 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 2093037cb49 HIVE-24933: Replication fails for transactional tables 
having same name as dropped external tables. (Janardhan Hungund, reviewed by 
Teddy Choi)
2093037cb49 is described below

commit 2093037cb49e0e824223042043a9b6ffc3aaebcd
Author: jhungund <106576553+jhung...@users.noreply.github.com>
AuthorDate: Tue Sep 13 09:15:27 2022 +0530

HIVE-24933: Replication fails for transactional tables having same name as 
dropped external tables. (Janardhan Hungund, reviewed by Teddy Choi)

Summary of the change:
While setting up the tasks during the repl-load phase of the replication, 
delay the access to
table metadata until the task execution. This will avoid inconsistent 
metadata access during
task creation.

Root Cause Analysis:

Background:
During the incremental load phase of replication, all event logs are 
processed sequentially.
Multiple tasks are spawned during the processing of each event.
All the spawned tasks are subsequently executed sequentially.

Scenario of the issue:
The issue is seen in the following scenario:
1. An external table (e.g. T1) is created and replicated to the target cluster during earlier replication cycles.
2. This external table is dropped.
3. A new managed table with the same name (T1) is created.
4. The subsequent repl-load phase fails.

Root cause:
1. The above-mentioned operations (table drop and recreation) are propagated to the target cluster
   via event logs during the subsequent incremental phase of replication.
2. We create DDL tasks to drop the external table for the drop-table event.
3. We also create new tasks to create the new managed table.
4. Some additional events are logged which create tasks to load the newly created table.
5. During the creation of these load-table tasks, we try to access metadata corresponding to the new table.
   In the normal scenario of a fresh table creation, the metastore would not yet have any data for the new table.
   However, in this scenario, the old table still exists, so we end up using the metadata corresponding to the old (external) table.
   We try to use this metadata to create the load tasks for the new table.
   During the execution of these load tasks, which run after the drop and recreate tasks, we find that the metadata set in the
   task context is stale and inconsistent with the newly created table. Hence, the error.

Fix:
Do not access the table metadata during the task creation phase for
table-load.
Instead, access the metadata during task execution. By that time,
the metadata has been updated to the latest state by the previously executed
tasks.
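
To illustrate the pattern the fix describes, here is a minimal, hypothetical
sketch of deferring metadata resolution to execute time; TableMeta,
MetaStore and LoadTableTask are invented names for illustration, not the
actual Hive classes:

import java.util.HashMap;
import java.util.Map;

class TableMeta {
  final String name;
  final boolean external;
  TableMeta(String name, boolean external) {
    this.name = name;
    this.external = external;
  }
}

class MetaStore {
  private final Map<String, TableMeta> tables = new HashMap<>();
  void put(TableMeta t) { tables.put(t.name, t); }
  TableMeta get(String name) { return tables.get(name); }
}

class LoadTableTask {
  private final String tableName; // capture only the name at creation time

  LoadTableTask(String tableName) { this.tableName = tableName; }

  void execute(MetaStore ms) {
    // Resolve metadata at execution time: the preceding drop/create tasks
    // have already run, so this sees the recreated (managed) table rather
    // than the stale external one.
    TableMeta meta = ms.get(tableName);
    System.out.println("loading " + meta.name + ", external=" + meta.external);
  }
}

public class DeferredMetadataDemo {
  public static void main(String[] args) {
    MetaStore ms = new MetaStore();
    ms.put(new TableMeta("t1", true));            // old external table
    LoadTableTask load = new LoadTableTask("t1"); // created while t1 is external

    ms.put(new TableMeta("t1", false));           // drop + recreate as managed
    load.execute(ms);                             // prints external=false
  }
}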

Change-Id: I79ed804617dcdadb51f961a933f4023ac0b6f509
---
 .../parse/BaseReplicationScenariosAcidTables.java  |   2 +-
 .../parse/TestReplicationScenariosAcidTables.java  |  82 
 .../org/apache/hadoop/hive/ql/exec/CopyTask.java   |  21 +++-
 .../org/apache/hadoop/hive/ql/exec/MoveTask.java   |  49 +---
 .../apache/hadoop/hive/ql/exec/ReplCopyTask.java   |   9 ++
 .../java/org/apache/hadoop/hive/ql/exec/Task.java  |  10 ++
 .../hive/ql/parse/ImportSemanticAnalyzer.java  | 138 ++---
 .../apache/hadoop/hive/ql/plan/BaseCopyWork.java   |  29 +
 .../org/apache/hadoop/hive/ql/plan/CopyWork.java   |  16 ++-
 .../hadoop/hive/ql/plan/DeferredWorkContext.java   |  68 ++
 .../org/apache/hadoop/hive/ql/plan/MoveWork.java   |  31 -
 .../apache/hadoop/hive/ql/plan/ReplCopyWork.java   |  18 +++
 12 files changed, 380 insertions(+), 93 deletions(-)

diff --git 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/BaseReplicationScenariosAcidTables.java
 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/BaseReplicationScenariosAcidTables.java
index e1f3238486b..90aa944fe4d 100644
--- 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/BaseReplicationScenariosAcidTables.java
+++ 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/BaseReplicationScenariosAcidTables.java
@@ -117,7 +117,7 @@ public class BaseReplicationScenariosAcidTables {
 replicaNonAcid = new WarehouseInstance(LOG, miniDFSCluster, 
overridesForHiveConf1);
   }
 
-  private static void setReplicaExternalBase(FileSystem fs, Map confMap) throws IOException {
+  protected static void setReplicaExternalBase(FileSystem fs, Map confMap) throws IOException {
 fs.mkdirs(REPLICA_EXTERNAL_BASE);
 fullyQualifiedReplicaExternalBase =  
fs.getFileStatus(REPLICA_EXTERNAL_BASE).getPath().toString();
 confMap.put(HiveConf.ConfVars.REPL_EXTERNAL_TABLE_BASE_DIR.v

[hive] branch master updated (9b00e04a10d -> 27490a8130a)

2022-08-22 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


from 9b00e04a10d HIVE-26483: Use DDL_NO_LOCK when running iceberg CTAS 
query (#3533) (Laszlo Pinter, reviewed by Denys Kuzmenko)
 add 27490a8130a HIVE-26363: converting replLogger time format from epoch 
to UTC (#3541)

No new revisions were added by this update.

Summary of changes:
 .../hadoop/hive/ql/exec/repl/util/ReplUtils.java   |  12 +++
 .../parse/repl/dump/log/state/AtlasDumpBegin.java  |   7 +-
 .../ql/parse/repl/dump/log/state/AtlasDumpEnd.java |   6 +-
 .../repl/dump/log/state/BootstrapDumpBegin.java|   8 +-
 .../repl/dump/log/state/BootstrapDumpEnd.java  |   8 +-
 .../repl/dump/log/state/BootstrapDumpFunction.java |   6 +-
 .../repl/dump/log/state/BootstrapDumpTable.java|   6 +-
 .../repl/dump/log/state/IncrementalDumpBegin.java  |   8 +-
 .../repl/dump/log/state/IncrementalDumpEnd.java|   8 +-
 .../repl/dump/log/state/IncrementalDumpEvent.java  |   6 +-
 .../parse/repl/dump/log/state/RangerDumpBegin.java |   6 +-
 .../parse/repl/dump/log/state/RangerDumpEnd.java   |   6 +-
 .../parse/repl/load/log/state/AtlasLoadBegin.java  |   6 +-
 .../ql/parse/repl/load/log/state/AtlasLoadEnd.java |   6 +-
 .../repl/load/log/state/BootstrapLoadBegin.java|   8 +-
 .../repl/load/log/state/BootstrapLoadEnd.java  |   8 +-
 .../repl/load/log/state/BootstrapLoadFunction.java |   6 +-
 .../repl/load/log/state/BootstrapLoadTable.java|   6 +-
 .../ql/parse/repl/load/log/state/DataCopyEnd.java  |   4 +-
 .../repl/load/log/state/IncrementalLoadBegin.java  |   8 +-
 .../repl/load/log/state/IncrementalLoadEnd.java|   8 +-
 .../repl/load/log/state/IncrementalLoadEvent.java  |   6 +-
 .../parse/repl/load/log/state/RangerLoadBegin.java |   6 +-
 .../parse/repl/load/log/state/RangerLoadEnd.java   |   7 +-
 .../ql/parse/repl/TestReplStateLogTimeFormat.java  | 110 +
 25 files changed, 242 insertions(+), 34 deletions(-)
 create mode 100644 
ql/src/test/org/apache/hadoop/hive/ql/parse/repl/TestReplStateLogTimeFormat.java
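
The conversion HIVE-26363 describes, rendering an epoch timestamp as a
human-readable UTC string, can be sketched in plain Java; this is an
illustration of the idea, not the ReplUtils implementation:

import java.time.Instant;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;

public class EpochToUtc {
  public static void main(String[] args) {
    long epochMillis = 1_660_000_000_000L; // example replication event time

    // Format the raw epoch value as a UTC timestamp for the repl logs.
    String utc = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss 'UTC'")
        .withZone(ZoneOffset.UTC)
        .format(Instant.ofEpochMilli(epochMillis));

    System.out.println(utc); // 2022-08-08 23:06:40 UTC
  }
}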



[hive] branch master updated: HIVE-26415 : Add epoch time in the information_schema.scheduled_executions view (#3467)

2022-08-18 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 277f2431a8c HIVE-26415 : Add epoch time in the 
information_schema.scheduled_executions view (#3467)
277f2431a8c is described below

commit 277f2431a8cb77e6473a62936cbca479221189b8
Author: shreenidhiSaigaonkar 
<98075562+shreenidhisaigaon...@users.noreply.github.com>
AuthorDate: Fri Aug 19 08:49:31 2022 +0530

HIVE-26415 : Add epoch time in the information_schema.scheduled_executions 
view (#3467)

HIVE-26415 : Add epoch time in the information_schema.scheduled_executions 
view (Shreenidhi Saigaonkar, reviewed by Teddy Choi)
---
 .../upgrade/hive/hive-schema-4.0.0-alpha-2.hive.sql |  3 ++-
 .../upgrade-4.0.0-alpha-1-to-4.0.0-alpha-2.hive.sql | 21 +
 ql/src/test/results/clientpositive/llap/sysdb.q.out |  1 +
 3 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/metastore/scripts/upgrade/hive/hive-schema-4.0.0-alpha-2.hive.sql 
b/metastore/scripts/upgrade/hive/hive-schema-4.0.0-alpha-2.hive.sql
index f6dd0af61a0..c1d16486a53 100644
--- a/metastore/scripts/upgrade/hive/hive-schema-4.0.0-alpha-2.hive.sql
+++ b/metastore/scripts/upgrade/hive/hive-schema-4.0.0-alpha-2.hive.sql
@@ -1980,7 +1980,8 @@ SELECT
   FROM_UNIXTIME(END_TIME) as END_TIME,
   END_TIME-START_TIME as ELAPSED,
   ERROR_MESSAGE,
-  FROM_UNIXTIME(LAST_UPDATE_TIME) AS LAST_UPDATE_TIME
+  FROM_UNIXTIME(LAST_UPDATE_TIME) AS LAST_UPDATE_TIME,
+  START_TIME as START_TIME_SINCE_EPOCH
 FROM
   SYS.SCHEDULED_EXECUTIONS SE
 JOIN
diff --git 
a/metastore/scripts/upgrade/hive/upgrade-4.0.0-alpha-1-to-4.0.0-alpha-2.hive.sql
 
b/metastore/scripts/upgrade/hive/upgrade-4.0.0-alpha-1-to-4.0.0-alpha-2.hive.sql
index 73d9a5234d7..ac520136821 100644
--- 
a/metastore/scripts/upgrade/hive/upgrade-4.0.0-alpha-1-to-4.0.0-alpha-2.hive.sql
+++ 
b/metastore/scripts/upgrade/hive/upgrade-4.0.0-alpha-1-to-4.0.0-alpha-2.hive.sql
@@ -210,4 +210,25 @@ SELECT
   CQ_TBLPROPERTIES
 FROM COMPACTION_QUEUE;
 
+USE INFORMATION_SCHEMA;
+
+create or replace view SCHEDULED_EXECUTIONS as
+SELECT
+  SCHEDULED_EXECUTION_ID,
+  SCHEDULE_NAME,
+  EXECUTOR_QUERY_ID,
+  `STATE`,
+  FROM_UNIXTIME(START_TIME) as START_TIME,
+  FROM_UNIXTIME(END_TIME) as END_TIME,
+  END_TIME-START_TIME as ELAPSED,
+  ERROR_MESSAGE,
+  FROM_UNIXTIME(LAST_UPDATE_TIME) AS LAST_UPDATE_TIME,
+  START_TIME as START_TIME_SINCE_EPOCH
+FROM
+  SYS.SCHEDULED_EXECUTIONS SE
+JOIN
+  SYS.SCHEDULED_QUERIES SQ
+WHERE
+  SE.SCHEDULED_QUERY_ID=SQ.SCHEDULED_QUERY_ID;
+
 SELECT 'Finished upgrading MetaStore schema from 4.0.0-alpha-1 to 
4.0.0-alpha-2';
diff --git a/ql/src/test/results/clientpositive/llap/sysdb.q.out 
b/ql/src/test/results/clientpositive/llap/sysdb.q.out
index d2bf198a4ad..6dcdd595e3b 100644
--- a/ql/src/test/results/clientpositive/llap/sysdb.q.out
+++ b/ql/src/test/results/clientpositive/llap/sysdb.q.out
@@ -773,6 +773,7 @@ scheduled_executionsscheduled_execution_id
 scheduled_executions   scheduled_query_id
 scheduled_executions   start_time
 scheduled_executions   start_time
+scheduled_executions   start_time_since_epoch
 scheduled_executions   state
 scheduled_executions   state
 scheduled_queries  active_execution_id
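
Once the upgrade script has run, the new column can be exercised from any
JDBC client; a hedged sketch, where the connection URL and credentials are
placeholders for your own HiveServer2 setup:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class ScheduledExecutionsEpoch {
  public static void main(String[] args) throws Exception {
    // Placeholder URL; point it at your HiveServer2 instance.
    String url = "jdbc:hive2://localhost:10000/information_schema";
    try (Connection conn = DriverManager.getConnection(url, "hive", "");
         Statement stmt = conn.createStatement();
         ResultSet rs = stmt.executeQuery(
             "SELECT SCHEDULE_NAME, START_TIME, START_TIME_SINCE_EPOCH "
                 + "FROM SCHEDULED_EXECUTIONS")) {
      while (rs.next()) {
        // START_TIME is human-readable; START_TIME_SINCE_EPOCH is raw.
        System.out.println(rs.getString(1) + " " + rs.getString(2)
            + " " + rs.getLong(3));
      }
    }
  }
}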



[hive] branch master updated: Remove unnecessary files from HIVE-21368: Vectorization: Unnecessary Decimal64 -> HiveDecimal conversion (Teddy Choi, reviewed by Gopal V)

2019-03-15 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 677eef8  Remove unnecessary files from HIVE-21368: Vectorization: 
Unnecessary Decimal64 -> HiveDecimal conversion (Teddy Choi, reviewed by Gopal 
V)
677eef8 is described below

commit 677eef81580ec0511d39aef1aef4cce6c34c5c0d
Author: Teddy Choi 
AuthorDate: Sat Mar 16 00:50:34 2019 +0900

Remove unnecessary files from HIVE-21368: Vectorization: Unnecessary 
Decimal64 -> HiveDecimal conversion (Teddy Choi, reviewed by Gopal V)
---
 .../llap/vector_binary_join_groupby.q.out.orig | 645 -
 .../vector_binary_join_groupby.q.out.orig  | 573 --
 2 files changed, 1218 deletions(-)

diff --git 
a/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out.orig 
b/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out.orig
deleted file mode 100644
index 5881607..000
--- 
a/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out.orig
+++ /dev/null
@@ -1,645 +0,0 @@
-PREHOOK: query: DROP TABLE over1k_n7
-PREHOOK: type: DROPTABLE
-POSTHOOK: query: DROP TABLE over1k_n7
-POSTHOOK: type: DROPTABLE
-PREHOOK: query: DROP TABLE hundredorc
-PREHOOK: type: DROPTABLE
-POSTHOOK: query: DROP TABLE hundredorc
-POSTHOOK: type: DROPTABLE
-PREHOOK: query: CREATE TABLE over1k_n7(t tinyint,
-   si smallint,
-   i int,
-   b bigint,
-   f float,
-   d double,
-   bo boolean,
-   s string,
-   ts timestamp,
-   `dec` decimal(4,2),
-   bin binary)
-ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
-STORED AS TEXTFILE
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@over1k_n7
-POSTHOOK: query: CREATE TABLE over1k_n7(t tinyint,
-   si smallint,
-   i int,
-   b bigint,
-   f float,
-   d double,
-   bo boolean,
-   s string,
-   ts timestamp,
-   `dec` decimal(4,2),
-   bin binary)
-ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
-STORED AS TEXTFILE
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@over1k_n7
-PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE 
INTO TABLE over1k_n7
-PREHOOK: type: LOAD
- A masked pattern was here 
-PREHOOK: Output: default@over1k_n7
-POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE 
INTO TABLE over1k_n7
-POSTHOOK: type: LOAD
- A masked pattern was here 
-POSTHOOK: Output: default@over1k_n7
-PREHOOK: query: CREATE TABLE hundredorc(t tinyint,
-   si smallint,
-   i int,
-   b bigint,
-   f float,
-   d double,
-   bo boolean,
-   s string,
-   ts timestamp,
-   `dec` decimal(4,2),
-   bin binary)
-STORED AS ORC
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@hundredorc
-POSTHOOK: query: CREATE TABLE hundredorc(t tinyint,
-   si smallint,
-   i int,
-   b bigint,
-   f float,
-   d double,
-   bo boolean,
-   s string,
-   ts timestamp,
-   `dec` decimal(4,2),
-   bin binary)
-STORED AS ORC
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@hundredorc
-PREHOOK: query: INSERT INTO TABLE hundredorc SELECT * FROM over1k_n7 LIMIT 100
-PREHOOK: type: QUERY
-PREHOOK: Input: default@over1k_n7
-PREHOOK: Output: default@hundredorc
-POSTHOOK: query: INSERT INTO TABLE hundredorc SELECT * FROM over1k_n7 LIMIT 100
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@over1k_n7
-POSTHOOK: Output: default@hundredorc
-POSTHOOK: Lineage: hundredorc.b SIMPLE 
[(over1k_n7)over1k_n7.FieldSchema(name:b, type:bigint, comment:null), ]
-POSTHOOK: Lineage: hundredorc.bin SIMPLE 
[(over1k_n7)over1k_n7.FieldSchema(name:bin, type:binary, comment:null), ]
-POSTHOOK: Lineage: hundredorc.bo SIMPLE 
[(over1k_n7)over1k_n7.FieldSchema(name:bo, type:boolean, comment:null), ]
-POSTHOOK: Lineage: hundredorc.d SIMPLE 
[(over1k_n7)over1k_n7.FieldSchema(name:d, type:double, comment:null), ]
-POSTHOOK: Lineage: hundredorc.dec SIMPLE 
[(over1k_n7)over1k_n7.FieldSchema(name:dec, type:decimal(4,2), comment:null), ]
-POSTHOOK: Lineage: hundredorc.f SIMPLE 
[(over1k_n7)over1k_n7.FieldSchema(name:f, type:float, comment:null), ]
-POSTHOOK: Lineage: hundredorc.i SIMPLE 
[(over1k_n7)over1k_n7.FieldSchema(name:i, type:int, comment:null), ]
-POSTHOOK: Lineage: hundredorc.s SIMPLE 
[(over1k_n7)over1k_n7.FieldSchema(name:s, type:string, comment:null), ]
-POSTHOOK: Lineage: hundredorc.si SIMPLE 
[(over1k_n7)over1k_n7.FieldSchema(name:si, type:smallint, comment:null), ]
-POSTHOOK: L

[hive] branch master updated: HIVE-21368: Vectorization: Unnecessary Decimal64 -> HiveDecimal conversion (Teddy Choi, reviewed by Gopal V)

2019-03-13 Thread tchoi
This is an automated email from the ASF dual-hosted git repository.

tchoi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new c5219a8  HIVE-21368: Vectorization: Unnecessary Decimal64 -> 
HiveDecimal conversion (Teddy Choi, reviewed by Gopal V)
c5219a8 is described below

commit c5219a8490758e042604bc659b7a68e4cc94eef9
Author: Teddy Choi 
AuthorDate: Thu Mar 14 14:16:25 2019 +0900

HIVE-21368: Vectorization: Unnecessary Decimal64 -> HiveDecimal conversion 
(Teddy Choi, reviewed by Gopal V)
---
 .../test/resources/testconfiguration.properties|   1 +
 .../hadoop/hive/ql/exec/vector/VectorCopyRow.java  |  86 +++-
 .../hive/ql/optimizer/physical/Vectorizer.java |   2 +-
 .../queries/clientpositive/vector_decimal_join.q   |   7 +
 .../llap/vector_binary_join_groupby.q.out  |   7 +-
 ...q.out => vector_binary_join_groupby.q.out.orig} |   0
 .../clientpositive/llap/vector_decimal_join.q.out  | 221 +
 .../llap/vector_decimal_mapjoin.q.out  |   8 +-
 .../clientpositive/llap/vectorized_mapjoin3.q.out  |  27 ++-
 ql/src/test/results/clientpositive/masking_1.q.out |  14 +-
 .../spark/vector_decimal_mapjoin.q.out |   8 +-
 .../vector_binary_join_groupby.q.out   |   2 +-
 ...q.out => vector_binary_join_groupby.q.out.orig} |   0
 .../clientpositive/vector_decimal_join.q.out   | 162 +++
 .../clientpositive/vector_decimal_mapjoin.q.out|   8 +-
 15 files changed, 511 insertions(+), 42 deletions(-)

diff --git a/itests/src/test/resources/testconfiguration.properties 
b/itests/src/test/resources/testconfiguration.properties
index 4197fb8..20cda6a 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -268,6 +268,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
   vector_decimal_aggregate.q,\
   vector_decimal_cast.q,\
   vector_decimal_expressions.q,\
+  vector_decimal_join.q,\
   vector_decimal_mapjoin.q,\
   vector_decimal_math_funcs.q,\
   vector_decimal_precision.q,\
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java
index 0cf8491..ec8fe52 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.ql.exec.vector;
 
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
@@ -176,9 +177,48 @@ public class VectorCopyRow {
 
 @Override
 void copy(VectorizedRowBatch inBatch, int inBatchIndex, VectorizedRowBatch 
outBatch, int outBatchIndex) {
-  DecimalColumnVector inColVector = (DecimalColumnVector) 
inBatch.cols[inColumnIndex];
-  DecimalColumnVector outColVector = (DecimalColumnVector) 
outBatch.cols[outColumnIndex];
+  ColumnVector inColVector = inBatch.cols[inColumnIndex];
+  ColumnVector outColVector = outBatch.cols[outColumnIndex];
+  if (inColVector instanceof DecimalColumnVector) {
+if (outColVector instanceof DecimalColumnVector) {
+  copyDecimalToDecimal((DecimalColumnVector) inColVector, inBatchIndex,
+  (DecimalColumnVector) outColVector, outBatchIndex);
+} else {
+  copyDecimalToDecimal64((DecimalColumnVector) inColVector, 
inBatchIndex,
+  (Decimal64ColumnVector) outColVector, outBatchIndex);
+}
+  } else {
+if (outColVector instanceof DecimalColumnVector) {
+  copyDecimal64ToDecimal((Decimal64ColumnVector) inColVector, 
inBatchIndex,
+  (DecimalColumnVector) outColVector, outBatchIndex);
+} else {
+  copyDecimal64ToDecimal64((Decimal64ColumnVector) inColVector, 
inBatchIndex,
+  (Decimal64ColumnVector) outColVector, outBatchIndex);
+}
+  }
+}
+
+private void copyDecimalToDecimal(DecimalColumnVector inColVector, int 
inBatchIndex,
+DecimalColumnVector outColVector, int outBatchIndex) {
+  if (inColVector.isRepeating) {
+if (inColVector.noNulls || !inColVector.isNull[0]) {
+  outColVector.isNull[outBatchIndex] = false;
+  outColVector.set(outBatchIndex, inColVector.vector[0]);
+} else {
+  VectorizedBatchUtil.setNullColIsNullValue(outColVector, 
outBatchIndex);
+}
+  } else {
+if (inColVector.noNulls || !inColVector.isNull[inBatchIndex]) {
+  outColVector.isNull[outBatchIndex] = false;
+  outColVector.set(outBatchIndex, inColVector.vector[inBatchIndex]);
+} else {
+  VectorizedBatchUtil.setNullColIsNullValue(outColVector, 
o

hive git commit: HIVE-21126: Allow session level queries in LlapBaseInputFormat#getSplits() before actual get_splits() call (Shubham Chaurasia, reviewed by Teddy Choi)

2019-01-24 Thread tchoi
Repository: hive
Updated Branches:
  refs/heads/master 1327d47a5 -> 6fb264e6f


HIVE-21126: Allow session level queries in LlapBaseInputFormat#getSplits() 
before actual get_splits() call (Shubham Chaurasia, reviewed by Teddy Choi)

Change-Id: I4aa0bf6822ed09c4c4438494a0b7ac2eed378f75


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6fb264e6
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6fb264e6
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6fb264e6

Branch: refs/heads/master
Commit: 6fb264e6ff4e19127fed032638364ab1ecd7b0ec
Parents: 1327d47
Author: Teddy Choi 
Authored: Fri Jan 25 15:43:21 2019 +0900
Committer: Teddy Choi 
Committed: Fri Jan 25 15:43:21 2019 +0900

--
 .../hadoop/hive/llap/LlapBaseInputFormat.java  | 17 +
 1 file changed, 17 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/6fb264e6/llap-ext-client/src/java/org/apache/hadoop/hive/llap/LlapBaseInputFormat.java
--
diff --git 
a/llap-ext-client/src/java/org/apache/hadoop/hive/llap/LlapBaseInputFormat.java 
b/llap-ext-client/src/java/org/apache/hadoop/hive/llap/LlapBaseInputFormat.java
index ef03be6..30f3720 100644
--- 
a/llap-ext-client/src/java/org/apache/hadoop/hive/llap/LlapBaseInputFormat.java
+++ 
b/llap-ext-client/src/java/org/apache/hadoop/hive/llap/LlapBaseInputFormat.java
@@ -37,6 +37,7 @@ import java.util.Random;
 import java.util.Set;
 import java.util.UUID;
 import java.util.concurrent.LinkedBlockingQueue;
+import java.util.regex.Pattern;
 
 import org.apache.commons.collections4.ListUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -114,6 +115,8 @@ public class LlapBaseInputFormat>
   public static final String PWD_KEY = "llap.if.pwd";
   public static final String HANDLE_ID = "llap.if.handleid";
   public static final String DB_KEY = "llap.if.database";
+  public static final String SESSION_QUERIES_FOR_GET_NUM_SPLITS = 
"llap.session.queries.for.get.num.splits";
+  public static final Pattern SET_QUERY_PATTERN = 
Pattern.compile("^\\s*set\\s+.*=.+$", Pattern.CASE_INSENSITIVE);
 
   public final String SPLIT_QUERY = "select get_splits(\"%s\",%d)";
   public static final LlapServiceInstance[] serviceInstanceArray = new 
LlapServiceInstance[0];
@@ -259,6 +262,20 @@ public class LlapBaseInputFormat>
 if (database != null && !database.isEmpty()) {
   stmt.execute("USE " + database);
 }
+String sessionQueries = job.get(SESSION_QUERIES_FOR_GET_NUM_SPLITS);
+if (sessionQueries != null && !sessionQueries.trim().isEmpty()) {
+  String[] queries = sessionQueries.trim().split(",");
+  for (String q : queries) {
+//allow only set queries
+if (SET_QUERY_PATTERN.matcher(q).matches()) {
+  LOG.debug("Executing session query: {}", q);
+  stmt.execute(q);
+} else {
+  LOG.warn("Only SET queries are allowed, not executing this 
query: {}", q);
+}
+  }
+}
+
 ResultSet res = stmt.executeQuery(sql);
 while (res.next()) {
   // deserialize split
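
A sketch of how a client might use the new hook: the property key and the
SET-only restriction come from the patch above, while the particular config
values are illustrative:

import org.apache.hadoop.mapred.JobConf;

public class SessionQueriesExample {
  public static void main(String[] args) {
    JobConf job = new JobConf();

    // Comma-separated session-level statements, executed before the
    // get_splits() call; only "set key=value" statements pass the
    // SET_QUERY_PATTERN check above.
    job.set("llap.session.queries.for.get.num.splits",
        "set hive.exec.orc.split.strategy=BI,"
            + "set hive.fetch.task.conversion=none");

    // A non-SET statement in this list is skipped with a warning rather
    // than executed.
  }
}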



hive git commit: HIVE-20419: Vectorization: Prevent mutation of VectorPartitionDesc after being used in a hashmap key (Teddy Choi, reviewed by Gopal V)

2019-01-21 Thread tchoi
Repository: hive
Updated Branches:
  refs/heads/master 34c8ca432 -> cb74a685c


HIVE-20419: Vectorization: Prevent mutation of VectorPartitionDesc after being 
used in a hashmap key (Teddy Choi, reviewed by Gopal V)

Change-Id: Ie9ae156c6b25f39dfdab1742b0c35219c8275062


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cb74a685
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cb74a685
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cb74a685

Branch: refs/heads/master
Commit: cb74a685ce2b09f8deaefa9805361ef96c26eccb
Parents: 34c8ca4
Author: Teddy Choi 
Authored: Tue Jan 22 15:14:26 2019 +0900
Committer: Teddy Choi 
Committed: Tue Jan 22 15:14:26 2019 +0900

--
 .../hive/ql/optimizer/physical/Vectorizer.java  | 130 +++
 .../hive/ql/plan/VectorPartitionDesc.java   |  34 ++---
 .../hive/ql/io/orc/TestInputOutputFormat.java   |   2 +-
 3 files changed, 94 insertions(+), 72 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/cb74a685/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 0a1a25f..5023f2f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -249,6 +249,8 @@ public class Vectorizer implements PhysicalPlanResolver {
 
   private static final Pattern supportedDataTypesPattern;
 
+  private static final TypeInfo[] EMPTY_TYPEINFO_ARRAY = new TypeInfo[0];
+
   static {
 StringBuilder patternBuilder = new StringBuilder();
 patternBuilder.append("int");
@@ -1372,10 +1374,16 @@ public class Vectorizer implements PhysicalPlanResolver 
{
 Set inputFileFormatClassNameSet,
 Map vectorPartitionDescMap,
 Set enabledConditionsMetSet, ArrayList 
enabledConditionsNotMetList,
-Set newSupportSet) {
+Set newSupportSet, List dataTypeInfoList) {
 
   Class inputFileFormatClass = 
pd.getInputFileFormatClass();
   String inputFileFormatClassName = inputFileFormatClass.getName();
+  final TypeInfo[] dataTypeInfos;
+  if (dataTypeInfoList == null) {
+dataTypeInfos = EMPTY_TYPEINFO_ARRAY;
+  } else {
+dataTypeInfos = dataTypeInfoList.toArray(new 
TypeInfo[dataTypeInfoList.size()]);
+  }
 
   // Always collect input file formats.
   inputFileFormatClassNameSet.add(inputFileFormatClassName);
@@ -1401,7 +1409,9 @@ public class Vectorizer implements PhysicalPlanResolver {
 addVectorPartitionDesc(
 pd,
 VectorPartitionDesc.createVectorizedInputFileFormat(
-inputFileFormatClassName, 
Utilities.isInputFileFormatSelfDescribing(pd)),
+inputFileFormatClassName,
+Utilities.isInputFileFormatSelfDescribing(pd),
+dataTypeInfos),
 vectorPartitionDescMap);
 
 
enabledConditionsMetSet.add(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT.varname);
@@ -1427,7 +1437,9 @@ public class Vectorizer implements PhysicalPlanResolver {
   addVectorPartitionDesc(
   pd,
   VectorPartitionDesc.createVectorizedInputFileFormat(
-  inputFileFormatClassName, 
Utilities.isInputFileFormatSelfDescribing(pd)),
+  inputFileFormatClassName,
+  Utilities.isInputFileFormatSelfDescribing(pd),
+  dataTypeInfos),
   vectorPartitionDescMap);
 
   enabledConditionsMetSet.add(
@@ -1495,7 +1507,7 @@ public class Vectorizer implements PhysicalPlanResolver {
 addVectorPartitionDesc(
 pd,
 VectorPartitionDesc.createVectorDeserialize(
-inputFileFormatClassName, 
VectorDeserializeType.LAZY_SIMPLE),
+inputFileFormatClassName, 
VectorDeserializeType.LAZY_SIMPLE, dataTypeInfos),
 vectorPartitionDescMap);
 
 
enabledConditionsMetSet.add(HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTOR_DESERIALIZE.varname);
@@ -1506,7 +1518,7 @@ public class Vectorizer implements PhysicalPlanResolver {
   addVectorPartitionDesc(
   pd,
   VectorPartitionDesc.createVectorDeserialize(
-  inputFileFormatClassName, VectorDeserializeType.LAZY_BINARY),
+  inputFileFormatClassName, VectorDeserializeType.LAZY_BINARY, 
dataTypeInfos),
   vectorPartitionDescMap);
 
   

hive git commit: HIVE-21041: NPE, ParseException in getting schema from logical plan (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-12-18 Thread tchoi
Repository: hive
Updated Branches:
  refs/heads/branch-3 1ea6e49cf -> 86009a0d9


HIVE-21041: NPE, ParseException in getting schema from logical plan (Teddy 
Choi, reviewed by Jesus Camacho Rodriguez)

Change-Id: Iecdee8ddeea2ca3f862b16b1641569f027a12fbb


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/86009a0d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/86009a0d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/86009a0d

Branch: refs/heads/branch-3
Commit: 86009a0d9af52bfde21299eaea8e8b824e489e33
Parents: 1ea6e49
Author: Teddy Choi 
Authored: Tue Dec 18 23:59:52 2018 +0900
Committer: Teddy Choi 
Committed: Tue Dec 18 23:59:52 2018 +0900

--
 .../test/resources/testconfiguration.properties |   1 +
 .../apache/hadoop/hive/ql/parse/ParseUtils.java |  22 +++
 .../test/queries/clientpositive/get_splits_0.q  |   3 +++
 .../clientpositive/llap/get_splits_0.q.out  | Bin 0 -> 916 bytes
 4 files changed, 17 insertions(+), 9 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/86009a0d/itests/src/test/resources/testconfiguration.properties
--
diff --git a/itests/src/test/resources/testconfiguration.properties 
b/itests/src/test/resources/testconfiguration.properties
index af0bc66..b020ca4 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -529,6 +529,7 @@ minillaplocal.query.files=\
   external_jdbc_auth.q,\
   external_jdbc_table.q,\
   external_jdbc_table2.q,\
+  get_splits_0.q,\
   groupby2.q,\
   groupby_groupingset_bug.q,\
   hybridgrace_hashjoin_1.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/86009a0d/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
index be1c59f..07c65af 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
@@ -545,22 +545,26 @@ public final class ParseUtils {
 
   public static RelNode parseQuery(HiveConf conf, String viewQuery)
   throws SemanticException, IOException, ParseException {
-return getAnalyzer(conf).genLogicalPlan(parse(viewQuery));
+final Context ctx = new Context(conf);
+ctx.setIsLoadingMaterializedView(true);
+final ASTNode ast = parse(viewQuery, ctx);
+final CalcitePlanner analyzer = getAnalyzer(conf, ctx);
+return analyzer.genLogicalPlan(ast);
   }
 
   public static List parseQueryAndGetSchema(HiveConf conf, String 
viewQuery)
   throws SemanticException, IOException, ParseException {
-final CalcitePlanner analyzer = getAnalyzer(conf);
-analyzer.genLogicalPlan(parse(viewQuery));
+final Context ctx = new Context(conf);
+ctx.setIsLoadingMaterializedView(true);
+final ASTNode ast = parse(viewQuery, ctx);
+final CalcitePlanner analyzer = getAnalyzer(conf, ctx);
+analyzer.genLogicalPlan(ast);
 return analyzer.getResultSchema();
   }
 
-  private static CalcitePlanner getAnalyzer(HiveConf conf) throws 
SemanticException, IOException {
-final QueryState qs =
-new QueryState.Builder().withHiveConf(conf).build();
-CalcitePlanner analyzer = new CalcitePlanner(qs);
-Context ctx = new Context(conf);
-ctx.setIsLoadingMaterializedView(true);
+  private static CalcitePlanner getAnalyzer(HiveConf conf, Context ctx) throws 
SemanticException {
+final QueryState qs = new QueryState.Builder().withHiveConf(conf).build();
+final CalcitePlanner analyzer = new CalcitePlanner(qs);
 analyzer.initCtx(ctx);
 analyzer.init(false);
 return analyzer;

http://git-wip-us.apache.org/repos/asf/hive/blob/86009a0d/ql/src/test/queries/clientpositive/get_splits_0.q
--
diff --git a/ql/src/test/queries/clientpositive/get_splits_0.q 
b/ql/src/test/queries/clientpositive/get_splits_0.q
new file mode 100644
index 000..e585fda
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/get_splits_0.q
@@ -0,0 +1,3 @@
+--! qt:dataset:src
+select get_splits("SELECT * FROM src WHERE value in (SELECT value FROM 
src)",0);
+select get_splits("SELECT key AS `key 1`, value AS `value 1` FROM src",0);

http://git-wip-us.apache.org/repos/asf/hive/blob/86009a0d/ql/src/test/results/clientpositive/llap/get_splits_0.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/get_splits_0.q.out 
b/ql/src/test/results/clientpositive/llap/get_splits_0.q.out
new file mode 100644
index 

hive git commit: HIVE-21041: NPE, ParseException in getting schema from logical plan (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-12-18 Thread tchoi
Repository: hive
Updated Branches:
  refs/heads/master ef7c3963b -> 5131046ca


HIVE-21041: NPE, ParseException in getting schema from logical plan (Teddy 
Choi, reviewed by Jesus Camacho Rodriguez)

Change-Id: Iff9d9b02f934ed800f932ff916a59288a896f169


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5131046c
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5131046c
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5131046c

Branch: refs/heads/master
Commit: 5131046ca795ecf958e4e24f163e6014588c
Parents: ef7c396
Author: Teddy Choi 
Authored: Tue Dec 18 23:57:36 2018 +0900
Committer: Teddy Choi 
Committed: Tue Dec 18 23:57:36 2018 +0900

--
 .../test/resources/testconfiguration.properties |   1 +
 .../apache/hadoop/hive/ql/parse/ParseUtils.java |  22 +++
 .../test/queries/clientpositive/get_splits_0.q  |   3 +++
 .../clientpositive/llap/get_splits_0.q.out  | Bin 0 -> 916 bytes
 4 files changed, 17 insertions(+), 9 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/5131046c/itests/src/test/resources/testconfiguration.properties
--
diff --git a/itests/src/test/resources/testconfiguration.properties 
b/itests/src/test/resources/testconfiguration.properties
index 01cad2a..18e4f7f 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -527,6 +527,7 @@ minillaplocal.query.files=\
   external_jdbc_table_partition.q,\
   external_jdbc_table_typeconversion.q,\
   fullouter_mapjoin_1_optimized.q,\
+  get_splits_0.q,\
   groupby2.q,\
   groupby_groupingset_bug.q,\
   hybridgrace_hashjoin_1.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/5131046c/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
index be1c59f..07c65af 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
@@ -545,22 +545,26 @@ public final class ParseUtils {
 
   public static RelNode parseQuery(HiveConf conf, String viewQuery)
   throws SemanticException, IOException, ParseException {
-return getAnalyzer(conf).genLogicalPlan(parse(viewQuery));
+final Context ctx = new Context(conf);
+ctx.setIsLoadingMaterializedView(true);
+final ASTNode ast = parse(viewQuery, ctx);
+final CalcitePlanner analyzer = getAnalyzer(conf, ctx);
+return analyzer.genLogicalPlan(ast);
   }
 
   public static List parseQueryAndGetSchema(HiveConf conf, String 
viewQuery)
   throws SemanticException, IOException, ParseException {
-final CalcitePlanner analyzer = getAnalyzer(conf);
-analyzer.genLogicalPlan(parse(viewQuery));
+final Context ctx = new Context(conf);
+ctx.setIsLoadingMaterializedView(true);
+final ASTNode ast = parse(viewQuery, ctx);
+final CalcitePlanner analyzer = getAnalyzer(conf, ctx);
+analyzer.genLogicalPlan(ast);
 return analyzer.getResultSchema();
   }
 
-  private static CalcitePlanner getAnalyzer(HiveConf conf) throws 
SemanticException, IOException {
-final QueryState qs =
-new QueryState.Builder().withHiveConf(conf).build();
-CalcitePlanner analyzer = new CalcitePlanner(qs);
-Context ctx = new Context(conf);
-ctx.setIsLoadingMaterializedView(true);
+  private static CalcitePlanner getAnalyzer(HiveConf conf, Context ctx) throws 
SemanticException {
+final QueryState qs = new QueryState.Builder().withHiveConf(conf).build();
+final CalcitePlanner analyzer = new CalcitePlanner(qs);
 analyzer.initCtx(ctx);
 analyzer.init(false);
 return analyzer;

http://git-wip-us.apache.org/repos/asf/hive/blob/5131046c/ql/src/test/queries/clientpositive/get_splits_0.q
--
diff --git a/ql/src/test/queries/clientpositive/get_splits_0.q 
b/ql/src/test/queries/clientpositive/get_splits_0.q
new file mode 100644
index 000..e585fda
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/get_splits_0.q
@@ -0,0 +1,3 @@
+--! qt:dataset:src
+select get_splits("SELECT * FROM src WHERE value in (SELECT value FROM 
src)",0);
+select get_splits("SELECT key AS `key 1`, value AS `value 1` FROM src",0);

http://git-wip-us.apache.org/repos/asf/hive/blob/5131046c/ql/src/test/results/clientpositive/llap/get_splits_0.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/get_splits_0.q.out 
b/ql/src/test/results/clientpositive/llap/get_splits_0.q.out

hive git commit: HIVE-20827: Inconsistent results for empty arrays (Teddy Choi, reviewed by Gopal V)

2018-12-05 Thread tchoi
Repository: hive
Updated Branches:
  refs/heads/master 8e30b5e02 -> 02b1ee387


HIVE-20827: Inconsistent results for empty arrays (Teddy Choi, reviewed by 
Gopal V)

Signed-off-by: Teddy Choi 


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/02b1ee38
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/02b1ee38
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/02b1ee38

Branch: refs/heads/master
Commit: 02b1ee3876ba3690f4455858e5124a413c8a0337
Parents: 8e30b5e
Author: Teddy Choi 
Authored: Thu Dec 6 01:29:40 2018 +0900
Committer: Teddy Choi 
Committed: Thu Dec 6 01:29:40 2018 +0900

--
 data/files/empty_array.txt  |  1 +
 .../test/resources/testconfiguration.properties |  1 +
 .../test/queries/clientpositive/empty_array.q   | 11 +++
 .../results/clientpositive/empty_array.q.out| 70 
 .../clientpositive/llap/empty_array.q.out   | 70 
 .../lazy/fast/LazySimpleDeserializeRead.java| 24 ++-
 .../hive/serde2/lazy/TestLazySimpleFast.java| 32 +
 7 files changed, 208 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/02b1ee38/data/files/empty_array.txt
--
diff --git a/data/files/empty_array.txt b/data/files/empty_array.txt
new file mode 100644
index 000..7edb2fa
--- /dev/null
+++ b/data/files/empty_array.txt
@@ -0,0 +1 @@
+,

http://git-wip-us.apache.org/repos/asf/hive/blob/02b1ee38/itests/src/test/resources/testconfiguration.properties
--
diff --git a/itests/src/test/resources/testconfiguration.properties 
b/itests/src/test/resources/testconfiguration.properties
index d8f559c..6d92654 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -136,6 +136,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
   dynpart_sort_opt_vectorization.q,\
   dynpart_sort_optimization.q,\
   dynpart_sort_optimization2.q,\
+  empty_array.q,\
   empty_join.q,\
   enforce_order.q,\
   filter_join_breaktask.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/02b1ee38/ql/src/test/queries/clientpositive/empty_array.q
--
diff --git a/ql/src/test/queries/clientpositive/empty_array.q 
b/ql/src/test/queries/clientpositive/empty_array.q
new file mode 100644
index 000..dff24e3
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/empty_array.q
@@ -0,0 +1,11 @@
+create table dtypes3 (c5 array, c13 array>) row format 
delimited fields terminated by ',' stored as TEXTFILE;
+load data local inpath '../../data/files/empty_array.txt' into table dtypes3;
+create table dtypes4 (c5 array, c13 array>) stored as ORC;
+create table dtypes5 (c5 array, c13 array>) stored as 
TEXTFILE;
+
+SET hive.vectorized.execution.enabled=true;
+insert into dtypes4 select * from dtypes3;
+insert into dtypes5 select * from dtypes3;
+
+select * from dtypes4;
+select * from dtypes5;

http://git-wip-us.apache.org/repos/asf/hive/blob/02b1ee38/ql/src/test/results/clientpositive/empty_array.q.out
--
diff --git a/ql/src/test/results/clientpositive/empty_array.q.out 
b/ql/src/test/results/clientpositive/empty_array.q.out
new file mode 100644
index 000..881bdcf
--- /dev/null
+++ b/ql/src/test/results/clientpositive/empty_array.q.out
@@ -0,0 +1,70 @@
+PREHOOK: query: create table dtypes3 (c5 array, c13 array>) 
row format delimited fields terminated by ',' stored as TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dtypes3
+POSTHOOK: query: create table dtypes3 (c5 array, c13 
array>) row format delimited fields terminated by ',' stored as 
TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dtypes3
+PREHOOK: query: load data local inpath '../../data/files/empty_array.txt' into 
table dtypes3
+PREHOOK: type: LOAD
+ A masked pattern was here 
+PREHOOK: Output: default@dtypes3
+POSTHOOK: query: load data local inpath '../../data/files/empty_array.txt' 
into table dtypes3
+POSTHOOK: type: LOAD
+ A masked pattern was here 
+POSTHOOK: Output: default@dtypes3
+PREHOOK: query: create table dtypes4 (c5 array, c13 array>) 
stored as ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dtypes4
+POSTHOOK: query: create table dtypes4 (c5 array, c13 
array>) stored as ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dtypes4
+PREHOOK: query: create table dtypes5 (c5 array, c13 array>) 
stored as TEXTFILE

hive git commit: HIVE-20873: Use Murmur hash for VectorHashKeyWrapperTwoLong to reduce hash collision (Teddy Choi, reviewed by Gopal V)

2018-11-27 Thread tchoi
Repository: hive
Updated Branches:
  refs/heads/master 7c5b99b5b -> dffa3a165


HIVE-20873: Use Murmur hash for VectorHashKeyWrapperTwoLong to reduce hash 
collision (Teddy Choi, reviewed by Gopal V)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/dffa3a16
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/dffa3a16
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/dffa3a16

Branch: refs/heads/master
Commit: dffa3a16588bc8e95b9d0ab5af295a74e06ef702
Parents: 7c5b99b
Author: Teddy Choi 
Authored: Tue Nov 27 20:08:04 2018 +0900
Committer: Teddy Choi 
Committed: Tue Nov 27 20:08:04 2018 +0900

--
 .../apache/hive/common/util/HashCodeUtil.java   | 13 ++---
 .../wrapper/VectorHashKeyWrapperTwoLong.java|  4 +-
 .../org/apache/hive/common/util/Murmur3.java| 57 +---
 .../apache/hive/common/util/TestMurmur3.java| 47 
 4 files changed, 103 insertions(+), 18 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/dffa3a16/common/src/java/org/apache/hive/common/util/HashCodeUtil.java
--
diff --git a/common/src/java/org/apache/hive/common/util/HashCodeUtil.java 
b/common/src/java/org/apache/hive/common/util/HashCodeUtil.java
index 700b2e1..1330cbe 100644
--- a/common/src/java/org/apache/hive/common/util/HashCodeUtil.java
+++ b/common/src/java/org/apache/hive/common/util/HashCodeUtil.java
@@ -33,15 +33,12 @@ public class HashCodeUtil {
 return key;
   }
 
+  public static int calculateTwoLongHashCode(long l0, long l1) {
+return Murmur3.hash32(l0, l1);
+  }
+
   public static int calculateLongHashCode(long key) {
-// Mixing down into the lower bits - this produces a worse hashcode in 
purely
-// numeric terms, but leaving entropy in the higher bits is not useful for 
a
-// 2^n bucketing scheme. See JSR166 ConcurrentHashMap r1.89 (released 
under Public Domain)
-// Note: ConcurrentHashMap has since reverted this to retain entropy bits 
higher
-// up, to support the 2-level hashing for segment which operates at a 
higher bitmask
-key ^= (key >>> 7) ^ (key >>> 4);
-key ^= (key >>> 20) ^ (key >>> 12);
-return (int) key;
+return Murmur3.hash32(key);
   }
 
   public static void calculateLongArrayHashCodes(long[] longs, int[] 
hashCodes, final int count) {

http://git-wip-us.apache.org/repos/asf/hive/blob/dffa3a16/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperTwoLong.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperTwoLong.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperTwoLong.java
index 1652728..c40c35d 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperTwoLong.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperTwoLong.java
@@ -44,9 +44,7 @@ public class VectorHashKeyWrapperTwoLong extends 
VectorHashKeyWrapperTwoBase {
 HashCodeUtil.calculateLongHashCode(longValue0) ^
 null1Hashcode));
 } else {
-  hashcode =
-  HashCodeUtil.calculateLongHashCode(longValue0) >>> 16 ^
-  HashCodeUtil.calculateLongHashCode(longValue1);
+  hashcode = HashCodeUtil.calculateTwoLongHashCode(longValue0, longValue1);
 }
   }
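
The hunks above delegate the two-long case to Murmur3, whose new overloads
follow below. A short usage sketch of the new entry point, with arbitrary
key values:

import org.apache.hive.common.util.HashCodeUtil;

public class TwoLongHashExample {
  public static void main(String[] args) {
    long key0 = 42L;
    long key1 = 1_000_000L;

    // After this patch, this delegates to Murmur3.hash32(l0, l1), which
    // mixes both inputs together instead of xor-ing two independently
    // computed (and shift-weakened) hashes.
    int hash = HashCodeUtil.calculateTwoLongHashCode(key0, key1);
    System.out.println(Integer.toHexString(hash));
  }
}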
 

http://git-wip-us.apache.org/repos/asf/hive/blob/dffa3a16/storage-api/src/java/org/apache/hive/common/util/Murmur3.java
--
diff --git a/storage-api/src/java/org/apache/hive/common/util/Murmur3.java 
b/storage-api/src/java/org/apache/hive/common/util/Murmur3.java
index 8aae28b..85db95c 100644
--- a/storage-api/src/java/org/apache/hive/common/util/Murmur3.java
+++ b/storage-api/src/java/org/apache/hive/common/util/Murmur3.java
@@ -54,6 +54,43 @@ public class Murmur3 {
 
   public static final int DEFAULT_SEED = 104729;
 
+  public static int hash32(long l0, long l1) {
+return hash32(l0, l1, DEFAULT_SEED);
+  }
+
+  public static int hash32(long l0) {
+return hash32(l0, DEFAULT_SEED);
+  }
+
+  /**
+   * Murmur3 32-bit variant.
+   */
+  public static int hash32(long l0, int seed) {
+int hash = seed;
+final long r0 = Long.reverseBytes(l0);
+
+hash = mix32((int) r0, hash);
+hash = mix32((int) (r0 >>> 32), hash);
+
+return fmix32(Long.BYTES, hash);
+  }
+
+  /**
+   * Murmur3 32-bit variant.
+   */
+  public static int hash32(long l0, long l1, int seed) {
+int hash = seed;
+final long r0 = Long.reverseBytes(l0);
+final long r1 = Long.reverseBytes(l1);
+
+hash = mix32((int) r0, hash);
+hash = mix32((int) (r0 >>> 

hive git commit: HIVE-20930: VectorCoalesce in FILTER mode doesn't take effect (Teddy Choi, reviewed by Ashutosh Chauhan)

2018-11-27 Thread tchoi
Repository: hive
Updated Branches:
  refs/heads/master d3f8aba74 -> 7c5b99b5b


HIVE-20930: VectorCoalesce in FILTER mode doesn't take effect (Teddy Choi, 
reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7c5b99b5
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7c5b99b5
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7c5b99b5

Branch: refs/heads/master
Commit: 7c5b99b5b560b418ed44a19c45a9b289ffb11ab6
Parents: d3f8aba
Author: Teddy Choi 
Authored: Tue Nov 27 19:22:47 2018 +0900
Committer: Teddy Choi 
Committed: Tue Nov 27 19:22:47 2018 +0900

--
 .../ql/exec/vector/VectorizationContext.java| 33 ++--
 1 file changed, 30 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/7c5b99b5/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index f0d75f5..8109213 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -2240,7 +2240,7 @@ import com.google.common.annotations.VisibleForTesting;
 
   // Coalesce is a special case because it can take variable number of 
arguments.
   // Nvl is a specialization of the Coalesce.
-  ve = getCoalesceExpression(childExpr, returnType);
+  ve = getCoalesceExpression(childExpr, mode, returnType);
 } else if (udf instanceof GenericUDFElt) {
 
   // Elt is a special case because it can take variable number of 
arguments.
@@ -2311,7 +2311,8 @@ import com.google.common.annotations.VisibleForTesting;
 }
   }
 
-  private VectorExpression getCoalesceExpression(List childExpr, 
TypeInfo returnType)
+  private VectorExpression getCoalesceExpression(List childExpr,
+  VectorExpressionDescriptor.Mode mode, TypeInfo returnType)
   throws HiveException {
 int[] inputColumns = new int[childExpr.size()];
 VectorExpression[] vectorChildren =
@@ -2339,7 +2340,33 @@ import com.google.common.annotations.VisibleForTesting;
 
vectorCoalesce.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE);
 
 freeNonColumns(vectorChildren);
-return vectorCoalesce;
+
+boolean isFilter = false;// Assume.
+if (mode == VectorExpressionDescriptor.Mode.FILTER) {
+
+  // Is output type a BOOLEAN?
+  if (returnType.getCategory() == Category.PRIMITIVE &&
+  ((PrimitiveTypeInfo) returnType).getPrimitiveCategory() == 
PrimitiveCategory.BOOLEAN) {
+isFilter = true;
+  } else {
+return null;
+  }
+}
+
+if (isFilter) {
+
+  // Wrap the PROJECTION IF expression output with a filter.
+  SelectColumnIsTrue filterVectorExpr = new 
SelectColumnIsTrue(vectorCoalesce.getOutputColumnNum());
+
+  filterVectorExpr.setChildExpressions(new VectorExpression[] 
{vectorCoalesce});
+
+  filterVectorExpr.setInputTypeInfos(vectorCoalesce.getOutputTypeInfo());
+  
filterVectorExpr.setInputDataTypePhysicalVariations(vectorCoalesce.getOutputDataTypePhysicalVariation());
+
+  return filterVectorExpr;
+} else {
+  return vectorCoalesce;
+}
   }
 
   private VectorExpression getEltExpression(List childExpr, 
TypeInfo returnType)



[2/2] hive git commit: HIVE-20954: Vector RS operator is not using uniform hash function for TPC-DS query 95 (Teddy Choi, reviewed by Gopal V)

2018-11-26 Thread tchoi
HIVE-20954: Vector RS operator is not using uniform hash function for TPC-DS 
query 95 (Teddy Choi, reviewed by Gopal V)

Change-Id: I861e45474bd2c766d247d1db02db89d9e4c510fc


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/56625f3a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/56625f3a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/56625f3a

Branch: refs/heads/master
Commit: 56625f3a6b7f6b671b4cfd1a753e9906bf5d34a1
Parents: e579e06
Author: Teddy Choi 
Authored: Tue Nov 27 11:30:51 2018 +0900
Committer: Teddy Choi 
Committed: Tue Nov 27 11:30:51 2018 +0900

--
 .../test/resources/testconfiguration.properties |1 +
 .../hive/ql/optimizer/SharedWorkOptimizer.java  |  117 +-
 .../ql/optimizer/TestSharedWorkOptimizer.java   |  126 ++
 .../test/queries/clientpositive/keep_uniform.q  |  201 +++
 .../results/clientpositive/keep_uniform.q.out   | 1118 
 .../clientpositive/llap/keep_uniform.q.out  | 1191 ++
 .../clientpositive/llap/subquery_in.q.out   |   18 +-
 .../clientpositive/llap/subquery_select.q.out   |   88 +-
 8 files changed, 2790 insertions(+), 70 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/56625f3a/itests/src/test/resources/testconfiguration.properties
--
diff --git a/itests/src/test/resources/testconfiguration.properties 
b/itests/src/test/resources/testconfiguration.properties
index fb50588..5aadf2c 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -159,6 +159,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
   join_emit_interval.q,\
   join46.q,\
   join_nullsafe.q,\
+  keep_uniform.q,\
   kill_query.q,\
   leftsemijoin.q,\
   limit_pushdown.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/56625f3a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
index 1e3887b..0cb3b21 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
@@ -23,6 +23,7 @@ import java.util.BitSet;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Comparator;
+import java.util.EnumSet;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -82,6 +83,11 @@ import com.google.common.collect.Multiset;
 import com.google.common.collect.Sets;
 import com.google.common.collect.TreeMultiset;
 
+import static 
org.apache.hadoop.hive.ql.plan.ReduceSinkDesc.ReducerTraits.AUTOPARALLEL;
+import static 
org.apache.hadoop.hive.ql.plan.ReduceSinkDesc.ReducerTraits.FIXED;
+import static 
org.apache.hadoop.hive.ql.plan.ReduceSinkDesc.ReducerTraits.UNIFORM;
+import static 
org.apache.hadoop.hive.ql.plan.ReduceSinkDesc.ReducerTraits.UNSET;
+
 /**
  * Shared computation optimizer.
  *
@@ -409,6 +415,8 @@ public class SharedWorkOptimizer extends Transform {
 continue;
   }
 
+  deduplicateReduceTraits(retainableRsOp.getConf(), 
discardableRsOp.getConf());
+
   // We can merge
   Operator lastRetainableOp = 
sr.retainableOps.get(sr.retainableOps.size() - 1);
   Operator lastDiscardableOp = 
sr.discardableOps.get(sr.discardableOps.size() - 1);
@@ -1124,7 +1132,7 @@ public class SharedWorkOptimizer extends Transform {
 op1Conf.getTag() == op2Conf.getTag() &&
 StringUtils.equals(op1Conf.getOrder(), op2Conf.getOrder()) &&
 op1Conf.getTopN() == op2Conf.getTopN() &&
-op1Conf.isAutoParallel() == op2Conf.isAutoParallel()) {
+canDeduplicateReduceTraits(op1Conf, op2Conf)) {
 return true;
   } else {
 return false;
@@ -1474,6 +1482,113 @@ public class SharedWorkOptimizer extends Transform {
 }
   }
 
+  static boolean canDeduplicateReduceTraits(ReduceSinkDesc retainable, 
ReduceSinkDesc discardable) {
+return deduplicateReduceTraits(retainable, discardable, false);
+  }
+
+  static boolean deduplicateReduceTraits(ReduceSinkDesc retainable, 
ReduceSinkDesc discardable) {
+return deduplicateReduceTraits(retainable, discardable, true);
+  }
+
+  private static boolean deduplicateReduceTraits(ReduceSinkDesc retainable,
+  ReduceSinkDesc discardable, boolean apply) {
+
+final EnumSet retainableTraits = 
retainable.getReducerTraits();
+final EnumSet discardableTraits = 
discardable.getReducerTraits();
+
+final boolean x1 = 

[1/2] hive git commit: HIVE-20954: Vector RS operator is not using uniform hash function for TPC-DS query 95 (Teddy Choi, reviewed by Gopal V)

2018-11-26 Thread tchoi
Repository: hive
Updated Branches:
  refs/heads/master e579e0683 -> 56625f3a6


http://git-wip-us.apache.org/repos/asf/hive/blob/56625f3a/ql/src/test/results/clientpositive/llap/keep_uniform.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/keep_uniform.q.out 
b/ql/src/test/results/clientpositive/llap/keep_uniform.q.out
new file mode 100644
index 000..27a48f4
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/keep_uniform.q.out
@@ -0,0 +1,1191 @@
+PREHOOK: query: drop table if exists customer_address
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists customer_address
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table customer_address
+(
+ca_address_sk int,
+ca_address_id string,
+ca_street_number  string,
+ca_street_namestring,
+ca_street_typestring,
+ca_suite_number   string,
+ca_city   string,
+ca_county string,
+ca_state  string,
+ca_zipstring,
+ca_countrystring,
+ca_gmt_offset decimal(5,2),
+ca_location_type  string
+)
+row format delimited fields terminated by '\t'
+STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@customer_address
+POSTHOOK: query: create table customer_address
+(
+ca_address_sk int,
+ca_address_id string,
+ca_street_number  string,
+ca_street_namestring,
+ca_street_typestring,
+ca_suite_number   string,
+ca_city   string,
+ca_county string,
+ca_state  string,
+ca_zipstring,
+ca_countrystring,
+ca_gmt_offset decimal(5,2),
+ca_location_type  string
+)
+row format delimited fields terminated by '\t'
+STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@customer_address
+PREHOOK: query: drop table if exists date_dim
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists date_dim
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table date_dim
+(
+d_date_sk int,
+d_date_id string,
+d_datestring,
+d_month_seq   int,
+d_week_seqint,
+d_quarter_seq int,
+d_yearint,
+d_dow int,
+d_moy int,
+d_dom int,
+d_qoy int,
+d_fy_year int,
+d_fy_quarter_seq  int,
+d_fy_week_seq int,
+d_day_namestring,
+d_quarter_namestring,
+d_holiday string,
+d_weekend string,
+d_following_holiday   string,
+d_first_dom   int,
+d_last_domint,
+d_same_day_ly int,
+d_same_day_lq int,
+d_current_day string,
+d_current_weekstring,
+d_current_month   string,
+d_current_quarter string,
+d_current_yearstring
+)
+row format delimited fields terminated by '\t'
+STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@date_dim
+POSTHOOK: query: create table date_dim
+(
+d_date_sk int,
+d_date_id string,
+d_datestring,
+d_month_seq   int,
+d_week_seqint,
+d_quarter_seq int,
+d_yearint,
+d_dow int,
+d_moy int,
+d_dom int,
+d_qoy int,
+d_fy_year int,
+d_fy_quarter_seq  int,
+d_fy_week_seq int,
+d_day_namestring,
+d_quarter_namestring,
+d_holiday string,
+d_weekend string,
+d_following_holiday   string,
+d_first_dom   int,
+d_last_domint,
+d_same_day_ly int,
+d_same_day_lq int,
+d_current_day string,
+d_current_weekstring,
+d_current_month   string,
+d_current_quarter string,
+d_current_yearstring
+)
+row format delimited fields terminated by '\t'
+STORED AS ORC tblproperties ("transactional"="true", "orc.compress"="ZLIB")
+POSTHOOK: type: 

hive git commit: HIVE-20751: Upgrade arrow version to 0.10.0 (Shubham Chaurasia, reviewed by Teddy Choi)

2018-10-31 Thread tchoi
Repository: hive
Updated Branches:
  refs/heads/master fb2d27ea2 -> 4b40f7d2b


HIVE-20751: Upgrade arrow version to 0.10.0 (Shubham Chaurasia, reviewed by 
Teddy Choi)

Signed-off-by: Teddy Choi 


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4b40f7d2
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4b40f7d2
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4b40f7d2

Branch: refs/heads/master
Commit: 4b40f7d2b35df26af773ec138fdf521231792cdf
Parents: fb2d27e
Author: Teddy Choi 
Authored: Wed Oct 31 23:37:41 2018 +0900
Committer: Teddy Choi 
Committed: Wed Oct 31 23:37:41 2018 +0900

--
 pom.xml   |  2 +-
 .../hadoop/hive/llap/LlapArrowRecordWriter.java   |  4 ++--
 .../hive/ql/io/arrow/ArrowColumnarBatchSerDe.java |  2 +-
 .../hadoop/hive/ql/io/arrow/ArrowWrapperWritable.java |  8 
 .../apache/hadoop/hive/ql/io/arrow/Serializer.java| 14 +++---
 5 files changed, 15 insertions(+), 15 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/4b40f7d2/pom.xml
--
diff --git a/pom.xml b/pom.xml
index 842a143..26b662e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -123,7 +123,7 @@
     <apache-directory-server.version>1.5.6</apache-directory-server.version>
     <apache-directory-clientapi.version>0.1</apache-directory-clientapi.version>
-    <arrow.version>0.8.0</arrow.version>
+    <arrow.version>0.10.0</arrow.version>
     <avatica.version>1.12.0</avatica.version>
     <avro.version>1.8.2</avro.version>
     <bonecp.version>0.8.0.RELEASE</bonecp.version>

http://git-wip-us.apache.org/repos/asf/hive/blob/4b40f7d2/ql/src/java/org/apache/hadoop/hive/llap/LlapArrowRecordWriter.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/llap/LlapArrowRecordWriter.java 
b/ql/src/java/org/apache/hadoop/hive/llap/LlapArrowRecordWriter.java
index 9ee1048..4cd8a61 100644
--- a/ql/src/java/org/apache/hadoop/hive/llap/LlapArrowRecordWriter.java
+++ b/ql/src/java/org/apache/hadoop/hive/llap/LlapArrowRecordWriter.java
@@ -22,9 +22,9 @@ import java.io.IOException;
 
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
 import org.apache.arrow.vector.ipc.ArrowStreamWriter;
 import org.apache.hadoop.hive.ql.io.arrow.ArrowWrapperWritable;
-import org.apache.arrow.vector.complex.NullableMapVector;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapred.RecordWriter;
 import org.apache.hadoop.mapred.Reporter;
@@ -50,7 +50,7 @@ public class LlapArrowRecordWriter
   ArrowStreamWriter arrowStreamWriter;
   WritableByteChannelAdapter out;
   BufferAllocator allocator;
-  NullableMapVector rootVector;
+  NonNullableStructVector rootVector;
 
   public LlapArrowRecordWriter(WritableByteChannelAdapter out) {
 this.out = out;

http://git-wip-us.apache.org/repos/asf/hive/blob/4b40f7d2/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowColumnarBatchSerDe.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowColumnarBatchSerDe.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowColumnarBatchSerDe.java
index ed82d2d..0408707 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowColumnarBatchSerDe.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowColumnarBatchSerDe.java
@@ -195,7 +195,7 @@ public class ArrowColumnarBatchSerDe extends AbstractSerDe {
 for (int i = 0; i < structSize; i++) {
   structFields.add(toField(fieldNames.get(i), fieldTypeInfos.get(i)));
 }
-return new Field(name, FieldType.nullable(MinorType.MAP.getType()), 
structFields);
+return new Field(name, FieldType.nullable(MinorType.STRUCT.getType()), 
structFields);
   case UNION:
 final UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
  final List<TypeInfo> objectTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos();

http://git-wip-us.apache.org/repos/asf/hive/blob/4b40f7d2/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowWrapperWritable.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowWrapperWritable.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowWrapperWritable.java
index 40813fa..53bee6b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowWrapperWritable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowWrapperWritable.java
@@ -21,7 +21,7 @@ package org.apache.hadoop.hive.ql.io.arrow;
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.vector.VectorSchemaRoot;
 import org.apache.hadoop.io.WritableComparable;
-import org.apache.arrow.vector.complex.NullableMapVector;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
 
 import java.io.DataInput;
 import 
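
The hunks above capture the one real API break in the 0.8 -> 0.10 move: Arrow's
struct-typed vector and its field type were renamed, NullableMapVector becoming
NonNullableStructVector and MinorType.MAP becoming MinorType.STRUCT. A minimal
sketch of building a nullable struct Field the post-upgrade way, assuming only
the Arrow Java library on the classpath (the field names are illustrative, not
from the patch):

import java.util.Arrays;
import java.util.Collections;

import org.apache.arrow.vector.types.Types.MinorType;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;

// Sketch only: the post-0.10 spelling of a nullable struct field, mirroring
// the MinorType.MAP -> MinorType.STRUCT change in the hunks above.
public class StructFieldSketch {
  public static void main(String[] args) {
    // Child fields are hypothetical; any primitive types would do.
    Field id = new Field("id",
        FieldType.nullable(MinorType.BIGINT.getType()), Collections.<Field>emptyList());
    Field name = new Field("name",
        FieldType.nullable(new ArrowType.Utf8()), Collections.<Field>emptyList());
    // Pre-0.10, the struct's arrow type came from MinorType.MAP.getType().
    Field person = new Field("person",
        FieldType.nullable(MinorType.STRUCT.getType()), Arrays.asList(id, name));
    System.out.println(person);  // prints the struct field with its two children
  }
}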

hive git commit: HIVE-20552: Get Schema from LogicalPlan faster (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-10-04 Thread tchoi
Repository: hive
Updated Branches:
  refs/heads/branch-3 50cc61154 -> 3936f8227


HIVE-20552: Get Schema from LogicalPlan faster (Teddy Choi, reviewed by Jesus 
Camacho Rodriguez)

Signed-off-by: Teddy Choi 


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3936f822
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3936f822
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3936f822

Branch: refs/heads/branch-3
Commit: 3936f8227ed733a50e547ed6ac3d566b1d28f78a
Parents: 50cc611
Author: Teddy Choi 
Authored: Thu Oct 4 15:45:25 2018 +0900
Committer: Teddy Choi 
Committed: Thu Oct 4 15:45:25 2018 +0900

--
 .../metadata/HiveMaterializedViewsRegistry.java | 31 
 .../apache/hadoop/hive/ql/parse/ParseUtils.java | 28 ++
 .../ql/udf/generic/GenericUDTFGetSplits.java| 29 --
 3 files changed, 53 insertions(+), 35 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/3936f822/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
index 696227b..a8856a9 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
@@ -52,8 +52,6 @@ import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.DefaultMetaStoreFilterHookImpl;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
-import org.apache.hadoop.hive.ql.Context;
-import org.apache.hadoop.hive.ql.QueryState;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl;
@@ -61,11 +59,8 @@ import 
org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
 import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter;
-import org.apache.hadoop.hive.ql.parse.ASTNode;
 import org.apache.hadoop.hive.ql.parse.CalcitePlanner;
-import org.apache.hadoop.hive.ql.parse.ColumnStatsList;
 import org.apache.hadoop.hive.ql.parse.ParseUtils;
-import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
 import org.apache.hadoop.hive.ql.parse.RowResolver;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.serde2.SerDeException;
@@ -224,10 +219,12 @@ public final class HiveMaterializedViewsRegistry {
   " ignored; error creating view replacement");
   return null;
 }
-final RelNode queryRel = parseQuery(conf, viewQuery);
-if (queryRel == null) {
+final RelNode queryRel;
+try {
+  queryRel = ParseUtils.parseQuery(conf, viewQuery);
+} catch (Exception e) {
   LOG.warn("Materialized view " + materializedViewTable.getCompleteName() +
-  " ignored; error parsing original query");
+  " ignored; error parsing original query; " + e);
   return null;
 }
 
@@ -400,24 +397,6 @@ public final class HiveMaterializedViewsRegistry {
 return tableRel;
   }
 
-  private static RelNode parseQuery(HiveConf conf, String viewQuery) {
-try {
-  final ASTNode node = ParseUtils.parse(viewQuery);
-  final QueryState qs =
-  new QueryState.Builder().withHiveConf(conf).build();
-  CalcitePlanner analyzer = new CalcitePlanner(qs);
-  Context ctx = new Context(conf);
-  ctx.setIsLoadingMaterializedView(true);
-  analyzer.initCtx(ctx);
-  analyzer.init(false);
-  return analyzer.genLogicalPlan(node);
-} catch (Exception e) {
-  // We could not parse the view
-  LOG.error("Error parsing original query for materialized view", e);
-  return null;
-}
-  }
-
   private static TableType obtainTableType(Table tabMetaData) {
 if (tabMetaData.getStorageHandler() != null) {
   final String storageHandlerStr = 
tabMetaData.getStorageHandler().toString();

http://git-wip-us.apache.org/repos/asf/hive/blob/3936f822/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
index 89e8412..be1c59f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
+++ 
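
The refactor above is what makes the schema path faster: logical-plan
generation is hoisted out of HiveMaterializedViewsRegistry (and, per the
diffstat, GenericUDTFGetSplits) into a shared ParseUtils.parseQuery that throws
on failure instead of returning null, leaving logging policy to each caller. A
hedged sketch of getting a query's schema through the new entry point;
parseQuery's shape is taken from the hunks above, while the wrapper method and
query string are illustrative:

import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.parse.ParseUtils;

// Sketch only: derive a query's result schema from its Calcite logical plan.
public class SchemaFromPlanSketch {
  static RelDataType schemaOf(HiveConf conf, String query) throws Exception {
    // Throws on parse/plan failure rather than returning null, so the caller
    // decides whether that is a warning (registry) or a hard error (UDTF).
    RelNode plan = ParseUtils.parseQuery(conf, query);
    return plan.getRowType();  // column names and types of the query result
  }
}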

hive git commit: HIVE-20552: Get Schema from LogicalPlan faster (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-27 Thread tchoi
Repository: hive
Updated Branches:
  refs/heads/master 727e4b2d2 -> 778c47ccd


HIVE-20552: Get Schema from LogicalPlan faster (Teddy Choi, reviewed by Jesus 
Camacho Rodriguez)

Signed-off-by: Teddy Choi 


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/778c47cc
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/778c47cc
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/778c47cc

Branch: refs/heads/master
Commit: 778c47ccdffa138f4738aadc94a75b75b5055fe1
Parents: 727e4b2
Author: Teddy Choi 
Authored: Fri Sep 28 11:28:39 2018 +0900
Committer: Teddy Choi 
Committed: Fri Sep 28 11:28:39 2018 +0900

--
 .../metadata/HiveMaterializedViewsRegistry.java | 31 
 .../apache/hadoop/hive/ql/parse/ParseUtils.java | 28 ++
 .../ql/udf/generic/GenericUDTFGetSplits.java| 29 --
 3 files changed, 53 insertions(+), 35 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/778c47cc/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
index 7d11cac..ee405ca 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
@@ -52,8 +52,6 @@ import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.DefaultMetaStoreFilterHookImpl;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
-import org.apache.hadoop.hive.ql.Context;
-import org.apache.hadoop.hive.ql.QueryState;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl;
@@ -61,11 +59,8 @@ import 
org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
 import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter;
-import org.apache.hadoop.hive.ql.parse.ASTNode;
 import org.apache.hadoop.hive.ql.parse.CalcitePlanner;
-import org.apache.hadoop.hive.ql.parse.ColumnStatsList;
 import org.apache.hadoop.hive.ql.parse.ParseUtils;
-import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
 import org.apache.hadoop.hive.ql.parse.RowResolver;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.serde2.SerDeException;
@@ -227,10 +222,12 @@ public final class HiveMaterializedViewsRegistry {
   " ignored; error creating view replacement");
   return null;
 }
-final RelNode queryRel = parseQuery(conf, viewQuery);
-if (queryRel == null) {
+final RelNode queryRel;
+try {
+  queryRel = ParseUtils.parseQuery(conf, viewQuery);
+} catch (Exception e) {
   LOG.warn("Materialized view " + materializedViewTable.getCompleteName() +
-  " ignored; error parsing original query");
+  " ignored; error parsing original query; " + e);
   return null;
 }
 
@@ -403,24 +400,6 @@ public final class HiveMaterializedViewsRegistry {
 return tableRel;
   }
 
-  private static RelNode parseQuery(HiveConf conf, String viewQuery) {
-try {
-  final ASTNode node = ParseUtils.parse(viewQuery);
-  final QueryState qs =
-  new QueryState.Builder().withHiveConf(conf).build();
-  CalcitePlanner analyzer = new CalcitePlanner(qs);
-  Context ctx = new Context(conf);
-  ctx.setIsLoadingMaterializedView(true);
-  analyzer.initCtx(ctx);
-  analyzer.init(false);
-  return analyzer.genLogicalPlan(node);
-} catch (Exception e) {
-  // We could not parse the view
-  LOG.error("Error parsing original query for materialized view", e);
-  return null;
-}
-  }
-
   private static TableType obtainTableType(Table tabMetaData) {
 if (tabMetaData.getStorageHandler() != null) {
   final String storageHandlerStr = 
tabMetaData.getStorageHandler().toString();

http://git-wip-us.apache.org/repos/asf/hive/blob/778c47cc/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
index 89e8412..be1c59f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
+++ 

hive git commit: HIVE-20052: Arrow serde should fill ArrowColumnVector(Decimal) with the given schema precision/scale (Teddy Choi, reviewed by Matt McCline)

2018-09-27 Thread tchoi
Repository: hive
Updated Branches:
  refs/heads/master 34331f3c7 -> 727e4b2d2


HIVE-20052: Arrow serde should fill ArrowColumnVector(Decimal) with the given 
schema precision/scale (Teddy Choi, reviewed by Matt McCline)

Signed-off-by: Teddy Choi 


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/727e4b2d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/727e4b2d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/727e4b2d

Branch: refs/heads/master
Commit: 727e4b2d21d6f451a5073f2eaa0241e84225281f
Parents: 34331f3
Author: Teddy Choi 
Authored: Fri Sep 28 11:26:12 2018 +0900
Committer: Teddy Choi 
Committed: Fri Sep 28 11:26:12 2018 +0900

--
 .../hadoop/hive/ql/io/arrow/Serializer.java | 33 ++--
 .../io/arrow/TestArrowColumnarBatchSerDe.java   | 25 +++
 2 files changed, 49 insertions(+), 9 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/727e4b2d/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java
index 6b31045..7dffa6b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java
@@ -38,11 +38,14 @@ import org.apache.arrow.vector.VectorSchemaRoot;
 import org.apache.arrow.vector.complex.ListVector;
 import org.apache.arrow.vector.complex.MapVector;
 import org.apache.arrow.vector.complex.NullableMapVector;
+import org.apache.arrow.vector.holders.DecimalHolder;
 import org.apache.arrow.vector.types.TimeUnit;
 import org.apache.arrow.vector.types.Types;
 import org.apache.arrow.vector.types.pojo.ArrowType;
 import org.apache.arrow.vector.types.pojo.FieldType;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.arrow.vector.util.DecimalUtility;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
@@ -61,6 +64,7 @@ import 
org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
@@ -74,10 +78,10 @@ import 
org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
 import org.apache.arrow.memory.BufferAllocator;
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 
+import java.math.BigDecimal;
+import java.math.BigInteger;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.List;
 
 import static 
org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ARROW_BATCH_SIZE;
@@ -107,6 +111,7 @@ public class Serializer {
   private int fieldSize;
 
   private final NullableMapVector rootVector;
+  private final DecimalHolder decimalHolder = new DecimalHolder();
 
   //Constructor for non-serde serialization
  public Serializer(Configuration conf, String attemptId, List<TypeInfo> typeInfos, List<String> fieldNames) {
@@ -277,7 +282,7 @@ public class Serializer {
 }
   }
 
-  private static void write(FieldVector arrowVector, ColumnVector hiveVector, 
TypeInfo typeInfo, int size,
+  private void write(FieldVector arrowVector, ColumnVector hiveVector, 
TypeInfo typeInfo, int size,
   VectorizedRowBatch vectorizedRowBatch, boolean isNative) {
 switch (typeInfo.getCategory()) {
   case PRIMITIVE:
@@ -300,7 +305,7 @@ public class Serializer {
   }
   }
 
-  private static void writeMap(ListVector arrowVector, MapColumnVector 
hiveVector, MapTypeInfo typeInfo,
+  private void writeMap(ListVector arrowVector, MapColumnVector hiveVector, 
MapTypeInfo typeInfo,
   int size, VectorizedRowBatch vectorizedRowBatch, boolean isNative) {
 final ListTypeInfo structListTypeInfo = toStructListTypeInfo(typeInfo);
 final ListColumnVector structListVector = toStructListVector(hiveVector);
@@ -317,7 +322,7 @@ public class Serializer {
 }
   }
 
-  private static void writeUnion(FieldVector arrowVector, ColumnVector 
hiveVector, TypeInfo typeInfo,
+  private void writeUnion(FieldVector arrowVector, ColumnVector hiveVector, 
TypeInfo typeInfo,
   int size, 
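
The change above threads the column's declared precision and scale through to
the Arrow vector, instead of letting each value carry whatever scale it was
read with. Stripped of the Arrow plumbing, the core idea looks like this; a
self-contained sketch on java.math only, with the rounding mode an assumption
rather than something the patch specifies:

import java.math.BigDecimal;
import java.math.RoundingMode;

// Sketch only: normalize a decimal to the schema's scale before it is written,
// so every entry in the column agrees with the declared decimal(precision,scale).
public class DecimalScaleSketch {
  static BigDecimal toSchemaScale(BigDecimal value, int schemaScale) {
    return value.setScale(schemaScale, RoundingMode.HALF_UP);
  }

  public static void main(String[] args) {
    BigDecimal raw = new BigDecimal("123.4");   // scale 1, as read from the row
    System.out.println(toSchemaScale(raw, 4));  // 123.4000 -- scale 4, from the schema
  }
}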

[08/21] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-26 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/0427ffa5/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out 
b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out
index c5d0d63..3412d8a 100644
--- a/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out
@@ -2128,7 +2128,7 @@ Stage-0
 Filter Operator [FIL_23] (rows=26 width=491)
   predicate:first_value_window_0 is not null
   PTF Operator [PTF_10] (rows=26 width=491)
-Function 
definitions:[{},{"name:":"windowingtablefunction","order by:":"_col5 ASC NULLS 
FIRST","partition by:":"_col2"}]
+Function 
definitions:[{},{"name:":"windowingtablefunction","order by:":"_col5 ASC NULLS 
LAST","partition by:":"_col2"}]
 Select Operator [SEL_9] (rows=26 width=491)
   Output:["_col1","_col2","_col5"]
 <-Map 4 [PARTITION-LEVEL SORT]
@@ -2558,7 +2558,7 @@ Stage-0
 Select Operator [SEL_4] (rows=20 width=64)
   
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"]
   PTF Operator [PTF_3] (rows=20 width=621)
-Function definitions:[{},{"name:":"windowingtablefunction","order 
by:":"_col1 ASC NULLS FIRST","partition by:":"_col0"}]
+Function definitions:[{},{"name:":"windowingtablefunction","order 
by:":"_col1 ASC NULLS LAST","partition by:":"_col0"}]
 Select Operator [SEL_2] (rows=20 width=621)
   Output:["_col0","_col1","_col2","_col3"]
 <-Map 1 [PARTITION-LEVEL SORT]
@@ -2585,7 +2585,7 @@ Stage-0
 Select Operator [SEL_4] (rows=25 width=179)
   Output:["_col0","_col1","_col2"]
   PTF Operator [PTF_3] (rows=25 width=443)
-Function definitions:[{},{"name:":"windowingtablefunction","order 
by:":"_col1 ASC NULLS FIRST","partition by:":"_col0"}]
+Function definitions:[{},{"name:":"windowingtablefunction","order 
by:":"_col1 ASC NULLS LAST","partition by:":"_col0"}]
 Select Operator [SEL_2] (rows=25 width=443)
   Output:["_col0","_col1"]
 <-Map 1 [PARTITION-LEVEL SORT]
@@ -4057,14 +4057,14 @@ Stage-0
 Select Operator [SEL_7] (rows=26 width=239)
   Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
   PTF Operator [PTF_6] (rows=26 width=499)
-Function definitions:[{},{"name:":"windowingtablefunction","order 
by:":"_col1 ASC NULLS FIRST","partition by:":"_col2"}]
+Function definitions:[{},{"name:":"windowingtablefunction","order 
by:":"_col1 ASC NULLS LAST","partition by:":"_col2"}]
 Select Operator [SEL_5] (rows=26 width=499)
   Output:["_col1","_col2","_col5","_col7"]
 <-Reducer 2 [PARTITION-LEVEL SORT]
   PARTITION-LEVEL SORT [RS_4]
 PartitionCols:_col2
 PTF Operator [PTF_3] (rows=26 width=499)
-  Function definitions:[{},{"Partition table 
definition":{"name:":"noop","order by:":"_col1 ASC NULLS FIRST","partition 
by:":"_col2"}}]
+  Function definitions:[{},{"Partition table 
definition":{"name:":"noop","order by:":"_col1 ASC NULLS LAST","partition 
by:":"_col2"}}]
   Select Operator [SEL_2] (rows=26 width=499)
 Output:["_col1","_col2","_col5","_col7"]
   <-Map 1 [PARTITION-LEVEL SORT]
@@ -4102,14 +4102,14 @@ Stage-0
 Select Operator [SEL_14] (rows=27 width=227)
   Output:["_col0","_col1","_col2","_col3"]
   PTF Operator [PTF_13] (rows=27 width=223)
-Function definitions:[{},{"name:":"windowingtablefunction","order 
by:":"_col1 ASC NULLS FIRST","partition by:":"_col2"}]
+Function definitions:[{},{"name:":"windowingtablefunction","order 
by:":"_col1 ASC NULLS LAST","partition by:":"_col2"}]
 Select Operator [SEL_12] (rows=27 width=223)
   Output:["_col1","_col2","_col5"]
 <-Reducer 2 [PARTITION-LEVEL SORT]
   PARTITION-LEVEL SORT [RS_11]
 PartitionCols:_col2
 PTF Operator [PTF_10] (rows=27 width=223)
-  Function definitions:[{},{"Partition table 
definition":{"name:":"noop","order by:":"_col1 ASC NULLS FIRST","partition 
by:":"_col2"}}]
+  Function definitions:[{},{"Partition table 
definition":{"name:":"noop","order by:":"_col1 ASC NULLS LAST","partition 
by:":"_col2"}}]
   Select Operator [SEL_9] (rows=27 width=223)
 Output:["_col1","_col2","_col5"]
   <-Map 1 [PARTITION-LEVEL SORT]
@@ -4167,14 +4167,14 @@ Stage-0
 Select 
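
Every plan hunk in this series is the same flip: ascending sorts now default to
NULLS LAST, which is why NULL rows migrate from the top to the bottom of the
ordered test outputs in the companion messages. The semantics in miniature, as
a self-contained sketch using plain java.util rather than anything
Hive-specific:

import java.util.Arrays;
import java.util.Comparator;
import java.util.List;

// Sketch only: ascending order with NULLS LAST, the new default in these plans.
public class NullsLastSketch {
  public static void main(String[] args) {
    List<Integer> keys = Arrays.asList(2, null, 1, 3, null);
    keys.sort(Comparator.nullsLast(Comparator.<Integer>naturalOrder()));
    System.out.println(keys);  // [1, 2, 3, null, null] -- nulls sort after all values
  }
}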

[04/21] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-26 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/0427ffa5/ql/src/test/results/clientpositive/vector_decimal_precision.q.out
--
diff --git a/ql/src/test/results/clientpositive/vector_decimal_precision.q.out 
b/ql/src/test/results/clientpositive/vector_decimal_precision.q.out
index e3e354f..a530b3b 100644
--- a/ql/src/test/results/clientpositive/vector_decimal_precision.q.out
+++ b/ql/src/test/results/clientpositive/vector_decimal_precision.q.out
@@ -59,6 +59,37 @@ POSTHOOK: query: SELECT * FROM DECIMAL_PRECISION ORDER BY 
`dec`
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_precision
  A masked pattern was here 
+0.00
+0.00
+0.00
+0.00
+0.00
+0.1234567890
+0.1234567890
+1.2345678901
+1.2345678901
+1.2345678901
+12.3456789012
+12.3456789012
+12.3456789012
+123.4567890123
+123.4567890123
+123.4567890123
+1234.5678901235
+1234.5678901235
+1234.5678901235
+12345.6789012346
+12345.6789012346
+123456.7890123456
+123456.7890123457
+1234567.8901234560
+1234567.8901234568
+12345678.9012345600
+12345678.9012345679
+123456789.0123456000
+123456789.0123456789
+1234567890.123456
+1234567890.1234567890
 NULL
 NULL
 NULL
@@ -103,37 +134,6 @@ NULL
 NULL
 NULL
 NULL
-0.00
-0.00
-0.00
-0.00
-0.00
-0.1234567890
-0.1234567890
-1.2345678901
-1.2345678901
-1.2345678901
-12.3456789012
-12.3456789012
-12.3456789012
-123.4567890123
-123.4567890123
-123.4567890123
-1234.5678901235
-1234.5678901235
-1234.5678901235
-12345.6789012346
-12345.6789012346
-123456.7890123456
-123456.7890123457
-1234567.8901234560
-1234567.8901234568
-12345678.9012345600
-12345678.9012345679
-123456789.0123456000
-123456789.0123456789
-1234567890.123456
-1234567890.1234567890
 PREHOOK: query: SELECT `dec`, `dec` + 1, `dec` - 1 FROM DECIMAL_PRECISION 
ORDER BY `dec`
 PREHOOK: type: QUERY
 PREHOOK: Input: default@decimal_precision
@@ -142,50 +142,6 @@ POSTHOOK: query: SELECT `dec`, `dec` + 1, `dec` - 1 FROM 
DECIMAL_PRECISION ORDER
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_precision
  A masked pattern was here 
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
0.00	1.00	-1.00
0.00	1.00	-1.00
0.00	1.00	-1.00
@@ -217,14 +173,6 @@ NULL	NULL	NULL
123456789.0123456789	123456790.0123456789	123456788.0123456789
1234567890.123456	1234567891.123456	1234567889.123456
1234567890.1234567890	1234567891.1234567890	1234567889.1234567890
-PREHOOK: query: SELECT `dec`, `dec` * 2, `dec` / 3  FROM DECIMAL_PRECISION 
ORDER BY `dec`
-PREHOOK: type: QUERY
-PREHOOK: Input: default@decimal_precision
- A masked pattern was here 
-POSTHOOK: query: SELECT `dec`, `dec` * 2, `dec` / 3  FROM DECIMAL_PRECISION 
ORDER BY `dec`
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@decimal_precision
- A masked pattern was here 
NULL	NULL	NULL
NULL	NULL	NULL
NULL	NULL	NULL
NULL	NULL	NULL
NULL	NULL	NULL
NULL	NULL	NULL
NULL	NULL	NULL
+PREHOOK: query: SELECT `dec`, `dec` * 2, `dec` / 3  FROM DECIMAL_PRECISION 
ORDER BY `dec`
+PREHOOK: type: QUERY
+PREHOOK: Input: default@decimal_precision
+ A masked pattern was here 
+POSTHOOK: query: SELECT `dec`, `dec` * 2, `dec` / 3  FROM DECIMAL_PRECISION 
ORDER BY `dec`
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@decimal_precision
+ A masked pattern was here 
 0.00   0.000.
 0.00   0.000.
 0.00   0.000.
@@ -300,6 +256,50 @@ NULL   NULLNULL
123456789.0123456789	246913578.0246913578	41152263.004115226300
 1234567890.123456  2469135780.246912   411522630.04115200
 1234567890.1234567890  2469135780.2469135780   411522630.041152263000
+NULL	NULL	NULL
+NULL	NULL	NULL
+NULL	NULL	NULL
+NULL	NULL	NULL
+NULL	NULL	NULL
+NULL	NULL	NULL
+NULL	NULL	NULL

[09/21] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-26 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/0427ffa5/ql/src/test/results/clientpositive/spark/ptf.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/ptf.q.out 
b/ql/src/test/results/clientpositive/spark/ptf.q.out
index 91f11bb..4ff8c06 100644
--- a/ql/src/test/results/clientpositive/spark/ptf.q.out
+++ b/ql/src/test/results/clientpositive/spark/ptf.q.out
@@ -57,7 +57,7 @@ STAGE PLANS:
   Partition table definition
 input alias: ptf_1
 name: noop
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 output shape: _col1: string, _col2: string, _col5: 
int, _col7: double
 partition by: _col2
 raw input shape:
@@ -83,7 +83,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 partition by: _col2
 raw input shape:
 window functions:
@@ -261,7 +261,7 @@ STAGE PLANS:
   Partition table definition
 input alias: ptf_1
 name: noop
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 output shape: _col1: string, _col2: string, _col5: int
 partition by: _col2
 raw input shape:
@@ -287,7 +287,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 partition by: _col2
 raw input shape:
 window functions:
@@ -411,7 +411,7 @@ STAGE PLANS:
   Partition table definition
 input alias: ptf_1
 name: noop
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 output shape: _col1: string, _col2: string, _col5: int
 partition by: _col2
 raw input shape:
@@ -533,7 +533,7 @@ STAGE PLANS:
   Partition table definition
 input alias: abc
 name: noop
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 output shape: _col1: string, _col2: string, _col5: 
int, _col7: double
 partition by: _col2
 raw input shape:
@@ -559,7 +559,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 partition by: _col2
 raw input shape:
 window functions:
@@ -709,7 +709,7 @@ STAGE PLANS:
   Partition table definition
 input alias: ptf_1
 name: noop
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 output shape: _col1: string, _col2: string, _col5: int
 partition by: _col2
 raw input shape:
@@ -735,7 +735,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 partition by: _col2
 raw input shape:
 window functions:
@@ -888,7 +888,7 @@ STAGE PLANS:
   Partition table definition
 input alias: ptf_1
 name: noop
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 output shape: _col1: string, _col2: string, _col5: int
 partition by: _col2
 raw input shape:
@@ -923,7 +923,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: 

[16/21] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-26 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/0427ffa5/ql/src/test/results/clientpositive/llap/vector_order_null.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_order_null.q.out 
b/ql/src/test/results/clientpositive/llap/vector_order_null.q.out
index cb4053e..d42196c 100644
--- a/ql/src/test/results/clientpositive/llap/vector_order_null.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_order_null.q.out
@@ -875,7 +875,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-reduceColumnNullOrder: za
+reduceColumnNullOrder: zz
 reduceColumnSortOrder: ++
 allNative: false
 usesVectorUDFAdaptor: false
@@ -923,9 +923,9 @@ x.a x.b
 1  A
 2  A
 2  B
-NULL   NULL
 2  NULL
 3  NULL
+NULL   NULL
 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT x.* FROM src_null_n3 x ORDER BY b desc nulls last, a
 PREHOOK: type: QUERY
@@ -997,7 +997,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-reduceColumnNullOrder: za
+reduceColumnNullOrder: zz
 reduceColumnSortOrder: -+
 allNative: false
 usesVectorUDFAdaptor: false
@@ -1045,9 +1045,9 @@ x.a   x.b
 2  B
 1  A
 2  A
-NULL   NULL
 2  NULL
 3  NULL
+NULL   NULL
 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT x.* FROM src_null_n3 x ORDER BY a asc nulls last, b desc
 PREHOOK: type: QUERY
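
The reduceColumnNullOrder strings in these plans encode the same default, one
character per sort key: correlating the hunks, 'a' lines up with NULLS FIRST
and 'z' with NULLS LAST (here "za" becomes "zz" once both keys place NULLs
last). A hypothetical decoder, only to make the flag string concrete; the
helper is an illustration, not Hive API:

// Sketch only: interpreting a plan's reduceColumnNullOrder flags, e.g. "zz".
public class NullOrderFlagSketch {
  // 'z' = NULLs sort last for that key, 'a' = NULLs sort first (inferred from
  // the plan diffs above, not from a documented contract).
  static boolean nullsLast(String reduceColumnNullOrder, int keyIndex) {
    return reduceColumnNullOrder.charAt(keyIndex) == 'z';
  }

  public static void main(String[] args) {
    System.out.println(nullsLast("zz", 0));  // true: first sort key, NULLs last
  }
}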

http://git-wip-us.apache.org/repos/asf/hive/blob/0427ffa5/ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out 
b/ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out
index c74a588..e9ebb74 100644
--- a/ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out
@@ -120,11 +120,11 @@ POSTHOOK: query: select * from small_alltypesorc3a
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_alltypesorc3a
  A masked pattern was here 
-NULL   NULL-1015272448 -1887561756 NULLNULLjTQ68531mP  
4hA4KQj2vD3fI6gX82220d  NULL1969-12-31 15:59:45.854 false   false
-NULL   NULL-850295959  -1887561756 NULLNULLWMIgGA73
4hA4KQj2vD3fI6gX82220d  NULL1969-12-31 16:00:00.348 false   false
-NULL   NULL-886426182  -1887561756 NULLNULL
0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d  NULL1969-12-31 16:00:04.472 
truefalse
-NULL   NULL-89947  -1645852809 NULLNULL73xdw4X 
xH7445Rals48VOulSyR5F   NULL1969-12-31 16:00:07.395 false   false
-NULL   NULL-971543377  -1645852809 NULLNULLuN803aW 
xH7445Rals48VOulSyR5F   NULL1969-12-31 16:00:05.43  false   false
+NULL   -16306  384405526   -1645852809 NULL-16306.0b5SoK8  
xH7445Rals48VOulSyR5F   NULL1969-12-31 16:00:11.105 truefalse
+NULL   -16307  559926362   -1645852809 NULL-16307.0
nA8bdtWfPPQyP2hL5   xH7445Rals48VOulSyR5F   NULL1969-12-31 15:59:58.072 
false   false
+NULL   -16309  -826497289  -1645852809 NULL-16309.0
54o058c3mK6ewOQ5xH7445Rals48VOulSyR5F   NULL1969-12-31 16:00:10.761 
false   false
+NULL   -16310  206154150   1864027286  NULL-16310.05Hy1y6  
4KWs6gw7lv2WYd66P   NULL1969-12-31 16:00:00.821 false   true
+NULL   -16379  -894716315  1864027286  NULL-16379.0
2ArdYqML3654nUjGJk3 4KWs6gw7lv2WYd66P   NULL1969-12-31 15:59:47.059 
truetrue
 PREHOOK: query: select * from small_alltypesorc4a
 PREHOOK: type: QUERY
 PREHOOK: Input: default@small_alltypesorc4a
@@ -211,11 +211,11 @@ POSTHOOK: Input: default@small_alltypesorc_a
 -64-7196   NULL658026952   -64.0   -7196.0 NULL4tAur   
1969-12-31 15:59:53.866 1969-12-31 15:59:58.174 NULLtrue
 -64-8080   528534767   NULL-64.0   -8080.0 cvLH6Eat2yFsyy7p
NULL1969-12-31 15:59:58.044 1969-12-31 15:59:48.655 trueNULL
 -64-9842   253665376   NULL-64.0   -9842.0 1cGVWH7n1QU NULL
1969-12-31 16:00:00.631 1969-12-31 16:00:01.781 trueNULL
-NULL   NULL-1015272448 -1887561756 NULLNULLjTQ68531mP  
4hA4KQj2vD3fI6gX82220d  NULL1969-12-31 15:59:45.854 false   false
-NULL   NULL-850295959  -1887561756 NULLNULLWMIgGA73
4hA4KQj2vD3fI6gX82220d 

[07/21] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-26 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/0427ffa5/ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out 
b/ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out
index 9be72ea..70f7401 100644
--- a/ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out
@@ -130,16 +130,16 @@ POSTHOOK: query: select * from small_alltypesorc3b
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_alltypesorc3b
 POSTHOOK: Output: hdfs://### HDFS PATH ###
-NULL   NULL-1015272448 -1887561756 NULLNULLjTQ68531mP  
4hA4KQj2vD3fI6gX82220d  NULL1969-12-31 15:59:45.854 false   false
-NULL   NULL-609074876  -1887561756 NULLNULLEcM71   
4hA4KQj2vD3fI6gX82220d  NULL1969-12-31 15:59:55.061 truefalse
-NULL   NULL-700300206  -1887561756 NULLNULLkdqQE010
4hA4KQj2vD3fI6gX82220d  NULL1969-12-31 15:59:58.384 false   false
-NULL   NULL-726473298  1864027286  NULLNULL
OFy1a1xf37f75b5N4KWs6gw7lv2WYd66P   NULL1969-12-31 16:00:11.799 
truetrue
-NULL   NULL-738747840  -1645852809 NULLNULL
vmAT10eeE47fgH20pLi xH7445Rals48VOulSyR5F   NULL1969-12-31 16:00:11.55  
truefalse
-NULL   NULL-838810013  1864027286  NULLNULLN016jPED08o 
4KWs6gw7lv2WYd66P   NULL1969-12-31 15:59:44.252 false   true
-NULL   NULL-850295959  -1887561756 NULLNULLWMIgGA73
4hA4KQj2vD3fI6gX82220d  NULL1969-12-31 16:00:00.348 false   false
-NULL   NULL-886426182  -1887561756 NULLNULL
0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d  NULL1969-12-31 16:00:04.472 
truefalse
-NULL   NULL-89947  -1645852809 NULLNULL73xdw4X 
xH7445Rals48VOulSyR5F   NULL1969-12-31 16:00:07.395 false   false
-NULL   NULL-971543377  -1645852809 NULLNULLuN803aW 
xH7445Rals48VOulSyR5F   NULL1969-12-31 16:00:05.43  false   false
+NULL   -16269  -378213344  -1645852809 NULL-16269.0
sOdj1Tmvbl03f   xH7445Rals48VOulSyR5F   NULL1969-12-31 16:00:15.867 false   
false
+NULL   -16274  -671342269  -1645852809 NULL-16274.0
3DE7EQo4KyT0hS  xH7445Rals48VOulSyR5F   NULL1969-12-31 15:59:51.469 false   
false
+NULL   -16296  -146635689  -1645852809 NULL-16296.0
r251rbt884txX2MNq4MM14  xH7445Rals48VOulSyR5F   NULL1969-12-31 15:59:44.809 
false   false
+NULL   -16296  593429004   -1887561756 NULL-16296.0
dhDYJ076SFcC4hA4KQj2vD3fI6gX82220d  NULL1969-12-31 15:59:47.422 false   
false
+NULL   -16300  -860437234  -1645852809 NULL-16300.0
Fb2W1r24opqN8m6571p xH7445Rals48VOulSyR5F   NULL1969-12-31 15:59:45.815 
truefalse
+NULL   -16306  384405526   -1645852809 NULL-16306.0b5SoK8  
xH7445Rals48VOulSyR5F   NULL1969-12-31 16:00:11.105 truefalse
+NULL   -16307  559926362   -1645852809 NULL-16307.0
nA8bdtWfPPQyP2hL5   xH7445Rals48VOulSyR5F   NULL1969-12-31 15:59:58.072 
false   false
+NULL   -16309  -826497289  -1645852809 NULL-16309.0
54o058c3mK6ewOQ5xH7445Rals48VOulSyR5F   NULL1969-12-31 16:00:10.761 
false   false
+NULL   -16310  206154150   1864027286  NULL-16310.05Hy1y6  
4KWs6gw7lv2WYd66P   NULL1969-12-31 16:00:00.821 false   true
+NULL   -16379  -894716315  1864027286  NULL-16379.0
2ArdYqML3654nUjGJk3 4KWs6gw7lv2WYd66P   NULL1969-12-31 15:59:47.059 
truetrue
 PREHOOK: query: select * from small_alltypesorc4b
 PREHOOK: type: QUERY
 PREHOOK: Input: default@small_alltypesorc4b
@@ -236,16 +236,16 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 -64-7196   NULL658026952   -64.0   -7196.0 NULL4tAur   
1969-12-31 15:59:53.866 1969-12-31 15:59:58.174 NULLtrue
 -64-8080   528534767   NULL-64.0   -8080.0 cvLH6Eat2yFsyy7p
NULL1969-12-31 15:59:58.044 1969-12-31 15:59:48.655 trueNULL
 -64-9842   253665376   NULL-64.0   -9842.0 1cGVWH7n1QU NULL
1969-12-31 16:00:00.631 1969-12-31 16:00:01.781 trueNULL
-NULL   NULL-1015272448 -1887561756 NULLNULLjTQ68531mP  
4hA4KQj2vD3fI6gX82220d  NULL1969-12-31 15:59:45.854 false   false
-NULL   NULL-609074876  -1887561756 NULLNULLEcM71   
4hA4KQj2vD3fI6gX82220d  NULL1969-12-31 15:59:55.061 truefalse
-NULL   NULL-700300206  -1887561756 NULLNULLkdqQE010
4hA4KQj2vD3fI6gX82220d  NULL1969-12-31 15:59:58.384 false   false
-NULL   NULL-726473298  1864027286  NULLNULL
OFy1a1xf37f75b5N4KWs6gw7lv2WYd66P 

[12/21] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-26 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/0427ffa5/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out 
b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
index cc7699b..57147ae 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
@@ -259,7 +259,7 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
   alias: alltypesorc_string
-  Statistics: Num rows: 52 Data size: 4356 Basic stats: 
COMPLETE Column stats: COMPLETE
+  Statistics: Num rows: 52 Data size: 4276 Basic stats: 
COMPLETE Column stats: COMPLETE
   TableScan Vectorization:
   native: true
   Select Operator
@@ -270,7 +270,7 @@ STAGE PLANS:
 native: true
 projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 
0, 1, 3, 13, 14, 15, 16, 17]
 selectExpressions: VectorUDFUnixTimeStampTimestamp(col 
1:timestamp) -> 5:bigint, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) 
-> 6:int, VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 7:int, 
VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 8:int, 
VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 9:int, 
VectorUDFHourTimestamp(col 1:timestamp, field HOUR_OF_DAY) -> 10:int, 
VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 11:int, 
VectorUDFSecondTimestamp(col 1:timestamp, field SECOND) -> 12:int, 
IfExprTimestampColumnScalar(col 0:boolean, col 1:timestamp, val 1319-01-25 
08:31:57.778) -> 13:timestamp, IfExprTimestampScalarColumn(col 0:boolean, val 
2000-12-18 00:42:30.0005, col 1:timestamp) -> 14:timestamp, 
IfExprTimestampColumnColumn(col 0:boolean, col 1:timestamp, col 3:timestamp) -> 
15:timestamp, IfExprColumnNull(col 0:boolean, col 1:timestamp, null)(children: 
col 0:boolean, col 1:timestamp) -> 16:ti
 mestamp, IfExprNullColumn(col 0:boolean, null, col 3)(children: col 0:boolean, 
col 3:timestamp) -> 17:timestamp
-Statistics: Num rows: 52 Data size: 16836 Basic stats: 
COMPLETE Column stats: COMPLETE
+Statistics: Num rows: 52 Data size: 16756 Basic stats: 
COMPLETE Column stats: COMPLETE
 Reduce Output Operator
   key expressions: _col0 (type: bigint)
   sort order: +
@@ -278,7 +278,7 @@ STAGE PLANS:
   className: VectorReduceSinkObjectHashOperator
   native: true
   nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-  Statistics: Num rows: 52 Data size: 16836 Basic stats: 
COMPLETE Column stats: COMPLETE
+  Statistics: Num rows: 52 Data size: 16756 Basic stats: 
COMPLETE Column stats: COMPLETE
   value expressions: _col1 (type: int), _col2 (type: int), 
_col3 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), 
_col8 (type: int), _col9 (type: boolean), _col10 (type: timestamp), _col11 
(type: timestamp), _col12 (type: timestamp), _col13 (type: timestamp), _col14 
(type: timestamp), _col15 (type: timestamp), _col16 (type: timestamp)
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -307,13 +307,13 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [0, 1, 2, 3, 3, 4, 5, 6, 7, 8, 
9, 10, 11, 12, 13, 14, 15]
-Statistics: Num rows: 52 Data size: 16836 Basic stats: 
COMPLETE Column stats: COMPLETE
+Statistics: Num rows: 52 Data size: 16756 Basic stats: 
COMPLETE Column stats: COMPLETE
 File Output Operator
   compressed: false
   File Sink Vectorization:
   className: VectorFileSinkOperator
   native: false
-  Statistics: Num rows: 52 Data size: 16836 Basic stats: 
COMPLETE Column stats: COMPLETE
+  Statistics: Num rows: 52 Data size: 16756 Basic stats: 
COMPLETE Column stats: COMPLETE
   table:
   input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
   output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -371,6 +371,46 @@ ORDER BY c1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc_string
  A masked pattern 

[20/21] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-26 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/0427ffa5/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out 
b/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out
index f9a17a5..1379ba7 100644
--- a/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out
+++ b/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out
@@ -1166,7 +1166,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-reduceColumnNullOrder: a
+reduceColumnNullOrder: z
 reduceColumnSortOrder: +
 allNative: false
 usesVectorUDFAdaptor: false
@@ -1365,7 +1365,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-reduceColumnNullOrder: a
+reduceColumnNullOrder: z
 reduceColumnSortOrder: +
 allNative: false
 usesVectorUDFAdaptor: false
@@ -1623,7 +1623,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-reduceColumnNullOrder: a
+reduceColumnNullOrder: z
 reduceColumnSortOrder: +
 allNative: false
 usesVectorUDFAdaptor: false
@@ -1646,7 +1646,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-reduceColumnNullOrder: a
+reduceColumnNullOrder: z
 reduceColumnSortOrder: +
 allNative: false
 usesVectorUDFAdaptor: false
@@ -1952,7 +1952,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-reduceColumnNullOrder: a
+reduceColumnNullOrder: z
 reduceColumnSortOrder: +
 allNative: false
 usesVectorUDFAdaptor: false
@@ -2151,7 +2151,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-reduceColumnNullOrder: a
+reduceColumnNullOrder: z
 reduceColumnSortOrder: +
 allNative: false
 usesVectorUDFAdaptor: false
@@ -2410,7 +2410,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-reduceColumnNullOrder: a
+reduceColumnNullOrder: z
 reduceColumnSortOrder: +
 allNative: false
 usesVectorUDFAdaptor: false
@@ -2433,7 +2433,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-reduceColumnNullOrder: a
+reduceColumnNullOrder: z
 reduceColumnSortOrder: +
 allNative: false
 usesVectorUDFAdaptor: false

http://git-wip-us.apache.org/repos/asf/hive/blob/0427ffa5/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out 
b/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out
index 57ff575..44b7a77 100644
--- a/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out
+++ b/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out
@@ -97,7 +97,6 @@ POSTHOOK: query: select distinct si, si%4 from over10k_n2 
order by si
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over10k_n2
  A masked pattern was here 
-NULL	NULL
256	0
257	1
258	2
@@ -350,6 +349,7 @@ NULL	NULL
509	1
510	2
511	3
+NULL	NULL
 PREHOOK: query: insert into over10k_orc_bucketed select * from 

[18/21] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-26 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/0427ffa5/ql/src/test/results/clientpositive/llap/ptf_streaming.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/ptf_streaming.q.out 
b/ql/src/test/results/clientpositive/llap/ptf_streaming.q.out
index 3b3b13d..0ed326f 100644
--- a/ql/src/test/results/clientpositive/llap/ptf_streaming.q.out
+++ b/ql/src/test/results/clientpositive/llap/ptf_streaming.q.out
@@ -60,7 +60,7 @@ STAGE PLANS:
   Partition table definition
 input alias: ptf_1
 name: noopstreaming
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 output shape: _col1: string, _col2: string, _col5: 
int, _col7: double
 partition by: _col2
 raw input shape:
@@ -87,7 +87,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 partition by: _col2
 raw input shape:
 window functions:
@@ -270,7 +270,7 @@ STAGE PLANS:
   Partition table definition
 input alias: ptf_1
 name: noopstreaming
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 output shape: _col1: string, _col2: string, _col5: int
 partition by: _col2
 raw input shape:
@@ -297,7 +297,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 partition by: _col2
 raw input shape:
 window functions:
@@ -442,7 +442,7 @@ STAGE PLANS:
   Partition table definition
 input alias: abc
 name: noopstreaming
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 output shape: _col0: int, _col1: string, _col2: 
string, _col3: string, _col4: string, _col5: int, _col6: string, _col7: double, 
_col8: string
 partition by: _col2
 raw input shape:
@@ -566,7 +566,7 @@ STAGE PLANS:
 Partition table definition
   input alias: ptf_1
   name: noopwithmapstreaming
-  order by: p_name ASC NULLS FIRST, p_size DESC NULLS 
LAST
+  order by: p_name ASC NULLS LAST, p_size DESC NULLS 
LAST
   output shape: p_name: string, p_mfgr: string, 
p_size: int
   partition by: p_mfgr
   raw input shape:
@@ -596,7 +596,7 @@ STAGE PLANS:
   Partition table definition
 input alias: ptf_1
 name: noopwithmapstreaming
-order by: _col1 ASC NULLS FIRST, _col5 DESC NULLS LAST
+order by: _col1 ASC NULLS LAST, _col5 DESC NULLS LAST
 output shape: _col1: string, _col2: string, _col5: int
 partition by: _col2
 raw input shape:
@@ -623,7 +623,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col1 ASC NULLS FIRST, _col5 DESC NULLS LAST
+order by: _col1 ASC NULLS LAST, _col5 DESC NULLS LAST
 partition by: _col2
 raw input shape:
 window functions:
@@ -740,7 +740,7 @@ STAGE PLANS:
 Partition table definition
   input alias: ptf_1
   name: noopwithmapstreaming
-  order by: p_name ASC NULLS FIRST
+  order by: p_name ASC NULLS LAST
   output shape: p_name: string, p_mfgr: string, 
p_size: int, p_retailprice: double
   partition by: p_mfgr
   raw input shape:
@@ -771,7 +771,7 @@ STAGE PLANS:
   Partition table definition
 input alias: ptf_1
 name: noopwithmapstreaming
- 

[03/21] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-26 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/0427ffa5/ql/src/test/results/clientpositive/vector_outer_join3.q.out
--
diff --git a/ql/src/test/results/clientpositive/vector_outer_join3.q.out 
b/ql/src/test/results/clientpositive/vector_outer_join3.q.out
index 45d9c3e..2a76393 100644
--- a/ql/src/test/results/clientpositive/vector_outer_join3.q.out
+++ b/ql/src/test/results/clientpositive/vector_outer_join3.q.out
@@ -94,11 +94,11 @@ POSTHOOK: query: select * from small_alltypesorc1a_n1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_alltypesorc1a_n1
  A masked pattern was here 
-NULL   NULL-1015272448 -1887561756 NULLNULLjTQ68531mP  
4hA4KQj2vD3fI6gX82220d  NULL1969-12-31 15:59:45.854 false   false
-NULL   NULL-850295959  -1887561756 NULLNULLWMIgGA73
4hA4KQj2vD3fI6gX82220d  NULL1969-12-31 16:00:00.348 false   false
-NULL   NULL-886426182  -1887561756 NULLNULL
0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d  NULL1969-12-31 16:00:04.472 
truefalse
-NULL   NULL-89947  -1645852809 NULLNULL73xdw4X 
xH7445Rals48VOulSyR5F   NULL1969-12-31 16:00:07.395 false   false
-NULL   NULL-971543377  -1645852809 NULLNULLuN803aW 
xH7445Rals48VOulSyR5F   NULL1969-12-31 16:00:05.43  false   false
+-64-10462  626923679   NULL-64.0   -10462.0821UdmGbkEf4j   
NULL1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 trueNULL
+-64-15920  528534767   NULL-64.0   -15920.0
cvLH6Eat2yFsyy7pNULL1969-12-31 15:59:51.859 1969-12-31 16:00:14.468 
trueNULL
+-64-6907   253665376   NULL-64.0   -6907.0 1cGVWH7n1QU NULL
NULL1969-12-31 15:59:53.66  trueNULL
+-64-8080   528534767   NULL-64.0   -8080.0 cvLH6Eat2yFsyy7p
NULL1969-12-31 15:59:58.044 1969-12-31 15:59:48.655 trueNULL
+-64-9842   253665376   NULL-64.0   -9842.0 1cGVWH7n1QU NULL
1969-12-31 16:00:00.631 1969-12-31 16:00:01.781 trueNULL
 PREHOOK: query: select * from small_alltypesorc2a_n1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@small_alltypesorc2a_n1
@@ -216,16 +216,16 @@ POSTHOOK: Input: default@small_alltypesorc_a_n1
 -51NULLNULL-1874052220 -51.0   NULLc61B47I604gymFJ sjWQS78 
1969-12-31 16:00:08.451 NULLfalse   false
 -51NULLNULL-1927203921 -51.0   NULL45ja5suO42S0I0  
1969-12-31 16:00:08.451 NULLtruetrue
 -51NULLNULL-1970551565 -51.0   NULLr2uhJH3 loXMWyrHjVeK
1969-12-31 16:00:08.451 NULLfalse   false
+-64-10462  626923679   NULL-64.0   -10462.0821UdmGbkEf4j   
NULL1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 trueNULL
+-64-15920  528534767   NULL-64.0   -15920.0
cvLH6Eat2yFsyy7pNULL1969-12-31 15:59:51.859 1969-12-31 16:00:14.468 
trueNULL
+-64-6907   253665376   NULL-64.0   -6907.0 1cGVWH7n1QU NULL
NULL1969-12-31 15:59:53.66  trueNULL
 -64-7196   NULL-1615920595 -64.0   -7196.0 NULLX5rDjl  
1969-12-31 16:00:11.912 1969-12-31 15:59:58.174 NULLfalse
 -64-7196   NULL-1639157869 -64.0   -7196.0 NULL
IJ0Oj7qAiqNGsN7gn   1969-12-31 16:00:01.785 1969-12-31 15:59:58.174 NULL
false
 -64-7196   NULL-527203677  -64.0   -7196.0 NULL
JBE4H5RoK412Cs260I721969-12-31 15:59:50.184 1969-12-31 15:59:58.174 NULL
true
 -64-7196   NULL406535485   -64.0   -7196.0 NULLE011i   
1969-12-31 15:59:56.048 1969-12-31 15:59:58.174 NULLfalse
 -64-7196   NULL658026952   -64.0   -7196.0 NULL4tAur   
1969-12-31 15:59:53.866 1969-12-31 15:59:58.174 NULLtrue
-NULL   NULL-1015272448 -1887561756 NULLNULLjTQ68531mP  
4hA4KQj2vD3fI6gX82220d  NULL1969-12-31 15:59:45.854 false   false
-NULL   NULL-850295959  -1887561756 NULLNULLWMIgGA73
4hA4KQj2vD3fI6gX82220d  NULL1969-12-31 16:00:00.348 false   false
-NULL   NULL-886426182  -1887561756 NULLNULL
0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d  NULL1969-12-31 16:00:04.472 
truefalse
-NULL   NULL-89947  -1645852809 NULLNULL73xdw4X 
xH7445Rals48VOulSyR5F   NULL1969-12-31 16:00:07.395 false   false
-NULL   NULL-971543377  -1645852809 NULLNULLuN803aW 
xH7445Rals48VOulSyR5F   NULL1969-12-31 16:00:05.43  false   false
+-64-8080   528534767   NULL-64.0   -8080.0 cvLH6Eat2yFsyy7p
NULL1969-12-31 15:59:58.044 1969-12-31 15:59:48.655 trueNULL
+-64-9842   253665376   NULL-64.0   -9842.0 1cGVWH7n1QU NULL
1969-12-31 16:00:00.631 1969-12-31 16:00:01.781 trueNULL
 PREHOOK: query: explain vectorization detail formatted
 select count(*) from (select c.cstring1 

[02/21] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-26 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/0427ffa5/ql/src/test/results/clientpositive/vector_outer_join4.q.out
--
diff --git a/ql/src/test/results/clientpositive/vector_outer_join4.q.out 
b/ql/src/test/results/clientpositive/vector_outer_join4.q.out
index d0bfa41..18f689e 100644
--- a/ql/src/test/results/clientpositive/vector_outer_join4.q.out
+++ b/ql/src/test/results/clientpositive/vector_outer_join4.q.out
@@ -130,16 +130,16 @@ POSTHOOK: query: select * from small_alltypesorc3b
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_alltypesorc3b
  A masked pattern was here 
-NULL   NULL-1015272448 -1887561756 NULLNULLjTQ68531mP  
4hA4KQj2vD3fI6gX82220d  NULL1969-12-31 15:59:45.854 false   false
-NULL   NULL-609074876  -1887561756 NULLNULLEcM71   
4hA4KQj2vD3fI6gX82220d  NULL1969-12-31 15:59:55.061 truefalse
-NULL   NULL-700300206  -1887561756 NULLNULLkdqQE010
4hA4KQj2vD3fI6gX82220d  NULL1969-12-31 15:59:58.384 false   false
-NULL   NULL-726473298  1864027286  NULLNULL
OFy1a1xf37f75b5N4KWs6gw7lv2WYd66P   NULL1969-12-31 16:00:11.799 
truetrue
-NULL   NULL-738747840  -1645852809 NULLNULL
vmAT10eeE47fgH20pLi xH7445Rals48VOulSyR5F   NULL1969-12-31 16:00:11.55  
truefalse
-NULL   NULL-838810013  1864027286  NULLNULLN016jPED08o 
4KWs6gw7lv2WYd66P   NULL1969-12-31 15:59:44.252 false   true
-NULL   NULL-850295959  -1887561756 NULLNULLWMIgGA73
4hA4KQj2vD3fI6gX82220d  NULL1969-12-31 16:00:00.348 false   false
-NULL   NULL-886426182  -1887561756 NULLNULL
0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d  NULL1969-12-31 16:00:04.472 
truefalse
-NULL   NULL-89947  -1645852809 NULLNULL73xdw4X 
xH7445Rals48VOulSyR5F   NULL1969-12-31 16:00:07.395 false   false
-NULL   NULL-971543377  -1645852809 NULLNULLuN803aW 
xH7445Rals48VOulSyR5F   NULL1969-12-31 16:00:05.43  false   false
+NULL   -16269  -378213344  -1645852809 NULL-16269.0
sOdj1Tmvbl03f   xH7445Rals48VOulSyR5F   NULL1969-12-31 16:00:15.867 false   
false
+NULL   -16274  -671342269  -1645852809 NULL-16274.0
3DE7EQo4KyT0hS  xH7445Rals48VOulSyR5F   NULL1969-12-31 15:59:51.469 false   
false
+NULL   -16296  -146635689  -1645852809 NULL-16296.0
r251rbt884txX2MNq4MM14  xH7445Rals48VOulSyR5F   NULL1969-12-31 15:59:44.809 
false   false
+NULL   -16296  593429004   -1887561756 NULL-16296.0
dhDYJ076SFcC4hA4KQj2vD3fI6gX82220d  NULL1969-12-31 15:59:47.422 false   
false
+NULL   -16300  -860437234  -1645852809 NULL-16300.0
Fb2W1r24opqN8m6571p xH7445Rals48VOulSyR5F   NULL1969-12-31 15:59:45.815 
truefalse
+NULL   -16306  384405526   -1645852809 NULL-16306.0b5SoK8  
xH7445Rals48VOulSyR5F   NULL1969-12-31 16:00:11.105 truefalse
+NULL   -16307  559926362   -1645852809 NULL-16307.0
nA8bdtWfPPQyP2hL5   xH7445Rals48VOulSyR5F   NULL1969-12-31 15:59:58.072 
false   false
+NULL   -16309  -826497289  -1645852809 NULL-16309.0
54o058c3mK6ewOQ5xH7445Rals48VOulSyR5F   NULL1969-12-31 16:00:10.761 
false   false
+NULL   -16310  206154150   1864027286  NULL-16310.05Hy1y6  
4KWs6gw7lv2WYd66P   NULL1969-12-31 16:00:00.821 false   true
+NULL   -16379  -894716315  1864027286  NULL-16379.0
2ArdYqML3654nUjGJk3 4KWs6gw7lv2WYd66P   NULL1969-12-31 15:59:47.059 
truetrue
 PREHOOK: query: select * from small_alltypesorc4b
 PREHOOK: type: QUERY
 PREHOOK: Input: default@small_alltypesorc4b
@@ -236,16 +236,16 @@ POSTHOOK: Input: default@small_alltypesorc_b
 -64-7196   NULL658026952   -64.0   -7196.0 NULL4tAur   
1969-12-31 15:59:53.866 1969-12-31 15:59:58.174 NULLtrue
 -64-8080   528534767   NULL-64.0   -8080.0 cvLH6Eat2yFsyy7p
NULL1969-12-31 15:59:58.044 1969-12-31 15:59:48.655 trueNULL
 -64-9842   253665376   NULL-64.0   -9842.0 1cGVWH7n1QU NULL
1969-12-31 16:00:00.631 1969-12-31 16:00:01.781 trueNULL
-NULL   NULL-1015272448 -1887561756 NULLNULLjTQ68531mP  
4hA4KQj2vD3fI6gX82220d  NULL1969-12-31 15:59:45.854 false   false
-NULL   NULL-609074876  -1887561756 NULLNULLEcM71   
4hA4KQj2vD3fI6gX82220d  NULL1969-12-31 15:59:55.061 truefalse
-NULL   NULL-700300206  -1887561756 NULLNULLkdqQE010
4hA4KQj2vD3fI6gX82220d  NULL1969-12-31 15:59:58.384 false   false
-NULL   NULL-726473298  1864027286  NULLNULL
OFy1a1xf37f75b5N4KWs6gw7lv2WYd66P   NULL1969-12-31 16:00:11.799 
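
Note: the small_alltypesorc* tables exercised in these outer-join tests appear to be materialized with ORDER BY plus LIMIT over nullable columns, so moving NULLs to the end changes which source rows are captured at table-creation time; that is why whole row sets are swapped in the hunks above rather than merely reordered. A minimal sketch of the pattern, with an illustrative table name (small_sample is not the exact test DDL):

    -- capturing a top-N sample; the null placement decides which rows qualify
    CREATE TABLE small_sample AS
    SELECT *
    FROM alltypesorc
    ORDER BY ctinyint, csmallint   -- nullable sort keys
    LIMIT 10;                      -- with NULLS LAST, different rows now land in the table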

[11/21] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-26 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/0427ffa5/ql/src/test/results/clientpositive/partition_vs_table_metadata.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/partition_vs_table_metadata.q.out 
b/ql/src/test/results/clientpositive/partition_vs_table_metadata.q.out
index 6fc8b06..1b576ee 100644
--- a/ql/src/test/results/clientpositive/partition_vs_table_metadata.q.out
+++ b/ql/src/test/results/clientpositive/partition_vs_table_metadata.q.out
@@ -49,1003 +49,1003 @@ POSTHOOK: Input: default@partition_vs_table
 POSTHOOK: Input: default@partition_vs_table@ds=100
 POSTHOOK: Input: default@partition_vs_table@ds=101
  A masked pattern was here 
-0  val_0   NULL
-0  val_0   NULL
-0  val_0   NULL
 0  val_0   0
 0  val_0   0
 0  val_0   0
-10 val_10  NULL
+0  val_0   NULL
+0  val_0   NULL
+0  val_0   NULL
 10 val_10  10
-100val_100 NULL
-100val_100 NULL
+10 val_10  NULL
 100val_100 100
 100val_100 100
-103val_103 NULL
-103val_103 NULL
+100val_100 NULL
+100val_100 NULL
 103val_103 103
 103val_103 103
-104val_104 NULL
-104val_104 NULL
+103val_103 NULL
+103val_103 NULL
 104val_104 104
 104val_104 104
-105val_105 NULL
+104val_104 NULL
+104val_104 NULL
 105val_105 105
-11 val_11  NULL
+105val_105 NULL
 11 val_11  11
-111val_111 NULL
+11 val_11  NULL
 111val_111 111
-113val_113 NULL
-113val_113 NULL
+111val_111 NULL
 113val_113 113
 113val_113 113
-114val_114 NULL
+113val_113 NULL
+113val_113 NULL
 114val_114 114
-116val_116 NULL
+114val_114 NULL
 116val_116 116
-118val_118 NULL
-118val_118 NULL
+116val_116 NULL
 118val_118 118
 118val_118 118
-119val_119 NULL
-119val_119 NULL
-119val_119 NULL
+118val_118 NULL
+118val_118 NULL
 119val_119 119
 119val_119 119
 119val_119 119
-12 val_12  NULL
-12 val_12  NULL
+119val_119 NULL
+119val_119 NULL
+119val_119 NULL
 12 val_12  12
 12 val_12  12
-120val_120 NULL
-120val_120 NULL
+12 val_12  NULL
+12 val_12  NULL
 120val_120 120
 120val_120 120
-125val_125 NULL
-125val_125 NULL
+120val_120 NULL
+120val_120 NULL
 125val_125 125
 125val_125 125
-126val_126 NULL
+125val_125 NULL
+125val_125 NULL
 126val_126 126
-128val_128 NULL
-128val_128 NULL
-128val_128 NULL
+126val_126 NULL
 128val_128 128
 128val_128 128
 128val_128 128
-129val_129 NULL
-129val_129 NULL
+128val_128 NULL
+128val_128 NULL
+128val_128 NULL
 129val_129 129
 129val_129 129
-131val_131 NULL
+129val_129 NULL
+129val_129 NULL
 131val_131 131
-133val_133 NULL
+131val_131 NULL
 133val_133 133
-134val_134 NULL
-134val_134 NULL
+133val_133 NULL
 134val_134 134
 134val_134 134
-136val_136 NULL
+134val_134 NULL
+134val_134 NULL
 136val_136 136
-137val_137 NULL
-137val_137 NULL
+136val_136 NULL
 137val_137 137
 137val_137 137
-138val_138 NULL
-138val_138 NULL
-138val_138 NULL
-138val_138 NULL
+137val_137 NULL
+137val_137 NULL
 138val_138 138
 138val_138 138
 138val_138 138
 138val_138 138
-143val_143 NULL
+138val_138 NULL
+138val_138 NULL
+138val_138 NULL
+138val_138 NULL
 143val_143 143
-145val_145 NULL
+143val_143 NULL
 145val_145 145
-146val_146 NULL
-146val_146 NULL
+145val_145 NULL
 146val_146 146
 146val_146 146
-149val_149 NULL
-149val_149 NULL
+146val_146 NULL
+146val_146 NULL
 149val_149 149
 149val_149 149
-15 val_15  NULL
-15 val_15  NULL
+149val_149 NULL
+149val_149 NULL
 15 val_15  15
 15 val_15  15
-150val_150 NULL
+15 val_15  NULL
+15 val_15  NULL
 150val_150 150
-152val_152 NULL
-152val_152 NULL
+150val_150 NULL
 152val_152 152
 152val_152 152
-153val_153 NULL
+152val_152 NULL
+152val_152 NULL
 153val_153 153
-155val_155 NULL
+153val_153 NULL
 155val_155 155
-156val_156 NULL
+155val_155 NULL
 156val_156 156
-157val_157 NULL
+156val_156 NULL
 157val_157 157
-158val_158 NULL
+157val_157 NULL
 158val_158 158
-160val_160 NULL
+158val_158 NULL
 160val_160 160
-162val_162 NULL
+160val_160 NULL
 162val_162 162
-163val_163 NULL
+162val_162 NULL
 163val_163 163
-164val_164 NULL
-164val_164 NULL
+163val_163 NULL
 164val_164 164
 164val_164 164
-165val_165 NULL
-165val_165 NULL
+164val_164 NULL
+164val_164 NULL
 165val_165 165
 165val_165 165
-166val_166 NULL
+165val_165 NULL
+165val_165 NULL
 166val_166 166
-167
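
Note: the reordering above is the core effect of HIVE-20423 in its simplest form: under an ascending ORDER BY, rows whose sort key is NULL now follow the non-NULL rows instead of preceding them. The old placement is still available by writing the null ordering out explicitly (t and c are hypothetical names for illustration):

    SELECT c FROM t ORDER BY c;                    -- default is now ASC NULLS LAST
    SELECT c FROM t ORDER BY c ASC NULLS FIRST;    -- pre-HIVE-20423 placement, spelled out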

[06/21] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-26 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/0427ffa5/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out 
b/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
index 3fb3203..1b927f8 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
@@ -1227,56 +1227,56 @@ LIMIT 50
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
  A masked pattern was here 
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -51 -51.0   
1969-12-31 15:59:43.64  -7196   -1339164819 4992406445232   NULLNULL
7196-14392  -7196   NULLNULL51.06.4051596E8 
-5.157308006568995E-5   51  -1.5598627
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -52 -52.0   
1969-12-31 15:59:45.978 -7196   -2128720310 7935869315680   NULLNULL
7196-14392  -7196   NULLNULL52.06.4051596E8 
-5.258431692972308E-5   52  -1.5298654
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -52 -52.0   
1969-12-31 15:59:47.15  -7196   628698169   -2343786774032  NULLNULL
7196-14392  -7196   NULLNULL52.06.4051596E8 
-5.258431692972308E-5   52  -1.5298654
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -52 -52.0   
1969-12-31 15:59:57.86  -7196   -26309289   98081029392 NULLNULL
7196-14392  -7196   NULLNULL52.06.4051596E8 
-5.258431692972308E-5   52  -1.5298654
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -52 -52.0   
1969-12-31 15:59:58.479 -7196   -1379694191 5143499944048   NULLNULL
7196-14392  -7196   NULLNULL52.06.4051596E8 
-5.258431692972308E-5   52  -1.5298654
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -52 -52.0   
1969-12-31 16:00:03.963 -7196   95444104-355815619712   NULLNULL
7196-14392  -7196   NULLNULL52.06.4051596E8 
-5.258431692972308E-5   52  -1.5298654
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -52 -52.0   
1969-12-31 16:00:04.518 -7196   -1658319459 6182214943152   NULLNULL
7196-14392  -7196   NULLNULL52.06.4051596E8 
-5.258431692972308E-5   52  -1.5298654
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -53 -53.0   
1969-12-31 15:59:48.882 -7196   -1560660031 5818140595568   NULLNULL
7196-14392  -7196   NULLNULL53.06.4051596E8 
-5.359555379375622E-5   53  -1.501
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -53 -53.0   
1969-12-31 15:59:57.663 -7196   898472381   -3349505036368  NULLNULL
7196-14392  -7196   NULLNULL53.06.4051596E8 
-5.359555379375622E-5   53  -1.501
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -53 -53.0   
1969-12-31 16:00:11.36  -7196   -1357789899 5061840743472   NULLNULL
7196-14392  -7196   NULLNULL53.06.4051596E8 
-5.359555379375622E-5   53  -1.501
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -54 -54.0   
1969-12-31 15:59:53.657 -7196   1476582815  -5504700734320  NULLNULL
7196-14392  -7196   NULLNULL54.06.4051596E8 
-5.4606790657789354E-5  54  -1.4732037
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -54 -54.0   
1969-12-31 16:00:05.688 -7196   1614836149  -6020109163472  NULLNULL
7196-14392  -7196   NULLNULL54.06.4051596E8 
-5.4606790657789354E-5  54  -1.4732037
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -54 -54.0   
1969-12-31 16:00:06.484 -7196   1605976008  -5987078557824  NULLNULL
7196-14392  -7196   NULLNULL54.06.4051596E8 
-5.4606790657789354E-5  54  -1.4732037
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -54 -54.0   
1969-12-31 16:00:11.198 -7196   1650677402  -6153725354656  NULLNULL
7196-14392  -7196   NULLNULL54.06.4051596E8 
-5.4606790657789354E-5  54  -1.4732037
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -55 -55.0   
1969-12-31 15:59:43.932 -7196   1982381637  -7390318742736  NULLNULL
7196-14392  -7196   NULLNULL55.06.4051596E8 
-5.561802752182249E-5   55  -1.4464182
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -55 -55.0   
1969-12-31 16:00:01.138 -7196   888532643   -3312449693104  NULLNULL
7196-14392  -7196   NULLNULL55.06.4051596E8 
-5.561802752182249E-5   55  -1.4464182
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -55 -55.0   
1969-12-31 16:00:13.249 -7196   
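
Note: this block is the top-N variant of the same effect. The query ends in LIMIT 50 over an ORDER BY with nullable keys, so pushing NULL keys to the end does not just reorder the 50 returned rows, it swaps NULL-keyed rows out of the limit window entirely: the removed lines have NULL in the leading key columns, while the added lines carry non-NULL key values such as -16269. A sketch of the shape of such a query (the exact test SQL is longer; the column choice here is illustrative):

    SELECT ctinyint, csmallint, cint
    FROM alltypesorc
    ORDER BY csmallint     -- nullable key
    LIMIT 50;              -- NULL keys now fall outside the top 50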

[15/21] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-26 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/0427ffa5/ql/src/test/results/clientpositive/llap/vector_windowing.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_windowing.q.out 
b/ql/src/test/results/clientpositive/llap/vector_windowing.q.out
index a5d6167..ebc437e 100644
--- a/ql/src/test/results/clientpositive/llap/vector_windowing.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_windowing.q.out
@@ -88,7 +88,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 partition by: _col2
 raw input shape:
 window functions:
@@ -292,7 +292,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 partition by: _col0
 raw input shape:
 window functions:
@@ -499,7 +499,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 partition by: _col0
 raw input shape:
 window functions:
@@ -659,7 +659,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-reduceColumnNullOrder: aa
+reduceColumnNullOrder: az
 reduceColumnSortOrder: ++
 allNative: false
 usesVectorUDFAdaptor: false
@@ -687,7 +687,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 partition by: _col2
 raw input shape:
 window functions:
@@ -869,7 +869,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 partition by: _col2
 raw input shape:
 window functions:
@@ -1070,7 +1070,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 partition by: _col2
 raw input shape:
 window functions:
@@ -1321,7 +1321,7 @@ STAGE PLANS:
   Partition table definition
 input alias: abc
 name: noop
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 output shape: _col0: int, _col1: string, _col2: 
string, _col5: int, _col7: double
 partition by: _col2
 raw input shape:
@@ -1373,7 +1373,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 partition by: _col2
 raw input shape:
 window functions:
@@ -1561,7 +1561,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col1 ASC NULLS FIRST, _col5 DESC NULLS LAST
+order by: _col1 ASC NULLS LAST, _col5 DESC NULLS LAST
 partition by: _col2
 raw input shape:
 window functions:
@@ -1720,7 +1720,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
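
Note: two related changes are visible throughout these windowing plans. First, an OVER clause ORDER BY with no explicit null ordering is now rendered as ASC NULLS LAST rather than ASC NULLS FIRST, while DESC keys keep their pre-existing NULLS LAST default (see the unchanged _col5 DESC NULLS LAST hunk above). Second, reduceColumnNullOrder carries one character per reduce-sink key, 'a' for nulls-first and 'z' for nulls-last; the first key here is the PARTITION BY column, which keeps 'a', while the ORDER BY column flips to 'z', hence aa -> az. A sketch of the query shape, assuming the part table used by the PTF tests:

    SELECT p_mfgr, p_name,
           rank() OVER (PARTITION BY p_mfgr ORDER BY p_name) AS r
    FROM part;
    -- equivalent, with the new default written out:
    --   ... OVER (PARTITION BY p_mfgr ORDER BY p_name ASC NULLS LAST)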
 

[01/21] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-26 Thread tchoi
Repository: hive
Updated Branches:
  refs/heads/branch-3 fdc12f38f -> 0427ffa57


http://git-wip-us.apache.org/repos/asf/hive/blob/0427ffa5/ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out 
b/ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
index 8bd77f9..dc82fb6 100644
--- a/ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
+++ b/ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
@@ -253,7 +253,7 @@ STAGE PLANS:
   Map Operator Tree:
   TableScan
 alias: alltypesorc_string
-Statistics: Num rows: 52 Data size: 3179 Basic stats: COMPLETE 
Column stats: NONE
+Statistics: Num rows: 52 Data size: 8979 Basic stats: COMPLETE 
Column stats: NONE
 TableScan Vectorization:
 native: true
 Select Operator
@@ -264,7 +264,7 @@ STAGE PLANS:
   native: true
   projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 0, 1, 
3, 13, 14, 15, 16, 17]
   selectExpressions: VectorUDFUnixTimeStampTimestamp(col 
1:timestamp) -> 5:bigint, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) 
-> 6:int, VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 7:int, 
VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 8:int, 
VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 9:int, 
VectorUDFHourTimestamp(col 1:timestamp, field HOUR_OF_DAY) -> 10:int, 
VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 11:int, 
VectorUDFSecondTimestamp(col 1:timestamp, field SECOND) -> 12:int, 
IfExprTimestampColumnScalar(col 0:boolean, col 1:timestamp, val 1319-01-25 
08:31:57.778) -> 13:timestamp, IfExprTimestampScalarColumn(col 0:boolean, val 
2000-12-18 00:42:30.0005, col 1:timestamp) -> 14:timestamp, 
IfExprTimestampColumnColumn(col 0:boolean, col 1:timestamp, col 3:timestamp) -> 
15:timestamp, IfExprColumnNull(col 0:boolean, col 1:timestamp, null)(children: 
col 0:boolean, col 1:timestamp) -> 16:timestamp, IfExprNullColumn(col 0:boolean, null, col 3)(children: col 0:boolean, col 
3:timestamp) -> 17:timestamp
-  Statistics: Num rows: 52 Data size: 3179 Basic stats: COMPLETE 
Column stats: NONE
+  Statistics: Num rows: 52 Data size: 8979 Basic stats: COMPLETE 
Column stats: NONE
   Reduce Output Operator
 key expressions: _col0 (type: bigint)
 sort order: +
@@ -273,7 +273,7 @@ STAGE PLANS:
 native: false
 nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, 
No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true
 nativeConditionsNotMet: hive.execution.engine mr IN [tez, 
spark] IS false
-Statistics: Num rows: 52 Data size: 3179 Basic stats: COMPLETE 
Column stats: NONE
+Statistics: Num rows: 52 Data size: 8979 Basic stats: COMPLETE 
Column stats: NONE
 value expressions: _col1 (type: int), _col2 (type: int), _col3 
(type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 
(type: int), _col9 (type: boolean), _col10 (type: timestamp), _col11 (type: 
timestamp), _col12 (type: timestamp), _col13 (type: timestamp), _col14 (type: 
timestamp), _col15 (type: timestamp), _col16 (type: timestamp)
   Execution mode: vectorized
   Map Vectorization:
@@ -293,10 +293,10 @@ STAGE PLANS:
 Select Operator
   expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: 
int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col2 (type: 
int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: 
int), VALUE._col6 (type: int), VALUE._col7 (type: boolean), VALUE._col8 (type: 
timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: timestamp), 
VALUE._col11 (type: timestamp), VALUE._col12 (type: timestamp), VALUE._col13 
(type: timestamp), VALUE._col14 (type: timestamp)
   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, 
_col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16
-  Statistics: Num rows: 52 Data size: 3179 Basic stats: COMPLETE 
Column stats: NONE
+  Statistics: Num rows: 52 Data size: 8979 Basic stats: COMPLETE 
Column stats: NONE
   File Output Operator
 compressed: false
-Statistics: Num rows: 52 Data size: 3179 Basic stats: COMPLETE 
Column stats: NONE
+Statistics: Num rows: 52 Data size: 8979 Basic stats: COMPLETE 
Column stats: NONE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: 

[13/21] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-26 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/0427ffa5/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out 
b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
index 735dcdd..dea50ba 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
@@ -1237,56 +1237,56 @@ LIMIT 50
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
  A masked pattern was here 
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -51 -51.0   
1969-12-31 15:59:43.64  -7196   -1339164819 4992406445232   NULLNULL
7196-14392  -7196   NULLNULL51.06.4051596E8 
-5.157308006568995E-5   51  -1.5598627
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -52 -52.0   
1969-12-31 15:59:45.978 -7196   -2128720310 7935869315680   NULLNULL
7196-14392  -7196   NULLNULL52.06.4051596E8 
-5.258431692972308E-5   52  -1.5298654
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -52 -52.0   
1969-12-31 15:59:47.15  -7196   628698169   -2343786774032  NULLNULL
7196-14392  -7196   NULLNULL52.06.4051596E8 
-5.258431692972308E-5   52  -1.5298654
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -52 -52.0   
1969-12-31 15:59:57.86  -7196   -26309289   98081029392 NULLNULL
7196-14392  -7196   NULLNULL52.06.4051596E8 
-5.258431692972308E-5   52  -1.5298654
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -52 -52.0   
1969-12-31 15:59:58.479 -7196   -1379694191 5143499944048   NULLNULL
7196-14392  -7196   NULLNULL52.06.4051596E8 
-5.258431692972308E-5   52  -1.5298654
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -52 -52.0   
1969-12-31 16:00:03.963 -7196   95444104-355815619712   NULLNULL
7196-14392  -7196   NULLNULL52.06.4051596E8 
-5.258431692972308E-5   52  -1.5298654
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -52 -52.0   
1969-12-31 16:00:04.518 -7196   -1658319459 6182214943152   NULLNULL
7196-14392  -7196   NULLNULL52.06.4051596E8 
-5.258431692972308E-5   52  -1.5298654
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -53 -53.0   
1969-12-31 15:59:48.882 -7196   -1560660031 5818140595568   NULLNULL
7196-14392  -7196   NULLNULL53.06.4051596E8 
-5.359555379375622E-5   53  -1.501
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -53 -53.0   
1969-12-31 15:59:57.663 -7196   898472381   -3349505036368  NULLNULL
7196-14392  -7196   NULLNULL53.06.4051596E8 
-5.359555379375622E-5   53  -1.501
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -53 -53.0   
1969-12-31 16:00:11.36  -7196   -1357789899 5061840743472   NULLNULL
7196-14392  -7196   NULLNULL53.06.4051596E8 
-5.359555379375622E-5   53  -1.501
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -54 -54.0   
1969-12-31 15:59:53.657 -7196   1476582815  -5504700734320  NULLNULL
7196-14392  -7196   NULLNULL54.06.4051596E8 
-5.4606790657789354E-5  54  -1.4732037
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -54 -54.0   
1969-12-31 16:00:05.688 -7196   1614836149  -6020109163472  NULLNULL
7196-14392  -7196   NULLNULL54.06.4051596E8 
-5.4606790657789354E-5  54  -1.4732037
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -54 -54.0   
1969-12-31 16:00:06.484 -7196   1605976008  -5987078557824  NULLNULL
7196-14392  -7196   NULLNULL54.06.4051596E8 
-5.4606790657789354E-5  54  -1.4732037
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -54 -54.0   
1969-12-31 16:00:11.198 -7196   1650677402  -6153725354656  NULLNULL
7196-14392  -7196   NULLNULL54.06.4051596E8 
-5.4606790657789354E-5  54  -1.4732037
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -55 -55.0   
1969-12-31 15:59:43.932 -7196   1982381637  -7390318742736  NULLNULL
7196-14392  -7196   NULLNULL55.06.4051596E8 
-5.561802752182249E-5   55  -1.4464182
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -55 -55.0   
1969-12-31 16:00:01.138 -7196   888532643   -3312449693104  NULLNULL
7196-14392  -7196   NULLNULL55.06.4051596E8 
-5.561802752182249E-5   55  -1.4464182
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -55 -55.0   
1969-12-31 16:00:13.249 -7196   -685064281   

[14/21] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-26 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/0427ffa5/ql/src/test/results/clientpositive/llap/vector_windowing_windowspec.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_windowing_windowspec.q.out 
b/ql/src/test/results/clientpositive/llap/vector_windowing_windowspec.q.out
index 93b8655..520896c 100644
--- a/ql/src/test/results/clientpositive/llap/vector_windowing_windowspec.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_windowing_windowspec.q.out
@@ -125,7 +125,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col7 ASC NULLS FIRST, _col3 ASC NULLS FIRST
+order by: _col7 ASC NULLS LAST, _col3 ASC NULLS LAST
 partition by: _col2
 raw input shape:
 window functions:
@@ -347,7 +347,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col7 ASC NULLS FIRST, _col4 ASC NULLS FIRST
+order by: _col7 ASC NULLS LAST, _col4 ASC NULLS LAST
 partition by: _col5
 raw input shape:
 window functions:
@@ -570,7 +570,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col4 ASC NULLS FIRST
+order by: _col4 ASC NULLS LAST
 partition by: _col8
 raw input shape:
 window functions:
@@ -792,7 +792,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col7 ASC NULLS FIRST, _col4 ASC NULLS FIRST
+order by: _col7 ASC NULLS LAST, _col4 ASC NULLS LAST
 partition by: _col8
 raw input shape:
 window functions:
@@ -1014,7 +1014,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col7 ASC NULLS FIRST, _col5 DESC NULLS LAST
+order by: _col7 ASC NULLS LAST, _col5 DESC NULLS LAST
 partition by: _col0
 raw input shape:
 window functions:
@@ -1221,7 +1221,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-reduceColumnNullOrder: aa
+reduceColumnNullOrder: az
 reduceColumnSortOrder: ++
 allNative: false
 usesVectorUDFAdaptor: false
@@ -1249,7 +1249,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col7 ASC NULLS FIRST
+order by: _col7 ASC NULLS LAST
 partition by: _col8
 raw input shape:
 window functions:
@@ -1478,7 +1478,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-reduceColumnNullOrder: aa
+reduceColumnNullOrder: az
 reduceColumnSortOrder: ++
 allNative: false
 usesVectorUDFAdaptor: false
@@ -1506,7 +1506,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col4 ASC NULLS FIRST
+order by: _col4 ASC NULLS LAST
 partition by: _col8
 raw input shape:
 window functions:
@@ -1751,7 +1751,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col4 ASC NULLS FIRST
+order by: _col4 ASC NULLS LAST
 partition by: _col8
 raw input shape:
   

[17/21] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-26 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/0427ffa5/ql/src/test/results/clientpositive/llap/vector_decimal_10_0.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_10_0.q.out 
b/ql/src/test/results/clientpositive/llap/vector_decimal_10_0.q.out
index 3170625..d8b3a5f 100644
--- a/ql/src/test/results/clientpositive/llap/vector_decimal_10_0.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_decimal_10_0.q.out
@@ -103,7 +103,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-reduceColumnNullOrder: a
+reduceColumnNullOrder: z
 reduceColumnSortOrder: +
 allNative: false
 usesVectorUDFAdaptor: false
@@ -147,8 +147,8 @@ POSTHOOK: query: SELECT `dec` FROM `DECIMAL` order by `dec`
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal
  A masked pattern was here 
-NULL
 10
+NULL
 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT `dec` FROM `decimal_txt` order by `dec`
 PREHOOK: type: QUERY
@@ -219,7 +219,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-reduceColumnNullOrder: a
+reduceColumnNullOrder: z
 reduceColumnSortOrder: +
 allNative: false
 usesVectorUDFAdaptor: false
@@ -263,8 +263,8 @@ POSTHOOK: query: SELECT `dec` FROM `decimal_txt` order by 
`dec`
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_txt
  A masked pattern was here 
-NULL
 10
+NULL
 PREHOOK: query: DROP TABLE DECIMAL_txt
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@decimal_txt
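
Note: this file is the smallest self-contained demonstration in the series: the same ascending sort over a single nullable DECIMAL column now emits its NULL row after the value 10 instead of before it, in both the ORC and text variants of the table.

    SELECT `dec` FROM `decimal_txt` ORDER BY `dec`;
    -- before HIVE-20423: NULL, 10
    -- after:             10, NULL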

http://git-wip-us.apache.org/repos/asf/hive/blob/0427ffa5/ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out 
b/ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out
index 8cd753c..d59a173 100644
--- a/ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out
@@ -92,7 +92,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-reduceColumnNullOrder: a
+reduceColumnNullOrder: z
 reduceColumnSortOrder: +
 allNative: false
 usesVectorUDFAdaptor: false
@@ -208,7 +208,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-reduceColumnNullOrder: a
+reduceColumnNullOrder: z
 reduceColumnSortOrder: +
 allNative: false
 usesVectorUDFAdaptor: false
@@ -324,7 +324,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-reduceColumnNullOrder: a
+reduceColumnNullOrder: z
 reduceColumnSortOrder: +
 allNative: false
 usesVectorUDFAdaptor: false
@@ -440,7 +440,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-reduceColumnNullOrder: a
+reduceColumnNullOrder: z
 reduceColumnSortOrder: +
 allNative: false
 usesVectorUDFAdaptor: false
@@ -556,7 +556,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-reduceColumnNullOrder: a
+reduceColumnNullOrder: z
 reduceColumnSortOrder: +
 allNative: false
 usesVectorUDFAdaptor: false
@@ -672,7 +672,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-

[05/21] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-26 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/0427ffa5/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out 
b/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out
index 1791c89..4ea12d7 100644
--- a/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out
@@ -258,7 +258,7 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
   alias: alltypesorc_string
-  Statistics: Num rows: 52 Data size: 3219 Basic stats: 
COMPLETE Column stats: NONE
+  Statistics: Num rows: 52 Data size: 8979 Basic stats: 
COMPLETE Column stats: NONE
   TableScan Vectorization:
   native: true
   Select Operator
@@ -269,7 +269,7 @@ STAGE PLANS:
 native: true
 projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 
0, 1, 3, 13, 14, 15, 16, 17]
 selectExpressions: VectorUDFUnixTimeStampTimestamp(col 
1:timestamp) -> 5:bigint, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) 
-> 6:int, VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 7:int, 
VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 8:int, 
VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 9:int, 
VectorUDFHourTimestamp(col 1:timestamp, field HOUR_OF_DAY) -> 10:int, 
VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 11:int, 
VectorUDFSecondTimestamp(col 1:timestamp, field SECOND) -> 12:int, 
IfExprTimestampColumnScalar(col 0:boolean, col 1:timestamp, val 1319-01-25 
08:31:57.778) -> 13:timestamp, IfExprTimestampScalarColumn(col 0:boolean, val 
2000-12-18 00:42:30.0005, col 1:timestamp) -> 14:timestamp, 
IfExprTimestampColumnColumn(col 0:boolean, col 1:timestamp, col 3:timestamp) -> 
15:timestamp, IfExprColumnNull(col 0:boolean, col 1:timestamp, null)(children: 
col 0:boolean, col 1:timestamp) -> 16:timestamp, IfExprNullColumn(col 0:boolean, null, col 3)(children: col 0:boolean, 
col 3:timestamp) -> 17:timestamp
-Statistics: Num rows: 52 Data size: 3219 Basic stats: 
COMPLETE Column stats: NONE
+Statistics: Num rows: 52 Data size: 8979 Basic stats: 
COMPLETE Column stats: NONE
 Reduce Output Operator
   key expressions: _col0 (type: bigint)
   sort order: +
@@ -277,7 +277,7 @@ STAGE PLANS:
   className: VectorReduceSinkObjectHashOperator
   native: true
   nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-  Statistics: Num rows: 52 Data size: 3219 Basic stats: 
COMPLETE Column stats: NONE
+  Statistics: Num rows: 52 Data size: 8979 Basic stats: 
COMPLETE Column stats: NONE
   value expressions: _col1 (type: int), _col2 (type: int), 
_col3 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), 
_col8 (type: int), _col9 (type: boolean), _col10 (type: timestamp), _col11 
(type: timestamp), _col12 (type: timestamp), _col13 (type: timestamp), _col14 
(type: timestamp), _col15 (type: timestamp), _col16 (type: timestamp)
 Execution mode: vectorized
 Map Vectorization:
@@ -305,13 +305,13 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [0, 1, 2, 3, 3, 4, 5, 6, 7, 8, 
9, 10, 11, 12, 13, 14, 15]
-Statistics: Num rows: 52 Data size: 3219 Basic stats: COMPLETE 
Column stats: NONE
+Statistics: Num rows: 52 Data size: 8979 Basic stats: COMPLETE 
Column stats: NONE
 File Output Operator
   compressed: false
   File Sink Vectorization:
   className: VectorFileSinkOperator
   native: false
-  Statistics: Num rows: 52 Data size: 3219 Basic stats: 
COMPLETE Column stats: NONE
+  Statistics: Num rows: 52 Data size: 8979 Basic stats: 
COMPLETE Column stats: NONE
   table:
   input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
   output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -369,6 +369,46 @@ ORDER BY c1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc_string
  A masked pattern was here 
+-16196912  31  31  

[10/21] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-26 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/0427ffa5/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out 
b/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out
index e1e36c0..9b0baaa 100644
--- a/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out
+++ b/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out
@@ -65,7 +65,7 @@ STAGE PLANS:
   Stage: Stage-1
 Spark
   Edges:
-Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL 
SORT, 2)
+Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL 
SORT, 4)
  A masked pattern was here 
   Vertices:
 Map 1 
@@ -142,12 +142,12 @@ PREHOOK: query: select table1_n10.id, table1_n10.val, 
table1_n10.val1 from table
 PREHOOK: type: QUERY
 PREHOOK: Input: default@table1_n10
 PREHOOK: Input: default@table3_n0
- A masked pattern was here 
+PREHOOK: Output: hdfs://### HDFS PATH ###
 POSTHOOK: query: select table1_n10.id, table1_n10.val, table1_n10.val1 from 
table1_n10 left semi join table3_n0 on table1_n10.dimid = table3_n0.id where 
table1_n10.val = 't1val01'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@table1_n10
 POSTHOOK: Input: default@table3_n0
- A masked pattern was here 
+POSTHOOK: Output: hdfs://### HDFS PATH ###
 1  t1val01 val101
 3  t1val01 val104
 3  t1val01 val106
@@ -163,8 +163,8 @@ STAGE PLANS:
   Stage: Stage-1
 Spark
   Edges:
-Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL 
SORT, 2)
-Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 
(PARTITION-LEVEL SORT, 2)
+Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL 
SORT, 4)
+Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 4), Reducer 2 
(PARTITION-LEVEL SORT, 4)
  A masked pattern was here 
   Vertices:
 Map 1 
@@ -277,13 +277,13 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@table1_n10
 PREHOOK: Input: default@table2_n6
 PREHOOK: Input: default@table3_n0
- A masked pattern was here 
+PREHOOK: Output: hdfs://### HDFS PATH ###
 POSTHOOK: query: select table1_n10.id, table1_n10.val, table2_n6.val2 from 
table1_n10 inner join table2_n6 on table1_n10.val = 't1val01' and table1_n10.id 
= table2_n6.id left semi join table3_n0 on table1_n10.dimid = table3_n0.id
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@table1_n10
 POSTHOOK: Input: default@table2_n6
 POSTHOOK: Input: default@table3_n0
- A masked pattern was here 
+POSTHOOK: Output: hdfs://### HDFS PATH ###
 1  t1val01 t2val201
 3  t1val01 t2val203
 3  t1val01 t2val203
@@ -299,8 +299,8 @@ STAGE PLANS:
   Stage: Stage-1
 Spark
   Edges:
-Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL 
SORT, 2)
-Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 
(PARTITION-LEVEL SORT, 2)
+Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL 
SORT, 4)
+Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 4), Reducer 2 
(PARTITION-LEVEL SORT, 4)
  A masked pattern was here 
   Vertices:
 Map 1 
@@ -412,13 +412,13 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@table1_n10
 PREHOOK: Input: default@table2_n6
 PREHOOK: Input: default@table3_n0
- A masked pattern was here 
+PREHOOK: Output: hdfs://### HDFS PATH ###
 POSTHOOK: query: select table1_n10.id, table1_n10.val, table2_n6.val2 from 
table1_n10 left semi join table3_n0 on table1_n10.dimid = table3_n0.id inner 
join table2_n6 on table1_n10.val = 't1val01' and table1_n10.id = table2_n6.id
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@table1_n10
 POSTHOOK: Input: default@table2_n6
 POSTHOOK: Input: default@table3_n0
- A masked pattern was here 
+POSTHOOK: Output: hdfs://### HDFS PATH ###
 1  t1val01 t2val201
 3  t1val01 t2val203
 3  t1val01 t2val203
@@ -434,7 +434,7 @@ STAGE PLANS:
   Stage: Stage-1
 Spark
   Edges:
-Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL 
SORT, 2)
+Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL 
SORT, 4)
  A masked pattern was here 
   Vertices:
 Map 1 
@@ -509,12 +509,12 @@ PREHOOK: query: select table1_n10.id, table1_n10.val, 
table1_n10.val1 from table
 PREHOOK: type: QUERY
 PREHOOK: Input: default@table1_n10
 PREHOOK: Input: default@table3_n0
- A masked pattern was here 
+PREHOOK: Output: hdfs://### HDFS PATH ###
 POSTHOOK: query: select table1_n10.id, table1_n10.val, table1_n10.val1 from 
table1_n10 left semi join table3_n0 on table1_n10.dimid = table3_n0.id and 
table3_n0.id = 100 where table1_n10.dimid <> 100
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@table1_n10
 POSTHOOK: Input: default@table3_n0
- A masked pattern was here 

[21/21] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-26 Thread tchoi
HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed 
by Jesus Camacho Rodriguez)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0427ffa5
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0427ffa5
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0427ffa5

Branch: refs/heads/branch-3
Commit: 0427ffa57b5fc8a7e399fa6b6d868f9b989c1373
Parents: fdc12f3
Author: Teddy Choi 
Authored: Wed Sep 26 16:51:12 2018 +0900
Committer: Teddy Choi 
Committed: Wed Sep 26 16:51:15 2018 +0900

--
 .../org/apache/hadoop/hive/conf/HiveConf.java   |2 +
 .../write_final_output_blobstore.q.out  |4 +-
 .../hive/ql/parse/BaseSemanticAnalyzer.java |9 +-
 .../hadoop/hive/ql/parse/CalcitePlanner.java|4 +-
 .../apache/hadoop/hive/ql/parse/HiveParser.g|   14 +-
 .../hadoop/hive/ql/plan/PTFDeserializer.java|4 +-
 .../hive/ql/udf/generic/GenericUDAFAverage.java |   16 +-
 .../ql/udf/generic/GenericUDAFEvaluator.java|   11 +-
 .../hive/ql/udf/generic/GenericUDAFSum.java |   18 +-
 .../hive/ql/udf/ptf/BasePartitionEvaluator.java |   49 +-
 .../hive/ql/udf/ptf/TableFunctionEvaluator.java |9 +
 .../hive/ql/udf/ptf/TableFunctionResolver.java  |5 +-
 .../hive/ql/udf/ptf/ValueBoundaryScanner.java   |   93 +-
 .../hive/ql/udf/ptf/WindowingTableFunction.java |4 +-
 .../clientpositive/beeline/smb_mapjoin_13.q.out |4 +-
 ...names_with_leading_and_trailing_spaces.q.out |2 +-
 .../clientpositive/correlationoptimizer14.q.out |   37 -
 .../results/clientpositive/ctas_colname.q.out   |4 +-
 .../test/results/clientpositive/decimal_3.q.out |8 +-
 .../test/results/clientpositive/decimal_4.q.out |4 +-
 .../test/results/clientpositive/decimal_5.q.out |8 +-
 .../test/results/clientpositive/decimal_6.q.out |   12 +-
 .../clientpositive/decimal_precision.q.out  |  286 ++---
 .../results/clientpositive/decimal_serde.q.out  |4 +-
 .../clientpositive/delete_all_partitioned.q.out |2 +-
 .../clientpositive/distinct_windowing.q.out |6 +-
 .../distinct_windowing_no_cbo.q.out |   10 +-
 .../groupby_grouping_window.q.out   |2 +-
 .../results/clientpositive/input_part7.q.out|4 +-
 .../insert_values_non_partitioned.q.out |2 +-
 .../clientpositive/limit_pushdown2.q.out|   40 +-
 .../clientpositive/llap/acid_no_buckets.q.out   |   16 +-
 .../llap/acid_vectorization_original.q.out  |2 +-
 .../llap/bucket_map_join_tez2.q.out |2 -
 .../clientpositive/llap/bucketmapjoin7.q.out|2 +-
 .../results/clientpositive/llap/cbo_limit.q.out |8 +-
 .../clientpositive/llap/cbo_rp_limit.q.out  |8 +-
 ...names_with_leading_and_trailing_spaces.q.out |2 +-
 .../llap/delete_all_partitioned.q.out   |2 +-
 .../clientpositive/llap/explainuser_1.q.out |   88 +-
 .../llap/external_jdbc_table.q.out  |1 -
 .../llap/groupby_resolution.q.out   |2 +-
 .../llap/insert_into_with_schema.q.out  |4 +-
 .../llap/insert_values_non_partitioned.q.out|2 +-
 .../clientpositive/llap/limit_pushdown.q.out|8 +-
 .../clientpositive/llap/limit_pushdown3.q.out   |8 +-
 .../results/clientpositive/llap/lineage2.q.out  |2 +-
 .../results/clientpositive/llap/lineage3.q.out  |   10 +-
 .../results/clientpositive/llap/llap_acid.q.out |6 +-
 .../clientpositive/llap/llap_acid_fast.q.out|6 +-
 .../clientpositive/llap/llap_smb_ptf.q.out  |4 +-
 .../llap/materialized_view_partitioned_2.q.out  |3 -
 .../llap/materialized_view_partitioned_3.q.out  |1 -
 .../llap/materialized_view_rewrite_part_2.q.out |   30 +-
 .../llap/offset_limit_ppd_optimizer.q.out   |8 +-
 .../clientpositive/llap/orc_llap_counters.q.out |   82 +-
 .../llap/orc_llap_counters1.q.out   |   10 +-
 .../clientpositive/llap/orc_ppd_basic.q.out |  100 +-
 .../llap/orc_ppd_schema_evol_3a.q.out   |  118 +--
 .../clientpositive/llap/order_null.q.out|4 +-
 .../test/results/clientpositive/llap/ptf.q.out  |  140 +--
 .../clientpositive/llap/ptf_matchpath.q.out |6 +-
 .../clientpositive/llap/ptf_streaming.q.out |   94 +-
 .../llap/results_cache_with_masking.q.out   |1 -
 .../clientpositive/llap/sharedworkext.q.out |4 +-
 .../clientpositive/llap/skewjoinopt15.q.out |4 +-
 .../clientpositive/llap/smb_mapjoin_15.q.out|8 +-
 .../clientpositive/llap/subquery_in.q.out   |6 +-
 .../llap/subquery_in_having.q.out   |2 +-
 .../clientpositive/llap/subquery_notin.q.out|   14 +-
 .../clientpositive/llap/subquery_scalar.q.out   |4 +-
 .../results/clientpositive/llap/sysdb.q.out |   14 +-
 .../llap/tez_dynpart_hashjoin_1.q.out   |4 +-
 .../llap/tez_dynpart_hashjoin_2.q.out 
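
Note: the two-line HiveConf.java entry in this diffstat is the switch guarding the new behavior. Assuming the property name this patch introduced, hive.default.nulls.last (default true), the old ordering can be restored session-wide instead of per query:

    SET hive.default.nulls.last=false;   -- assumed property name; reverts ASC to NULLS FIRST
    SELECT c FROM t ORDER BY c;          -- t and c are hypothetical, for illustration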

[19/21] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-26 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/0427ffa5/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out 
b/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out
index 65eec52..b34b3be 100644
--- a/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out
+++ b/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out
@@ -237,7 +237,7 @@ Table Parameters:
orc.bloom.filter.columns*   
orc.row.index.stride1000
rawDataSize 1139514 
-   totalSize   55665   
+   totalSize   55686   
  A masked pattern was here 
 
 # Storage Information   
@@ -255,7 +255,7 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_ppd_n1
 PREHOOK: Output: hdfs://### HDFS PATH ###
 Stage-1 FILE SYSTEM COUNTERS:
-   HDFS_BYTES_READ: 16675
+   HDFS_BYTES_READ: 16681
HDFS_BYTES_WRITTEN: 104
HDFS_READ_OPS: 7
HDFS_LARGE_READ_OPS: 0
@@ -315,7 +315,7 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_ppd_n1
 PREHOOK: Output: hdfs://### HDFS PATH ###
 Stage-1 FILE SYSTEM COUNTERS:
-   HDFS_BYTES_READ: 1055
+   HDFS_BYTES_READ: 1047
HDFS_BYTES_WRITTEN: 101
HDFS_READ_OPS: 5
HDFS_LARGE_READ_OPS: 0
@@ -336,9 +336,9 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_OPERATOR_TS_0: 1000
 Stage-1 LLAP IO COUNTERS:
ALLOCATED_BYTES: 1048576
-   ALLOCATED_USED_BYTES: 2732
+   ALLOCATED_USED_BYTES: 2731
CACHE_HIT_BYTES: 24
-   CACHE_MISS_BYTES: 1055
+   CACHE_MISS_BYTES: 1047
METADATA_CACHE_HIT: 2
NUM_DECODED_BATCHES: 1
NUM_VECTOR_BATCHES: 1
@@ -375,7 +375,7 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_OPERATOR_SEL_9: 22
RECORDS_OUT_OPERATOR_TS_0: 1000
 Stage-1 LLAP IO COUNTERS:
-   CACHE_HIT_BYTES: 1079
+   CACHE_HIT_BYTES: 1071
CACHE_MISS_BYTES: 0
METADATA_CACHE_HIT: 2
NUM_DECODED_BATCHES: 1
@@ -413,7 +413,7 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_OPERATOR_SEL_9: 16
RECORDS_OUT_OPERATOR_TS_0: 1000
 Stage-1 LLAP IO COUNTERS:
-   CACHE_HIT_BYTES: 1079
+   CACHE_HIT_BYTES: 1071
CACHE_MISS_BYTES: 0
METADATA_CACHE_HIT: 2
NUM_DECODED_BATCHES: 1
@@ -451,7 +451,7 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_OPERATOR_SEL_9: 18
RECORDS_OUT_OPERATOR_TS_0: 2000
 Stage-1 LLAP IO COUNTERS:
-   CACHE_HIT_BYTES: 1079
+   CACHE_HIT_BYTES: 1071
CACHE_MISS_BYTES: 0
METADATA_CACHE_HIT: 2
NUM_DECODED_BATCHES: 2
@@ -489,7 +489,7 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_OPERATOR_SEL_9: 1
RECORDS_OUT_OPERATOR_TS_0: 1000
 Stage-1 LLAP IO COUNTERS:
-   CACHE_HIT_BYTES: 1079
+   CACHE_HIT_BYTES: 1071
CACHE_MISS_BYTES: 0
METADATA_CACHE_HIT: 2
NUM_DECODED_BATCHES: 1
@@ -527,7 +527,7 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_OPERATOR_SEL_9: 32
RECORDS_OUT_OPERATOR_TS_0: 1000
 Stage-1 LLAP IO COUNTERS:
-   CACHE_HIT_BYTES: 1079
+   CACHE_HIT_BYTES: 1071
CACHE_MISS_BYTES: 0
METADATA_CACHE_HIT: 2
NUM_DECODED_BATCHES: 1
@@ -565,7 +565,7 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_OPERATOR_SEL_9: 32
RECORDS_OUT_OPERATOR_TS_0: 1000
 Stage-1 LLAP IO COUNTERS:
-   CACHE_HIT_BYTES: 1079
+   CACHE_HIT_BYTES: 1071
CACHE_MISS_BYTES: 0
METADATA_CACHE_HIT: 2
NUM_DECODED_BATCHES: 1
@@ -603,7 +603,7 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_OPERATOR_SEL_9: 1697
RECORDS_OUT_OPERATOR_TS_0: 2000
 Stage-1 LLAP IO COUNTERS:
-   CACHE_HIT_BYTES: 1079
+   CACHE_HIT_BYTES: 1071
CACHE_MISS_BYTES: 0
METADATA_CACHE_HIT: 2
NUM_DECODED_BATCHES: 2
@@ -641,7 +641,7 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_OPERATOR_SEL_9: 12
RECORDS_OUT_OPERATOR_TS_0: 1000
 Stage-1 LLAP IO COUNTERS:
-   CACHE_HIT_BYTES: 1079
+   CACHE_HIT_BYTES: 1071
CACHE_MISS_BYTES: 0
METADATA_CACHE_HIT: 2
NUM_DECODED_BATCHES: 1
@@ -679,7 +679,7 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_OPERATOR_SEL_9: 1713
RECORDS_OUT_OPERATOR_TS_0: 2000
 Stage-1 LLAP IO COUNTERS:
-   CACHE_HIT_BYTES: 1079
+   CACHE_HIT_BYTES: 1071
CACHE_MISS_BYTES: 0
METADATA_CACHE_HIT: 2
NUM_DECODED_BATCHES: 2
@@ -705,7 +705,7 @@ Stage-1 FILE SYSTEM COUNTERS:
 Stage-1 HIVE COUNTERS:
CREATED_FILES: 1
DESERIALIZE_ERRORS: 0
-   RECORDS_IN_Map_1: 1000
+   RECORDS_IN_Map_1: 100
RECORDS_OUT_0: 1
RECORDS_OUT_INTERMEDIATE_Map_1: 6
RECORDS_OUT_INTERMEDIATE_Reducer_2: 0
@@ -715,14 +715,14 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_OPERATOR_MAP_0: 0
RECORDS_OUT_OPERATOR_RS_10: 6
RECORDS_OUT_OPERATOR_SEL_9: 6
-   RECORDS_OUT_OPERATOR_TS_0: 1000
+   RECORDS_OUT_OPERATOR_TS_0: 100
 Stage-1 LLAP IO COUNTERS:
-   CACHE_HIT_BYTES: 1079
+   CACHE_HIT_BYTES: 1071
CACHE_MISS_BYTES: 0
METADATA_CACHE_HIT: 2
NUM_DECODED_BATCHES: 1
NUM_VECTOR_BATCHES: 1
-   ROWS_EMITTED: 1000
+   ROWS_EMITTED: 100
SELECTED_ROWGROUPS: 

[06/20] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-13 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/499d01a3/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out 
b/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
index 1b04155..3844c79 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
@@ -1232,56 +1232,56 @@ LIMIT 50
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
  A masked pattern was here 
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -51 -51.0   
1969-12-31 15:59:43.64  -7196   -1339164819 4992406445232   NULLNULL
7196-14392  -7196   NULLNULL51.06.4051596E8 
-5.157308006568995E-5   51  -1.5598627
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -52 -52.0   
1969-12-31 15:59:45.978 -7196   -2128720310 7935869315680   NULLNULL
7196-14392  -7196   NULLNULL52.06.4051596E8 
-5.258431692972308E-5   52  -1.5298654
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -52 -52.0   
1969-12-31 15:59:47.15  -7196   628698169   -2343786774032  NULLNULL
7196-14392  -7196   NULLNULL52.06.4051596E8 
-5.258431692972308E-5   52  -1.5298654
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -52 -52.0   
1969-12-31 15:59:57.86  -7196   -26309289   98081029392 NULLNULL
7196-14392  -7196   NULLNULL52.06.4051596E8 
-5.258431692972308E-5   52  -1.5298654
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -52 -52.0   
1969-12-31 15:59:58.479 -7196   -1379694191 5143499944048   NULLNULL
7196-14392  -7196   NULLNULL52.06.4051596E8 
-5.258431692972308E-5   52  -1.5298654
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -52 -52.0   
1969-12-31 16:00:03.963 -7196   95444104-355815619712   NULLNULL
7196-14392  -7196   NULLNULL52.06.4051596E8 
-5.258431692972308E-5   52  -1.5298654
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -52 -52.0   
1969-12-31 16:00:04.518 -7196   -1658319459 6182214943152   NULLNULL
7196-14392  -7196   NULLNULL52.06.4051596E8 
-5.258431692972308E-5   52  -1.5298654
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -53 -53.0   
1969-12-31 15:59:48.882 -7196   -1560660031 5818140595568   NULLNULL
7196-14392  -7196   NULLNULL53.06.4051596E8 
-5.359555379375622E-5   53  -1.501
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -53 -53.0   
1969-12-31 15:59:57.663 -7196   898472381   -3349505036368  NULLNULL
7196-14392  -7196   NULLNULL53.06.4051596E8 
-5.359555379375622E-5   53  -1.501
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -53 -53.0   
1969-12-31 16:00:11.36  -7196   -1357789899 5061840743472   NULLNULL
7196-14392  -7196   NULLNULL53.06.4051596E8 
-5.359555379375622E-5   53  -1.501
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -54 -54.0   
1969-12-31 15:59:53.657 -7196   1476582815  -5504700734320  NULLNULL
7196-14392  -7196   NULLNULL54.06.4051596E8 
-5.4606790657789354E-5  54  -1.4732037
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -54 -54.0   
1969-12-31 16:00:05.688 -7196   1614836149  -6020109163472  NULLNULL
7196-14392  -7196   NULLNULL54.06.4051596E8 
-5.4606790657789354E-5  54  -1.4732037
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -54 -54.0   
1969-12-31 16:00:06.484 -7196   1605976008  -5987078557824  NULLNULL
7196-14392  -7196   NULLNULL54.06.4051596E8 
-5.4606790657789354E-5  54  -1.4732037
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -54 -54.0   
1969-12-31 16:00:11.198 -7196   1650677402  -6153725354656  NULLNULL
7196-14392  -7196   NULLNULL54.06.4051596E8 
-5.4606790657789354E-5  54  -1.4732037
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -55 -55.0   
1969-12-31 15:59:43.932 -7196   1982381637  -7390318742736  NULLNULL
7196-14392  -7196   NULLNULL55.06.4051596E8 
-5.561802752182249E-5   55  -1.4464182
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -55 -55.0   
1969-12-31 16:00:01.138 -7196   888532643   -3312449693104  NULLNULL
7196-14392  -7196   NULLNULL55.06.4051596E8 
-5.561802752182249E-5   55  -1.4464182
-NULL   -7196.0 1969-12-31 15:59:58.174 NULLfalse   -55 -55.0   
1969-12-31 16:00:13.249 -7196   

[04/20] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-13 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/499d01a3/ql/src/test/results/clientpositive/update_where_partitioned.q.out
--
diff --git a/ql/src/test/results/clientpositive/update_where_partitioned.q.out 
b/ql/src/test/results/clientpositive/update_where_partitioned.q.out
index 1834e83..ac603b8 100644
--- a/ql/src/test/results/clientpositive/update_where_partitioned.q.out
+++ b/ql/src/test/results/clientpositive/update_where_partitioned.q.out
@@ -53,11 +53,11 @@ POSTHOOK: Input: default@acid_uwp@ds=tomorrow
 762BLoMwUJ51ns6pd  tomorrow
 762a10E76jX35YwquKCTA  tomorrow
 762q5y2Vy1 tomorrow
-6981   NULLtomorrow
 6981   1FNNhmiFLGw425NA13g tomorrow
 6981   4KhrrQ0nJ7bMNTvhSCA tomorrow
 6981   K630vaVftomorrow
 6981   Y5x3JuI3M8jngv5Ntomorrow
+6981   YdG61y00526u5   tomorrow
 PREHOOK: query: update acid_uwp set b = 'fred' where b = 'k17Am8uPHWk02cEf1jet'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@acid_uwp
@@ -99,8 +99,8 @@ POSTHOOK: Input: default@acid_uwp@ds=tomorrow
 762BLoMwUJ51ns6pd  tomorrow
 762a10E76jX35YwquKCTA  tomorrow
 762q5y2Vy1 tomorrow
-6981   NULLtomorrow
 6981   1FNNhmiFLGw425NA13g tomorrow
 6981   4KhrrQ0nJ7bMNTvhSCA tomorrow
 6981   K630vaVftomorrow
 6981   Y5x3JuI3M8jngv5Ntomorrow
+6981   YdG61y00526u5   tomorrow

http://git-wip-us.apache.org/repos/asf/hive/blob/499d01a3/ql/src/test/results/clientpositive/vector_case_when_2.q.out
--
diff --git a/ql/src/test/results/clientpositive/vector_case_when_2.q.out 
b/ql/src/test/results/clientpositive/vector_case_when_2.q.out
index b1cb3f2..b6acc1d 100644
--- a/ql/src/test/results/clientpositive/vector_case_when_2.q.out
+++ b/ql/src/test/results/clientpositive/vector_case_when_2.q.out
@@ -235,7 +235,6 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@timestamps
  A masked pattern was here 
 ctimestamp1ctimestamp2 ctimestamp2_description 
ctimestamp2_description_2   ctimestamp2_description_3   field1  field_2 
field_3 field_4 field_5
-NULL   NULLUnknown NULLNULLNULL2018-03-08 23:04:59 NULL
NULLNULL
 0004-09-22 18:26:29.51954  0004-09-21 16:23:25.51954   1800s or 
EarlierOld Old 4   0004-09-22 18:26:29.51954   26  
NULL0005-09-22
 0528-10-27 08:15:18.941718273  0528-10-26 06:12:14.941718273   1800s or 
EarlierOld Old 528 2018-03-08 23:04:59 15  NULL
0529-10-27
 1319-02-02 16:31:57.7781319-02-01 14:28:53.778 1800s or Earlier
Old Old 13191319-02-02 16:31:57.778 31  NULL1320-02-02
@@ -286,6 +285,7 @@ NULLNULLUnknown NULLNULLNULL
2018-03-08 23:04:59 NULLNULLNULL
 9075-06-13 16:20:09.218517797  9075-06-12 14:17:05.218517797   Unknown NULL
NULL90752018-03-08 23:04:59 20  NULL9075-06-14
 9209-11-11 04:08:58.223768453  9209-11-10 02:05:54.223768453   Unknown NULL
NULL92092018-03-08 23:04:59 8   NULL9209-11-12
 9403-01-09 18:12:33.5479403-01-08 16:09:29.547 Unknown NULLNULL
94032018-03-08 23:04:59 12  NULL9404-01-09
+NULL   NULLUnknown NULLNULLNULL2018-03-08 23:04:59 NULL
NULLNULL
 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT
ctimestamp1,
@@ -494,7 +494,6 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@timestamps
  A masked pattern was here 
 ctimestamp1ctimestamp2 ctimestamp2_description 
ctimestamp2_description_2   ctimestamp2_description_3   field1  field_2 
field_3 field_4 field_5
-NULL   NULLUnknown NULLNULLNULL2018-03-08 23:04:59 NULL
NULLNULL
 0004-09-22 18:26:29.51954  0004-09-21 16:23:25.51954   1800s or 
EarlierOld Old 4   0004-09-22 18:26:29.51954   26  
NULL0005-09-22
 0528-10-27 08:15:18.941718273  0528-10-26 06:12:14.941718273   1800s or 
EarlierOld Old 528 2018-03-08 23:04:59 15  NULL
0529-10-27
 1319-02-02 16:31:57.7781319-02-01 14:28:53.778 1800s or Earlier
Old Old 13191319-02-02 16:31:57.778 31  NULL1320-02-02
@@ -545,6 +544,7 @@ NULLNULLUnknown NULLNULLNULL
2018-03-08 23:04:59 NULLNULLNULL
 9075-06-13 16:20:09.218517797  9075-06-12 14:17:05.218517797   Unknown NULL
NULL90752018-03-08 23:04:59 20  NULL9075-06-14
 9209-11-11 04:08:58.223768453  9209-11-10 02:05:54.223768453   Unknown NULL
NULL92092018-03-08 23:04:59 8   NULL9209-11-12
 9403-01-09 18:12:33.5479403-01-08 16:09:29.547 Unknown NULLNULL
94032018-03-08 23:04:59 12  NULL9404-01-09
+NULL   NULLUnknown NULLNULLNULL2018-03-08 23:04:59 NULL
NULLNULL
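
Note: the two hunks above show the same all-NULL-timestamp row migrating from the first line of the ordered output to the last, in both variants of the query; the CASE/WHEN projections themselves are untouched, only the placement of the NULL sort key changes. The shape of the ordering involved, assuming the query sorts on ctimestamp1:

    SELECT ctimestamp1, ctimestamp2
    FROM timestamps
    ORDER BY ctimestamp1;   -- the NULL-ctimestamp1 row is now emitted last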
 

[17/20] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-13 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/499d01a3/ql/src/test/results/clientpositive/llap/smb_mapjoin_15.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/smb_mapjoin_15.q.out b/ql/src/test/results/clientpositive/llap/smb_mapjoin_15.q.out
index 1087902..e4f2452 100644
--- a/ql/src/test/results/clientpositive/llap/smb_mapjoin_15.q.out
+++ b/ql/src/test/results/clientpositive/llap/smb_mapjoin_15.q.out
@@ -155,7 +155,7 @@ STAGE PLANS:
 Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
   key expressions: _col0 (type: int)
-  null sort order: a
+  null sort order: z
   sort order: +
   Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE
   tag: -1
@@ -541,7 +541,7 @@ STAGE PLANS:
 Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
   key expressions: _col0 (type: int)
-  null sort order: a
+  null sort order: z
   sort order: +
   Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE
   tag: -1
@@ -818,7 +818,7 @@ STAGE PLANS:
 Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
   key expressions: _col0 (type: int)
-  null sort order: a
+  null sort order: z
   sort order: +
   Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE
   tag: -1
@@ -1095,7 +1095,7 @@ STAGE PLANS:
 Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
   key expressions: _col0 (type: int)
-  null sort order: a
+  null sort order: z
   sort order: +
   Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE
   tag: -1
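In these plans the one-letter null-order codes track the same default change: on an ascending reduce-sink key, 'a' meant NULLS FIRST and 'z' means NULLS LAST. A hedged sketch of the kind of query behind such a reduce sink (table and column names are illustrative, not from the patch):

    -- The join key is shuffled ascending ('+'); its null sort order
    -- is now 'z' (NULLS LAST) unless the query says otherwise.
    SELECT a.key, b.value
    FROM t1 a JOIN t2 b ON (a.key = b.key)
    ORDER BY a.key;  -- implicitly ASC NULLS LAST after HIVE-20423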

http://git-wip-us.apache.org/repos/asf/hive/blob/499d01a3/ql/src/test/results/clientpositive/llap/subquery_in.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/subquery_in.q.out b/ql/src/test/results/clientpositive/llap/subquery_in.q.out
index 21801a3..5a9021b 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_in.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_in.q.out
@@ -345,7 +345,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col5 ASC NULLS FIRST
+order by: _col5 ASC NULLS LAST
 partition by: _col2
 raw input shape:
 window functions:
@@ -528,7 +528,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col5 ASC NULLS FIRST
+order by: _col5 ASC NULLS LAST
 partition by: _col2
 raw input shape:
 window functions:

http://git-wip-us.apache.org/repos/asf/hive/blob/499d01a3/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out b/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out
index ab28bba..1b25b5c 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out
@@ -1508,7 +1508,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col5 ASC NULLS FIRST
+order by: _col5 ASC NULLS LAST
 partition by: _col2
 raw input shape:
 window functions:
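Window specifications change the same way: a window ORDER BY without an explicit null ordering now sorts NULLs last within each partition. A sketch patterned on these windowingtablefunction plans (names are illustrative):

    -- _col5 above plays the role of the window ORDER BY key
    SELECT p_mfgr, p_size,
           rank() OVER (PARTITION BY p_mfgr ORDER BY p_size) AS r
    FROM part;
    -- equivalent explicit form after HIVE-20423:
    --   ... OVER (PARTITION BY p_mfgr ORDER BY p_size ASC NULLS LAST)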

http://git-wip-us.apache.org/repos/asf/hive/blob/499d01a3/ql/src/test/results/clientpositive/llap/subquery_notin.q.out

[02/20] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-13 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/499d01a3/ql/src/test/results/clientpositive/vector_outer_join4.q.out
--
diff --git a/ql/src/test/results/clientpositive/vector_outer_join4.q.out b/ql/src/test/results/clientpositive/vector_outer_join4.q.out
index 2826698..adbb759 100644
--- a/ql/src/test/results/clientpositive/vector_outer_join4.q.out
+++ b/ql/src/test/results/clientpositive/vector_outer_join4.q.out
@@ -130,16 +130,16 @@ POSTHOOK: query: select * from small_alltypesorc3b
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_alltypesorc3b
 #### A masked pattern was here ####
-NULL	NULL	-1015272448	-1887561756	NULL	NULL	jTQ68531mP	4hA4KQj2vD3fI6gX82220d	NULL	1969-12-31 15:59:45.854	false	false
-NULL	NULL	-609074876	-1887561756	NULL	NULL	EcM71	4hA4KQj2vD3fI6gX82220d	NULL	1969-12-31 15:59:55.061	true	false
-NULL	NULL	-700300206	-1887561756	NULL	NULL	kdqQE010	4hA4KQj2vD3fI6gX82220d	NULL	1969-12-31 15:59:58.384	false	false
-NULL	NULL	-726473298	1864027286	NULL	NULL	OFy1a1xf37f75b5N	4KWs6gw7lv2WYd66P	NULL	1969-12-31 16:00:11.799	true	true
-NULL	NULL	-738747840	-1645852809	NULL	NULL	vmAT10eeE47fgH20pLi	xH7445Rals48VOulSyR5F	NULL	1969-12-31 16:00:11.55	true	false
-NULL	NULL	-838810013	1864027286	NULL	NULL	N016jPED08o	4KWs6gw7lv2WYd66P	NULL	1969-12-31 15:59:44.252	false	true
-NULL	NULL	-850295959	-1887561756	NULL	NULL	WMIgGA73	4hA4KQj2vD3fI6gX82220d	NULL	1969-12-31 16:00:00.348	false	false
-NULL	NULL	-886426182	-1887561756	NULL	NULL	0i88xYq3gx1nW4vKjp7vBp3	4hA4KQj2vD3fI6gX82220d	NULL	1969-12-31 16:00:04.472	true	false
-NULL	NULL	-89947	-1645852809	NULL	NULL	73xdw4X	xH7445Rals48VOulSyR5F	NULL	1969-12-31 16:00:07.395	false	false
-NULL	NULL	-971543377	-1645852809	NULL	NULL	uN803aW	xH7445Rals48VOulSyR5F	NULL	1969-12-31 16:00:05.43	false	false
+NULL	-16269	-378213344	-1645852809	NULL	-16269.0	sOdj1Tmvbl03f	xH7445Rals48VOulSyR5F	NULL	1969-12-31 16:00:15.867	false	false
+NULL	-16274	-671342269	-1645852809	NULL	-16274.0	3DE7EQo4KyT0hS	xH7445Rals48VOulSyR5F	NULL	1969-12-31 15:59:51.469	false	false
+NULL	-16296	-146635689	-1645852809	NULL	-16296.0	r251rbt884txX2MNq4MM14	xH7445Rals48VOulSyR5F	NULL	1969-12-31 15:59:44.809	false	false
+NULL	-16296	593429004	-1887561756	NULL	-16296.0	dhDYJ076SFcC	4hA4KQj2vD3fI6gX82220d	NULL	1969-12-31 15:59:47.422	false	false
+NULL	-16300	-860437234	-1645852809	NULL	-16300.0	Fb2W1r24opqN8m6571p	xH7445Rals48VOulSyR5F	NULL	1969-12-31 15:59:45.815	true	false
+NULL	-16306	384405526	-1645852809	NULL	-16306.0	b5SoK8	xH7445Rals48VOulSyR5F	NULL	1969-12-31 16:00:11.105	true	false
+NULL	-16307	559926362	-1645852809	NULL	-16307.0	nA8bdtWfPPQyP2hL5	xH7445Rals48VOulSyR5F	NULL	1969-12-31 15:59:58.072	false	false
+NULL	-16309	-826497289	-1645852809	NULL	-16309.0	54o058c3mK6ewOQ5	xH7445Rals48VOulSyR5F	NULL	1969-12-31 16:00:10.761	false	false
+NULL	-16310	206154150	1864027286	NULL	-16310.0	5Hy1y6	4KWs6gw7lv2WYd66P	NULL	1969-12-31 16:00:00.821	false	true
+NULL	-16379	-894716315	1864027286	NULL	-16379.0	2ArdYqML3654nUjGJk3	4KWs6gw7lv2WYd66P	NULL	1969-12-31 15:59:47.059	true	true
 PREHOOK: query: select * from small_alltypesorc4b
 PREHOOK: type: QUERY
 PREHOOK: Input: default@small_alltypesorc4b
@@ -236,16 +236,16 @@ POSTHOOK: Input: default@small_alltypesorc_b
 -64	-7196	NULL	658026952	-64.0	-7196.0	NULL	4tAur	1969-12-31 15:59:53.866	1969-12-31 15:59:58.174	NULL	true
 -64	-8080	528534767	NULL	-64.0	-8080.0	cvLH6Eat2yFsyy7p	NULL	1969-12-31 15:59:58.044	1969-12-31 15:59:48.655	true	NULL
 -64	-9842	253665376	NULL	-64.0	-9842.0	1cGVWH7n1QU	NULL	1969-12-31 16:00:00.631	1969-12-31 16:00:01.781	true	NULL
-NULL	NULL	-1015272448	-1887561756	NULL	NULL	jTQ68531mP	4hA4KQj2vD3fI6gX82220d	NULL	1969-12-31 15:59:45.854	false	false
-NULL	NULL	-609074876	-1887561756	NULL	NULL	EcM71	4hA4KQj2vD3fI6gX82220d	NULL	1969-12-31 15:59:55.061	true	false
-NULL	NULL	-700300206	-1887561756	NULL	NULL	kdqQE010	4hA4KQj2vD3fI6gX82220d	NULL	1969-12-31 15:59:58.384	false	false
-NULL	NULL	-726473298	1864027286	NULL	NULL	OFy1a1xf37f75b5N	4KWs6gw7lv2WYd66P	NULL	1969-12-31 16:00:11.799

[18/20] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-13 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/499d01a3/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out b/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out
index b8ea5cf..42c2f5b 100644
--- a/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out
+++ b/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out
@@ -207,7 +207,7 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_ppd_n2
 PREHOOK: Output: hdfs://### HDFS PATH ###
 Stage-1 FILE SYSTEM COUNTERS:
-   HDFS_BYTES_READ: 16675
+   HDFS_BYTES_READ: 16681
HDFS_BYTES_WRITTEN: 104
HDFS_READ_OPS: 7
HDFS_LARGE_READ_OPS: 0
@@ -267,7 +267,7 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_ppd_n2
 PREHOOK: Output: hdfs://### HDFS PATH ###
 Stage-1 FILE SYSTEM COUNTERS:
-   HDFS_BYTES_READ: 1055
+   HDFS_BYTES_READ: 1047
HDFS_BYTES_WRITTEN: 101
HDFS_READ_OPS: 5
HDFS_LARGE_READ_OPS: 0
@@ -288,9 +288,9 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_OPERATOR_TS_0: 1000
 Stage-1 LLAP IO COUNTERS:
ALLOCATED_BYTES: 1048576
-   ALLOCATED_USED_BYTES: 2732
+   ALLOCATED_USED_BYTES: 2731
CACHE_HIT_BYTES: 24
-   CACHE_MISS_BYTES: 1055
+   CACHE_MISS_BYTES: 1047
METADATA_CACHE_HIT: 2
NUM_DECODED_BATCHES: 1
NUM_VECTOR_BATCHES: 1
@@ -327,7 +327,7 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_OPERATOR_SEL_2: 22
RECORDS_OUT_OPERATOR_TS_0: 1000
 Stage-1 LLAP IO COUNTERS:
-   CACHE_HIT_BYTES: 1079
+   CACHE_HIT_BYTES: 1071
CACHE_MISS_BYTES: 0
METADATA_CACHE_HIT: 2
NUM_DECODED_BATCHES: 1
@@ -365,7 +365,7 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_OPERATOR_SEL_2: 16
RECORDS_OUT_OPERATOR_TS_0: 1000
 Stage-1 LLAP IO COUNTERS:
-   CACHE_HIT_BYTES: 1079
+   CACHE_HIT_BYTES: 1071
CACHE_MISS_BYTES: 0
METADATA_CACHE_HIT: 2
NUM_DECODED_BATCHES: 1
@@ -403,7 +403,7 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_OPERATOR_SEL_2: 18
RECORDS_OUT_OPERATOR_TS_0: 2000
 Stage-1 LLAP IO COUNTERS:
-   CACHE_HIT_BYTES: 1079
+   CACHE_HIT_BYTES: 1071
CACHE_MISS_BYTES: 0
METADATA_CACHE_HIT: 2
NUM_DECODED_BATCHES: 2
@@ -441,7 +441,7 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_OPERATOR_SEL_2: 1
RECORDS_OUT_OPERATOR_TS_0: 1000
 Stage-1 LLAP IO COUNTERS:
-   CACHE_HIT_BYTES: 1079
+   CACHE_HIT_BYTES: 1071
CACHE_MISS_BYTES: 0
METADATA_CACHE_HIT: 2
NUM_DECODED_BATCHES: 1
@@ -479,7 +479,7 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_OPERATOR_SEL_2: 32
RECORDS_OUT_OPERATOR_TS_0: 1000
 Stage-1 LLAP IO COUNTERS:
-   CACHE_HIT_BYTES: 1079
+   CACHE_HIT_BYTES: 1071
CACHE_MISS_BYTES: 0
METADATA_CACHE_HIT: 2
NUM_DECODED_BATCHES: 1
@@ -517,7 +517,7 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_OPERATOR_SEL_2: 32
RECORDS_OUT_OPERATOR_TS_0: 1000
 Stage-1 LLAP IO COUNTERS:
-   CACHE_HIT_BYTES: 1079
+   CACHE_HIT_BYTES: 1071
CACHE_MISS_BYTES: 0
METADATA_CACHE_HIT: 2
NUM_DECODED_BATCHES: 1
@@ -555,7 +555,7 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_OPERATOR_SEL_2: 1697
RECORDS_OUT_OPERATOR_TS_0: 2000
 Stage-1 LLAP IO COUNTERS:
-   CACHE_HIT_BYTES: 1079
+   CACHE_HIT_BYTES: 1071
CACHE_MISS_BYTES: 0
METADATA_CACHE_HIT: 2
NUM_DECODED_BATCHES: 2
@@ -593,7 +593,7 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_OPERATOR_SEL_2: 12
RECORDS_OUT_OPERATOR_TS_0: 1000
 Stage-1 LLAP IO COUNTERS:
-   CACHE_HIT_BYTES: 1079
+   CACHE_HIT_BYTES: 1071
CACHE_MISS_BYTES: 0
METADATA_CACHE_HIT: 2
NUM_DECODED_BATCHES: 1
@@ -631,7 +631,7 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_OPERATOR_SEL_2: 1713
RECORDS_OUT_OPERATOR_TS_0: 2000
 Stage-1 LLAP IO COUNTERS:
-   CACHE_HIT_BYTES: 1079
+   CACHE_HIT_BYTES: 1071
CACHE_MISS_BYTES: 0
METADATA_CACHE_HIT: 2
NUM_DECODED_BATCHES: 2
@@ -657,7 +657,7 @@ Stage-1 FILE SYSTEM COUNTERS:
 Stage-1 HIVE COUNTERS:
CREATED_FILES: 1
DESERIALIZE_ERRORS: 0
-   RECORDS_IN_Map_1: 1000
+   RECORDS_IN_Map_1: 100
RECORDS_OUT_0: 1
RECORDS_OUT_INTERMEDIATE_Map_1: 6
RECORDS_OUT_INTERMEDIATE_Reducer_2: 0
@@ -667,14 +667,14 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_OPERATOR_MAP_0: 0
RECORDS_OUT_OPERATOR_RS_3: 6
RECORDS_OUT_OPERATOR_SEL_2: 6
-   RECORDS_OUT_OPERATOR_TS_0: 1000
+   RECORDS_OUT_OPERATOR_TS_0: 100
 Stage-1 LLAP IO COUNTERS:
-   CACHE_HIT_BYTES: 1079
+   CACHE_HIT_BYTES: 1071
CACHE_MISS_BYTES: 0
METADATA_CACHE_HIT: 2
NUM_DECODED_BATCHES: 1
NUM_VECTOR_BATCHES: 1
-   ROWS_EMITTED: 1000
+   ROWS_EMITTED: 100
SELECTED_ROWGROUPS: 1
 Stage-1 INPUT COUNTERS:
GROUPED_INPUT_SPLITS_Map_1: 1
@@ -707,7 +707,7 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_OPERATOR_SEL_2: 50
RECORDS_OUT_OPERATOR_TS_0: 1100
 Stage-1 LLAP IO COUNTERS:
-   CACHE_HIT_BYTES: 1079
+   CACHE_HIT_BYTES: 1071
CACHE_MISS_BYTES: 0
METADATA_CACHE_HIT: 2
NUM_DECODED_BATCHES: 2
@@ -745,7 +745,7 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_OPERATOR_SEL_2: 318

[13/20] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-13 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/499d01a3/ql/src/test/results/clientpositive/llap/vectorization_7.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_7.q.out b/ql/src/test/results/clientpositive/llap/vectorization_7.q.out
index 008068e..c18133b 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_7.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_7.q.out
@@ -126,7 +126,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-reduceColumnNullOrder: aaa
+reduceColumnNullOrder: zzz
 reduceColumnSortOrder: +++
 allNative: false
 usesVectorUDFAdaptor: false
@@ -224,31 +224,31 @@ LIMIT 25
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
-NULL	-2118149242	-7196	56	1969-12-31 15:59:50.462	NULL	-4236298484	0	7196	-56	-39	-15242201945432	NULL	-56	0
-NULL	-2121399625	-7196	27	1969-12-31 15:59:50.046	NULL	-4242799250	0	7196	-27	-10	-15265591701500	NULL	-27	0
-NULL	-2124802690	-7196	-6	1969-12-31 15:59:57.92	NULL	-4249605380	0	7196	6	23	-15290080157240	NULL	6	0
-NULL	-2128720310	-7196	-52	1969-12-31 15:59:45.978	NULL	-4257440620	0	7196	52	69	-15318271350760	NULL	52	0
-NULL	-2132232110	-200	60	1969-12-31 15:59:47.019	NULL	-4264464220	-200	200	-60	-43	-426446422000	NULL	-60	0
-NULL	-2132536965	-7196	9	1969-12-31 15:59:46	NULL	-4265073930	0	7196	-9	8	-15345736000140	NULL	-9	0
-NULL	-2135141157	-7196	50	1969-12-31 15:59:50.192	NULL	-4270282314	0	7196	-50	-33	-15364475765772	NULL	-50	0
-NULL	-2137537679	-7196	-25	1969-12-31 15:59:50.136	NULL	-4275075358	0	7196	25	42	-15381721138084	NULL	25	0
-NULL	-2145481991	-7196	56	1969-12-31 15:59:55.667	NULL	-4290963982	0	7196	-56	-39	-1543407236	NULL	-56	0
-NULL	NULL	-200	-36	1969-12-31 15:59:57.241	NULL	NULL	-200	200	36	53	NULL	NULL	36	0
-NULL	NULL	-200	-43	1969-12-31 15:59:53.783	NULL	NULL	-200	200	43	60	NULL	NULL	43	0
-NULL	NULL	-200	-58	1969-12-31 15:59:51.115	NULL	NULL	-200	200	58	75	NULL	NULL	58	0
-NULL	NULL	-200	22	1969-12-31 15:59:50.109	NULL	NULL	-200	200	-22	-5	NULL	NULL	-22	0
-NULL	NULL	-200	3	1969-12-31 15:59:50.489	NULL	NULL	-200	200	-3	14	NULL	NULL	-3	0
-NULL	NULL	-200	43	1969-12-31 15:59:57.003	NULL	NULL	-200	200	-43	-26	NULL	NULL	-43	0
-NULL	NULL	-200	53	1969-12-31 15:59:49.46	NULL	NULL	-200	200	-53	-36	NULL	NULL	-53	0
-NULL	NULL	-200	9	1969-12-31 15:59:44.108	NULL	NULL	-200	200	-9	8	NULL	NULL	-9	0
-NULL	NULL	-7196	-38	1969-12-31 15:59:53.503	NULL	NULL	0	7196	38	55	NULL	NULL	38	0
-NULL	NULL	-7196	-49	1969-12-31 15:59:51.009	NULL	NULL	0	7196	49	66	NULL	NULL	49	0
-NULL	NULL	-7196	-49	1969-12-31 15:59:52.052	NULL	NULL	0	7196	49	66	NULL	NULL	49	0
-NULL	NULL	-7196	-50	1969-12-31 15:59:52.424	NULL	NULL	0	7196	50	67	NULL	NULL	50	0
-NULL	NULL	-7196	-61	1969-12-31 15:59:44.823	NULL	NULL	0	7196	61	78	NULL	NULL	61	0
-NULL	NULL	-7196	1	1969-12-31 15:59:48.361	NULL	NULL	0	7196	-1	16	NULL	NULL	-1	0
-NULL	NULL	-7196	14	1969-12-31 15:59:50.291	NULL	NULL	0	7196	-14	3	NULL	NULL	-14	0
-NULL	NULL	-7196	22	1969-12-31 15:59:52.699	NULL	NULL	0	7196	-22	-5	NULL	NULL	-22	0
+true	NULL	-15892	29	1969-12-31 15:59:57.937	821UdmGbkEf4j	NULL	-215	15892	-29	-12	NULL	171	-29	0
+true	NULL	-15899	50	1969-12-31 15:59:46.926	821UdmGbkEf4j	NULL	-222	15899	-50	-33	NULL	10210	-50	0
+true	NULL	-15903	-2	1969-12-31 15:59:46.371	cvLH6Eat2yFsyy7p	NULL	-226	15903	2	19

[11/20] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-13 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/499d01a3/ql/src/test/results/clientpositive/parquet_vectorization_13.q.out
--
diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_13.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_13.q.out
index 6459e29..66af40c 100644
--- a/ql/src/test/results/clientpositive/parquet_vectorization_13.q.out
+++ b/ql/src/test/results/clientpositive/parquet_vectorization_13.q.out
@@ -290,46 +290,46 @@ LIMIT 40
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesparquet
 #### A masked pattern was here ####
-NULL	-55	1969-12-31 16:00:11.38	-55.0	NULL	55	-55	0	-55.0	-0.0	55.0	-4375.415	0.0	55.0	0.0	-10.175	-55.0	0.47781818181818186	-55.0	0.0	-55
-NULL	-55	1969-12-31 16:00:11.751	-55.0	NULL	55	-55	0	-55.0	-0.0	55.0	-4375.415	0.0	55.0	0.0	-10.175	-55.0	0.47781818181818186	-55.0	0.0	-55
-NULL	-56	1969-12-31 16:00:13.602	-56.0	NULL	56	-56	0	-56.0	-0.0	56.0	-4454.9683	0.0	56.0	0.0	-10.175	-56.0	0.4692857142857143	-56.0	0.0	-56
-NULL	-56	1969-12-31 16:00:13.958	-56.0	NULL	56	-56	0	-56.0	-0.0	56.0	-4454.9683	0.0	56.0	0.0	-10.175	-56.0	0.4692857142857143	-56.0	0.0	-56
-NULL	-56	1969-12-31 16:00:15.038	-56.0	NULL	56	-56	0	-56.0	-0.0	56.0	-4454.9683	0.0	56.0	0.0	-10.175	-56.0	0.4692857142857143	-56.0	0.0	-56
-NULL	-57	1969-12-31 16:00:11.451	-57.0	NULL	57	-57	0	-57.0	-0.0	57.0	-4534.521	0.0	57.0	0.0	-10.175	-57.0	0.4610526315789474	-57.0	0.0	-57
-NULL	-57	1969-12-31 16:00:11.883	-57.0	NULL	57	-57	0	-57.0	-0.0	57.0	-4534.521	0.0	57.0	0.0	-10.175	-57.0	0.4610526315789474	-57.0	0.0	-57
-NULL	-57	1969-12-31 16:00:12.626	-57.0	NULL	57	-57	0	-57.0	-0.0	57.0	-4534.521	0.0	57.0	0.0	-10.175	-57.0	0.4610526315789474	-57.0	0.0	-57
-NULL	-57	1969-12-31 16:00:13.578	-57.0	NULL	57	-57	0	-57.0	-0.0	57.0	-4534.521	0.0	57.0	0.0	-10.175	-57.0	0.4610526315789474	-57.0	0.0	-57
-NULL	-57	1969-12-31 16:00:15.39	-57.0	NULL	57	-57	0	-57.0	-0.0	57.0	-4534.521	0.0	57.0	0.0	-10.175	-57.0	0.4610526315789474	-57.0	0.0	-57
-NULL	-58	1969-12-31 16:00:12.065	-58.0	NULL	58	-58	0	-58.0	-0.0	58.0	-4614.074	0.0	58.0	0.0	-10.175	-58.0	0.4531034482758621	-58.0	0.0	-58
-NULL	-58	1969-12-31 16:00:12.683	-58.0	NULL	58	-58	0	-58.0	-0.0	58.0	-4614.074	0.0	58.0	0.0	-10.175	-58.0	0.4531034482758621	-58.0	0.0	-58
-NULL	-58	1969-12-31 16:00:12.948	-58.0	NULL	58	-58	0	-58.0	-0.0	58.0	-4614.074	0.0	58.0	0.0	-10.175	-58.0	0.4531034482758621	-58.0	0.0	-58
-NULL	-58	1969-12-31 16:00:14.066	-58.0	NULL	58	-58	0	-58.0	-0.0	58.0	-4614.074	0.0	58.0	0.0	-10.175	-58.0	0.4531034482758621	-58.0	0.0	-58
-NULL	-58	1969-12-31 16:00:15.658	-58.0	NULL	58	-58	0	-58.0	-0.0	58.0	-4614.074	0.0	58.0	0.0	-10.175	-58.0	0.4531034482758621	-58.0	0.0	-58
-NULL	-59	1969-12-31 16:00:12.008	-59.0	NULL	59	-59	0	-59.0	-0.0	59.0	-4693.627	0.0	59.0	0.0	-10.175	-59.0	0.44542372881355935	-59.0	0.0	-59
-NULL	-59	1969-12-31 16:00:13.15	-59.0	NULL	59	-59	0	-59.0	-0.0	59.0	-4693.627	0.0	59.0	0.0	-10.175	-59.0	0.44542372881355935	-59.0	0.0	-59
-NULL	-59	1969-12-31 16:00:13.625	-59.0	NULL	59	-59	0	-59.0	-0.0	59.0	-4693.627	0.0	59.0	0.0	-10.175	-59.0	0.44542372881355935	-59.0	0.0	-59
-NULL	-59	1969-12-31 16:00:15.296	-59.0	NULL	59	-59	0	-59.0	-0.0	59.0	-4693.627	0.0	59.0	0.0	-10.175	-59.0	0.44542372881355935	-59.0	0.0	-59
-NULL	-59	1969-12-31 16:00:15.861	-59.0	NULL	59	-59	0	-59.0	-0.0	59.0	-4693.627	0.0	59.0	0.0	-10.175	-59.0	0.44542372881355935	-59.0	0.0	-59
-NULL	-60	1969-12-31 16:00:11.504	-60.0	NULL	60	-60	0	-60.0	-0.0	60.0	-4773.18	0.0	60.0	0.0	-10.175	-60.0	0.438	-60.0	0.0	-60
-NULL	-60	1969-12-31

[10/20] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-13 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/499d01a3/ql/src/test/results/clientpositive/perf/tez/query49.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/tez/query49.q.out b/ql/src/test/results/clientpositive/perf/tez/query49.q.out
index 6f642ef..5c1889c 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query49.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query49.q.out
@@ -307,7 +307,7 @@ Stage-0
                 Filter Operator [FIL_347] (rows=8604378 width=88)
                   predicate:((_col0 <= 10) or (rank_window_1 <= 10))
                   PTF Operator [PTF_346] (rows=12906568 width=88)
-                    Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col4 AS decimal(15,4)) / CAST( _col5 AS decimal(15,4))) ASC NULLS FIRST","partition by:":"0"}]
+                    Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col4 AS decimal(15,4)) / CAST( _col5 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}]
                     Select Operator [SEL_345] (rows=12906568 width=88)
                       Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
                     <-Reducer 23 [SIMPLE_EDGE] vectorized
@@ -316,7 +316,7 @@ Stage-0
                         Select Operator [SEL_343] (rows=12906568 width=88)
                           Output:["rank_window_0","_col0","_col1","_col2","_col3","_col4"]
                           PTF Operator [PTF_342] (rows=12906568 width=88)
-                            Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4))) ASC NULLS FIRST","partition by:":"0"}]
+                            Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}]
                             Select Operator [SEL_341] (rows=12906568 width=88)
                               Output:["_col0","_col1","_col2","_col3","_col4"]
                             <-Reducer 22 [SIMPLE_EDGE] vectorized
@@ -398,7 +398,7 @@ Stage-0
                   Filter Operator [FIL_327] (rows=4302070 width=135)
                     predicate:((_col0 <= 10) or (rank_window_1 <= 10))
                     PTF Operator [PTF_326] (rows=6453105 width=135)
-                      Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col4 AS decimal(15,4)) / CAST( _col5 AS decimal(15,4))) ASC NULLS FIRST","partition by:":"0"}]
+                      Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col4 AS decimal(15,4)) / CAST( _col5 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}]
                       Select Operator [SEL_325] (rows=6453105 width=135)
                         Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
                       <-Reducer 17 [SIMPLE_EDGE] vectorized
@@ -407,7 +407,7 @@ Stage-0
                           Select Operator [SEL_323] (rows=6453105 width=135)
                             Output:["rank_window_0","_col0","_col1","_col2","_col3","_col4"]
                             PTF Operator [PTF_322] (rows=6453105 width=135)
-                              Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4))) ASC NULLS FIRST","partition by:":"0"}]
+                              Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}]
                               Select Operator [SEL_321] (rows=6453105 width=135)
                                 Output:["_col0","_col1","_col2","_col3","_col4"]
                               <-Reducer 16 [SIMPLE_EDGE] vectorized
@@ -472,7 +472,7 @@ Stage-0
                   Filter Operator [FIL_296] (rows=2151150 width=135)
                     predicate:((_col0 <= 10) or (rank_window_1 <= 10))
                     PTF Operator [PTF_295] (rows=3226726 width=135)
-                      Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col4 AS decimal(15,4)) /

[07/20] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-13 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/499d01a3/ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out b/ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out
index 9be72ea..70f7401 100644
--- a/ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out
@@ -130,16 +130,16 @@ POSTHOOK: query: select * from small_alltypesorc3b
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_alltypesorc3b
 POSTHOOK: Output: hdfs://### HDFS PATH ###
-NULL	NULL	-1015272448	-1887561756	NULL	NULL	jTQ68531mP	4hA4KQj2vD3fI6gX82220d	NULL	1969-12-31 15:59:45.854	false	false
-NULL	NULL	-609074876	-1887561756	NULL	NULL	EcM71	4hA4KQj2vD3fI6gX82220d	NULL	1969-12-31 15:59:55.061	true	false
-NULL	NULL	-700300206	-1887561756	NULL	NULL	kdqQE010	4hA4KQj2vD3fI6gX82220d	NULL	1969-12-31 15:59:58.384	false	false
-NULL	NULL	-726473298	1864027286	NULL	NULL	OFy1a1xf37f75b5N	4KWs6gw7lv2WYd66P	NULL	1969-12-31 16:00:11.799	true	true
-NULL	NULL	-738747840	-1645852809	NULL	NULL	vmAT10eeE47fgH20pLi	xH7445Rals48VOulSyR5F	NULL	1969-12-31 16:00:11.55	true	false
-NULL	NULL	-838810013	1864027286	NULL	NULL	N016jPED08o	4KWs6gw7lv2WYd66P	NULL	1969-12-31 15:59:44.252	false	true
-NULL	NULL	-850295959	-1887561756	NULL	NULL	WMIgGA73	4hA4KQj2vD3fI6gX82220d	NULL	1969-12-31 16:00:00.348	false	false
-NULL	NULL	-886426182	-1887561756	NULL	NULL	0i88xYq3gx1nW4vKjp7vBp3	4hA4KQj2vD3fI6gX82220d	NULL	1969-12-31 16:00:04.472	true	false
-NULL	NULL	-89947	-1645852809	NULL	NULL	73xdw4X	xH7445Rals48VOulSyR5F	NULL	1969-12-31 16:00:07.395	false	false
-NULL	NULL	-971543377	-1645852809	NULL	NULL	uN803aW	xH7445Rals48VOulSyR5F	NULL	1969-12-31 16:00:05.43	false	false
+NULL	-16269	-378213344	-1645852809	NULL	-16269.0	sOdj1Tmvbl03f	xH7445Rals48VOulSyR5F	NULL	1969-12-31 16:00:15.867	false	false
+NULL	-16274	-671342269	-1645852809	NULL	-16274.0	3DE7EQo4KyT0hS	xH7445Rals48VOulSyR5F	NULL	1969-12-31 15:59:51.469	false	false
+NULL	-16296	-146635689	-1645852809	NULL	-16296.0	r251rbt884txX2MNq4MM14	xH7445Rals48VOulSyR5F	NULL	1969-12-31 15:59:44.809	false	false
+NULL	-16296	593429004	-1887561756	NULL	-16296.0	dhDYJ076SFcC	4hA4KQj2vD3fI6gX82220d	NULL	1969-12-31 15:59:47.422	false	false
+NULL	-16300	-860437234	-1645852809	NULL	-16300.0	Fb2W1r24opqN8m6571p	xH7445Rals48VOulSyR5F	NULL	1969-12-31 15:59:45.815	true	false
+NULL	-16306	384405526	-1645852809	NULL	-16306.0	b5SoK8	xH7445Rals48VOulSyR5F	NULL	1969-12-31 16:00:11.105	true	false
+NULL	-16307	559926362	-1645852809	NULL	-16307.0	nA8bdtWfPPQyP2hL5	xH7445Rals48VOulSyR5F	NULL	1969-12-31 15:59:58.072	false	false
+NULL	-16309	-826497289	-1645852809	NULL	-16309.0	54o058c3mK6ewOQ5	xH7445Rals48VOulSyR5F	NULL	1969-12-31 16:00:10.761	false	false
+NULL	-16310	206154150	1864027286	NULL	-16310.0	5Hy1y6	4KWs6gw7lv2WYd66P	NULL	1969-12-31 16:00:00.821	false	true
+NULL	-16379	-894716315	1864027286	NULL	-16379.0	2ArdYqML3654nUjGJk3	4KWs6gw7lv2WYd66P	NULL	1969-12-31 15:59:47.059	true	true
 PREHOOK: query: select * from small_alltypesorc4b
 PREHOOK: type: QUERY
 PREHOOK: Input: default@small_alltypesorc4b
@@ -236,16 +236,16 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 -64	-7196	NULL	658026952	-64.0	-7196.0	NULL	4tAur	1969-12-31 15:59:53.866	1969-12-31 15:59:58.174	NULL	true
 -64	-8080	528534767	NULL	-64.0	-8080.0	cvLH6Eat2yFsyy7p	NULL	1969-12-31 15:59:58.044	1969-12-31 15:59:48.655	true	NULL
 -64	-9842	253665376	NULL	-64.0	-9842.0	1cGVWH7n1QU	NULL	1969-12-31 16:00:00.631	1969-12-31 16:00:01.781	true	NULL
-NULL	NULL	-1015272448	-1887561756	NULL	NULL	jTQ68531mP	4hA4KQj2vD3fI6gX82220d	NULL	1969-12-31 15:59:45.854	false	false
-NULL	NULL	-609074876	-1887561756	NULL	NULL	EcM71	4hA4KQj2vD3fI6gX82220d	NULL	1969-12-31 15:59:55.061	true	false
-NULL	NULL	-700300206	-1887561756	NULL	NULL	kdqQE010	4hA4KQj2vD3fI6gX82220d	NULL	1969-12-31 15:59:58.384	false	false
-NULL	NULL	-726473298	1864027286	NULL	NULL	OFy1a1xf37f75b5N	4KWs6gw7lv2WYd66P

[16/20] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-13 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/499d01a3/ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out
index 5e7e8ca..e9e9290 100644
--- a/ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out
@@ -59,6 +59,37 @@ POSTHOOK: query: SELECT * FROM DECIMAL_PRECISION ORDER BY `dec`
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_precision
 #### A masked pattern was here ####
+0.00
+0.00
+0.00
+0.00
+0.00
+0.1234567890
+0.1234567890
+1.2345678901
+1.2345678901
+1.2345678901
+12.3456789012
+12.3456789012
+12.3456789012
+123.4567890123
+123.4567890123
+123.4567890123
+1234.5678901235
+1234.5678901235
+1234.5678901235
+12345.6789012346
+12345.6789012346
+123456.7890123456
+123456.7890123457
+1234567.8901234560
+1234567.8901234568
+12345678.9012345600
+12345678.9012345679
+123456789.0123456000
+123456789.0123456789
+1234567890.123456
+1234567890.1234567890
 NULL
 NULL
 NULL
@@ -103,37 +134,6 @@ NULL
 NULL
 NULL
 NULL
-0.00
-0.00
-0.00
-0.00
-0.00
-0.1234567890
-0.1234567890
-1.2345678901
-1.2345678901
-1.2345678901
-12.3456789012
-12.3456789012
-12.3456789012
-123.4567890123
-123.4567890123
-123.4567890123
-1234.5678901235
-1234.5678901235
-1234.5678901235
-12345.6789012346
-12345.6789012346
-123456.7890123456
-123456.7890123457
-1234567.8901234560
-1234567.8901234568
-12345678.9012345600
-12345678.9012345679
-123456789.0123456000
-123456789.0123456789
-1234567890.123456
-1234567890.1234567890
 PREHOOK: query: SELECT `dec`, `dec` + 1, `dec` - 1 FROM DECIMAL_PRECISION ORDER BY `dec`
 PREHOOK: type: QUERY
 PREHOOK: Input: default@decimal_precision
@@ -142,50 +142,6 @@ POSTHOOK: query: SELECT `dec`, `dec` + 1, `dec` - 1 FROM DECIMAL_PRECISION ORDER
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_precision
 #### A masked pattern was here ####
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
-NULL	NULL	NULL
 0.00	1.00	-1.00
 0.00	1.00	-1.00
 0.00	1.00	-1.00
@@ -217,14 +173,6 @@ NULL	NULL	NULL
 123456789.0123456789	123456790.0123456789	123456788.0123456789
 1234567890.123456	1234567891.123456	1234567889.123456
 1234567890.1234567890	1234567891.1234567890	1234567889.1234567890
-PREHOOK: query: SELECT `dec`, `dec` * 2, `dec` / 3  FROM DECIMAL_PRECISION ORDER BY `dec`
-PREHOOK: type: QUERY
-PREHOOK: Input: default@decimal_precision
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT `dec`, `dec` * 2, `dec` / 3  FROM DECIMAL_PRECISION ORDER BY `dec`
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@decimal_precision
-#### A masked pattern was here ####
 NULL	NULL	NULL
 NULL	NULL	NULL
 NULL	NULL	NULL
 NULL	NULL	NULL
 NULL	NULL	NULL
 NULL	NULL	NULL
 NULL	NULL	NULL
+PREHOOK: query: SELECT `dec`, `dec` * 2, `dec` / 3  FROM DECIMAL_PRECISION ORDER BY `dec`
+PREHOOK: type: QUERY
+PREHOOK: Input: default@decimal_precision
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT `dec`, `dec` * 2, `dec` / 3  FROM DECIMAL_PRECISION ORDER BY `dec`
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@decimal_precision
+#### A masked pattern was here ####
 0.00   0.000.
 0.00   0.000.
 0.00   0.000.
@@ -300,6 +256,50 @@ NULL	NULL	NULL
 123456789.0123456789	246913578.0246913578	41152263.004115226300
 1234567890.123456	2469135780.246912	411522630.04115200
 1234567890.1234567890	2469135780.2469135780	411522630.041152263000
+NULL	NULL	NULL
+NULL	NULL	NULL
+NULL	NULL	NULL
+NULL	NULL	NULL
+NULL	NULL	NULL
+NULL
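Every reshuffled block in this file tells the same story: the NULL group that used to open each ordered result now closes it. An illustrative before/after sketch using the DECIMAL_PRECISION table from the diff (the output in the comments is abbreviated):

    SELECT `dec` FROM DECIMAL_PRECISION ORDER BY `dec`;
    --   before HIVE-20423: NULL, ..., NULL, 0.00, ..., 1234567890.1234567890
    --   after  HIVE-20423: 0.00, ..., 1234567890.1234567890, NULL, ..., NULL

    -- Restoring the old placement now takes an explicit modifier:
    SELECT `dec` FROM DECIMAL_PRECISION ORDER BY `dec` NULLS FIRST;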

[01/20] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-13 Thread tchoi
Repository: hive
Updated Branches:
  refs/heads/master 35f86c749 -> 499d01a38


http://git-wip-us.apache.org/repos/asf/hive/blob/499d01a3/ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
--
diff --git a/ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out b/ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
index 4da63b1..fc7ad07 100644
--- a/ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
+++ b/ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
@@ -253,7 +253,7 @@ STAGE PLANS:
   Map Operator Tree:
   TableScan
 alias: alltypesorc_string
-            Statistics: Num rows: 52 Data size: 3179 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 52 Data size: 8979 Basic stats: COMPLETE Column stats: NONE
 TableScan Vectorization:
 native: true
 Select Operator
@@ -264,7 +264,7 @@ STAGE PLANS:
   native: true
                  projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 0, 1, 3, 13, 14, 15, 16, 17]
                  selectExpressions: VectorUDFUnixTimeStampTimestamp(col 1:timestamp) -> 5:bigint, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 6:int, VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 7:int, VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 8:int, VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 9:int, VectorUDFHourTimestamp(col 1:timestamp, field HOUR_OF_DAY) -> 10:int, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 11:int, VectorUDFSecondTimestamp(col 1:timestamp, field SECOND) -> 12:int, IfExprTimestampColumnScalar(col 0:boolean, col 1:timestamp, val 1319-01-25 08:31:57.778) -> 13:timestamp, IfExprTimestampScalarColumn(col 0:boolean, val 2000-12-18 00:42:30.0005, col 1:timestamp) -> 14:timestamp, IfExprTimestampColumnColumn(col 0:boolean, col 1:timestamp, col 3:timestamp) -> 15:timestamp, IfExprColumnNull(col 0:boolean, col 1:timestamp, null)(children: col 0:boolean, col 1:timestamp) -> 16:timestamp, IfExprNullColumn(col 0:boolean, null, col 3)(children: col 0:boolean, col 3:timestamp) -> 17:timestamp
-              Statistics: Num rows: 52 Data size: 3179 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 52 Data size: 8979 Basic stats: COMPLETE Column stats: NONE
   Reduce Output Operator
 key expressions: _col0 (type: bigint)
 sort order: +
@@ -273,7 +273,7 @@ STAGE PLANS:
 native: false
                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-                Statistics: Num rows: 52 Data size: 3179 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 52 Data size: 8979 Basic stats: COMPLETE Column stats: NONE
                value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: boolean), _col10 (type: timestamp), _col11 (type: timestamp), _col12 (type: timestamp), _col13 (type: timestamp), _col14 (type: timestamp), _col15 (type: timestamp), _col16 (type: timestamp)
   Execution mode: vectorized
   Map Vectorization:
@@ -293,10 +293,10 @@ STAGE PLANS:
 Select Operator
          expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: boolean), VALUE._col8 (type: timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: timestamp), VALUE._col11 (type: timestamp), VALUE._col12 (type: timestamp), VALUE._col13 (type: timestamp), VALUE._col14 (type: timestamp)
          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16
-          Statistics: Num rows: 52 Data size: 3179 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 52 Data size: 8979 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
-            Statistics: Num rows: 52 Data size: 3179 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 52 Data size: 8979 Basic stats: COMPLETE Column stats: NONE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: 

[03/20] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-13 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/499d01a3/ql/src/test/results/clientpositive/vector_outer_join3.q.out
--
diff --git a/ql/src/test/results/clientpositive/vector_outer_join3.q.out b/ql/src/test/results/clientpositive/vector_outer_join3.q.out
index 1f1c131..6cae357 100644
--- a/ql/src/test/results/clientpositive/vector_outer_join3.q.out
+++ b/ql/src/test/results/clientpositive/vector_outer_join3.q.out
@@ -94,11 +94,11 @@ POSTHOOK: query: select * from small_alltypesorc1a_n1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_alltypesorc1a_n1
 #### A masked pattern was here ####
-NULL	NULL	-1015272448	-1887561756	NULL	NULL	jTQ68531mP	4hA4KQj2vD3fI6gX82220d	NULL	1969-12-31 15:59:45.854	false	false
-NULL	NULL	-850295959	-1887561756	NULL	NULL	WMIgGA73	4hA4KQj2vD3fI6gX82220d	NULL	1969-12-31 16:00:00.348	false	false
-NULL	NULL	-886426182	-1887561756	NULL	NULL	0i88xYq3gx1nW4vKjp7vBp3	4hA4KQj2vD3fI6gX82220d	NULL	1969-12-31 16:00:04.472	true	false
-NULL	NULL	-89947	-1645852809	NULL	NULL	73xdw4X	xH7445Rals48VOulSyR5F	NULL	1969-12-31 16:00:07.395	false	false
-NULL	NULL	-971543377	-1645852809	NULL	NULL	uN803aW	xH7445Rals48VOulSyR5F	NULL	1969-12-31 16:00:05.43	false	false
+-64	-10462	626923679	NULL	-64.0	-10462.0	821UdmGbkEf4j	NULL	1969-12-31 16:00:02.496	1969-12-31 16:00:00.164	true	NULL
+-64	-15920	528534767	NULL	-64.0	-15920.0	cvLH6Eat2yFsyy7p	NULL	1969-12-31 15:59:51.859	1969-12-31 16:00:14.468	true	NULL
+-64	-6907	253665376	NULL	-64.0	-6907.0	1cGVWH7n1QU	NULL	NULL	1969-12-31 15:59:53.66	true	NULL
+-64	-8080	528534767	NULL	-64.0	-8080.0	cvLH6Eat2yFsyy7p	NULL	1969-12-31 15:59:58.044	1969-12-31 15:59:48.655	true	NULL
+-64	-9842	253665376	NULL	-64.0	-9842.0	1cGVWH7n1QU	NULL	1969-12-31 16:00:00.631	1969-12-31 16:00:01.781	true	NULL
 PREHOOK: query: select * from small_alltypesorc2a_n1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@small_alltypesorc2a_n1
@@ -216,16 +216,16 @@ POSTHOOK: Input: default@small_alltypesorc_a_n1
 -51	NULL	NULL	-1874052220	-51.0	NULL	c61B47I604gymFJ	sjWQS78	1969-12-31 16:00:08.451	NULL	false	false
 -51	NULL	NULL	-1927203921	-51.0	NULL	45ja5suO42S0I0	1969-12-31 16:00:08.451	NULL	true	true
 -51	NULL	NULL	-1970551565	-51.0	NULL	r2uhJH3	loXMWyrHjVeK	1969-12-31 16:00:08.451	NULL	false	false
+-64	-10462	626923679	NULL	-64.0	-10462.0	821UdmGbkEf4j	NULL	1969-12-31 16:00:02.496	1969-12-31 16:00:00.164	true	NULL
+-64	-15920	528534767	NULL	-64.0	-15920.0	cvLH6Eat2yFsyy7p	NULL	1969-12-31 15:59:51.859	1969-12-31 16:00:14.468	true	NULL
+-64	-6907	253665376	NULL	-64.0	-6907.0	1cGVWH7n1QU	NULL	NULL	1969-12-31 15:59:53.66	true	NULL
 -64	-7196	NULL	-1615920595	-64.0	-7196.0	NULL	X5rDjl	1969-12-31 16:00:11.912	1969-12-31 15:59:58.174	NULL	false
 -64	-7196	NULL	-1639157869	-64.0	-7196.0	NULL	IJ0Oj7qAiqNGsN7gn	1969-12-31 16:00:01.785	1969-12-31 15:59:58.174	NULL	false
 -64	-7196	NULL	-527203677	-64.0	-7196.0	NULL	JBE4H5RoK412Cs260I72	1969-12-31 15:59:50.184	1969-12-31 15:59:58.174	NULL	true
 -64	-7196	NULL	406535485	-64.0	-7196.0	NULL	E011i	1969-12-31 15:59:56.048	1969-12-31 15:59:58.174	NULL	false
 -64	-7196	NULL	658026952	-64.0	-7196.0	NULL	4tAur	1969-12-31 15:59:53.866	1969-12-31 15:59:58.174	NULL	true
-NULL	NULL	-1015272448	-1887561756	NULL	NULL	jTQ68531mP	4hA4KQj2vD3fI6gX82220d	NULL	1969-12-31 15:59:45.854	false	false
-NULL	NULL	-850295959	-1887561756	NULL	NULL	WMIgGA73	4hA4KQj2vD3fI6gX82220d	NULL	1969-12-31 16:00:00.348	false	false
-NULL	NULL	-886426182	-1887561756	NULL	NULL	0i88xYq3gx1nW4vKjp7vBp3	4hA4KQj2vD3fI6gX82220d	NULL	1969-12-31 16:00:04.472	true	false
-NULL	NULL	-89947	-1645852809	NULL	NULL	73xdw4X	xH7445Rals48VOulSyR5F	NULL	1969-12-31 16:00:07.395	false	false
-NULL	NULL	-971543377	-1645852809	NULL	NULL	uN803aW	xH7445Rals48VOulSyR5F	NULL	1969-12-31 16:00:05.43	false	false
+-64	-8080	528534767	NULL	-64.0	-8080.0	cvLH6Eat2yFsyy7p	NULL	1969-12-31 15:59:58.044	1969-12-31 15:59:48.655	true	NULL
+-64	-9842	253665376	NULL	-64.0	-9842.0	1cGVWH7n1QU	NULL	1969-12-31 16:00:00.631	1969-12-31 16:00:01.781	true	NULL
 PREHOOK: query: explain vectorization detail formatted
 select count(*) from (select c.cstring1 

[15/20] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-13 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/499d01a3/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out
index af69747..a975d8a 100644
--- a/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out
@@ -94,11 +94,11 @@ POSTHOOK: query: select * from small_alltypesorc1a_n1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_alltypesorc1a_n1
 #### A masked pattern was here ####
-NULL	NULL	-1015272448	-1887561756	NULL	NULL	jTQ68531mP	4hA4KQj2vD3fI6gX82220d	NULL	1969-12-31 15:59:45.854	false	false
-NULL	NULL	-850295959	-1887561756	NULL	NULL	WMIgGA73	4hA4KQj2vD3fI6gX82220d	NULL	1969-12-31 16:00:00.348	false	false
-NULL	NULL	-886426182	-1887561756	NULL	NULL	0i88xYq3gx1nW4vKjp7vBp3	4hA4KQj2vD3fI6gX82220d	NULL	1969-12-31 16:00:04.472	true	false
-NULL	NULL	-89947	-1645852809	NULL	NULL	73xdw4X	xH7445Rals48VOulSyR5F	NULL	1969-12-31 16:00:07.395	false	false
-NULL	NULL	-971543377	-1645852809	NULL	NULL	uN803aW	xH7445Rals48VOulSyR5F	NULL	1969-12-31 16:00:05.43	false	false
+-64	-10462	626923679	NULL	-64.0	-10462.0	821UdmGbkEf4j	NULL	1969-12-31 16:00:02.496	1969-12-31 16:00:00.164	true	NULL
+-64	-15920	528534767	NULL	-64.0	-15920.0	cvLH6Eat2yFsyy7p	NULL	1969-12-31 15:59:51.859	1969-12-31 16:00:14.468	true	NULL
+-64	-6907	253665376	NULL	-64.0	-6907.0	1cGVWH7n1QU	NULL	NULL	1969-12-31 15:59:53.66	true	NULL
+-64	-8080	528534767	NULL	-64.0	-8080.0	cvLH6Eat2yFsyy7p	NULL	1969-12-31 15:59:58.044	1969-12-31 15:59:48.655	true	NULL
+-64	-9842	253665376	NULL	-64.0	-9842.0	1cGVWH7n1QU	NULL	1969-12-31 16:00:00.631	1969-12-31 16:00:01.781	true	NULL
 PREHOOK: query: select * from small_alltypesorc2a_n1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@small_alltypesorc2a_n1
@@ -216,16 +216,16 @@ POSTHOOK: Input: default@small_alltypesorc_a_n1
 -51	NULL	NULL	-1874052220	-51.0	NULL	c61B47I604gymFJ	sjWQS78	1969-12-31 16:00:08.451	NULL	false	false
 -51	NULL	NULL	-1927203921	-51.0	NULL	45ja5suO42S0I0	1969-12-31 16:00:08.451	NULL	true	true
 -51	NULL	NULL	-1970551565	-51.0	NULL	r2uhJH3	loXMWyrHjVeK	1969-12-31 16:00:08.451	NULL	false	false
+-64	-10462	626923679	NULL	-64.0	-10462.0	821UdmGbkEf4j	NULL	1969-12-31 16:00:02.496	1969-12-31 16:00:00.164	true	NULL
+-64	-15920	528534767	NULL	-64.0	-15920.0	cvLH6Eat2yFsyy7p	NULL	1969-12-31 15:59:51.859	1969-12-31 16:00:14.468	true	NULL
+-64	-6907	253665376	NULL	-64.0	-6907.0	1cGVWH7n1QU	NULL	NULL	1969-12-31 15:59:53.66	true	NULL
 -64	-7196	NULL	-1615920595	-64.0	-7196.0	NULL	X5rDjl	1969-12-31 16:00:11.912	1969-12-31 15:59:58.174	NULL	false
 -64	-7196	NULL	-1639157869	-64.0	-7196.0	NULL	IJ0Oj7qAiqNGsN7gn	1969-12-31 16:00:01.785	1969-12-31 15:59:58.174	NULL	false
 -64	-7196	NULL	-527203677	-64.0	-7196.0	NULL	JBE4H5RoK412Cs260I72	1969-12-31 15:59:50.184	1969-12-31 15:59:58.174	NULL	true
 -64	-7196	NULL	406535485	-64.0	-7196.0	NULL	E011i	1969-12-31 15:59:56.048	1969-12-31 15:59:58.174	NULL	false
 -64	-7196	NULL	658026952	-64.0	-7196.0	NULL	4tAur	1969-12-31 15:59:53.866	1969-12-31 15:59:58.174	NULL	true
-NULL	NULL	-1015272448	-1887561756	NULL	NULL	jTQ68531mP	4hA4KQj2vD3fI6gX82220d	NULL	1969-12-31 15:59:45.854	false	false
-NULL	NULL	-850295959	-1887561756	NULL	NULL	WMIgGA73	4hA4KQj2vD3fI6gX82220d	NULL	1969-12-31 16:00:00.348	false	false
-NULL	NULL	-886426182	-1887561756	NULL	NULL	0i88xYq3gx1nW4vKjp7vBp3	4hA4KQj2vD3fI6gX82220d	NULL	1969-12-31 16:00:04.472	true	false
-NULL	NULL	-89947	-1645852809	NULL	NULL	73xdw4X	xH7445Rals48VOulSyR5F	NULL	1969-12-31 16:00:07.395	false	false
-NULL	NULL	-971543377	-1645852809	NULL	NULL	uN803aW	xH7445Rals48VOulSyR5F	NULL	1969-12-31 16:00:05.43	false	false
+-64	-8080	528534767	NULL	-64.0	-8080.0	cvLH6Eat2yFsyy7p	NULL	1969-12-31 15:59:58.044	1969-12-31 15:59:48.655	true	NULL
+-64	-9842	253665376	NULL	-64.0	-9842.0	1cGVWH7n1QU	NULL	1969-12-31 16:00:00.631	1969-12-31 16:00:01.781	true	NULL
 PREHOOK: query: explain vectorization detail formatted
 select count(*) 

[20/20] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-13 Thread tchoi
HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

Signed-off-by: Teddy Choi 


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/499d01a3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/499d01a3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/499d01a3

Branch: refs/heads/master
Commit: 499d01a38d34fc1db51a04b4e9d24281ef4fe4d3
Parents: 35f86c7
Author: Teddy Choi 
Authored: Fri Sep 14 11:48:20 2018 +0900
Committer: Teddy Choi 
Committed: Fri Sep 14 11:48:21 2018 +0900
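The commit message is terse, so a short sketch of the user-visible change may help. An unqualified ORDER BY now behaves as ASC NULLS LAST; the two lines added to HiveConf.java in the diffstat below are presumably a compatibility switch, whose name here is an assumption rather than a quote from the patch:

    -- Unqualified ORDER BY now means ASC NULLS LAST:
    SELECT key FROM src ORDER BY key;
    -- equivalent to:
    SELECT key FROM src ORDER BY key ASC NULLS LAST;

    -- Assumed name of the HiveConf compatibility knob (not verified against the patch):
    SET hive.default.nulls.last=false;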

--
 .../org/apache/hadoop/hive/conf/HiveConf.java   |2 +
 .../write_final_output_blobstore.q.out  |4 +-
 .../hive/ql/parse/BaseSemanticAnalyzer.java |9 +-
 .../hadoop/hive/ql/parse/CalcitePlanner.java|4 +-
 .../apache/hadoop/hive/ql/parse/HiveParser.g|   14 +-
 .../hadoop/hive/ql/plan/PTFDeserializer.java|4 +-
 .../hive/ql/udf/generic/GenericUDAFAverage.java |   16 +-
 .../ql/udf/generic/GenericUDAFEvaluator.java|   11 +-
 .../hive/ql/udf/generic/GenericUDAFSum.java |   18 +-
 .../hive/ql/udf/ptf/BasePartitionEvaluator.java |   49 +-
 .../hive/ql/udf/ptf/TableFunctionEvaluator.java |9 +
 .../hive/ql/udf/ptf/TableFunctionResolver.java  |5 +-
 .../hive/ql/udf/ptf/ValueBoundaryScanner.java   |   93 +-
 .../hive/ql/udf/ptf/WindowingTableFunction.java |4 +-
 .../udf/generic/TestGenericUDAFEvaluator.java   |9 +-
 .../clientpositive/beeline/smb_mapjoin_13.q.out |4 +-
 ...names_with_leading_and_trailing_spaces.q.out |2 +-
 .../clientpositive/correlationoptimizer14.q.out |   37 -
 .../results/clientpositive/ctas_colname.q.out   |4 +-
 .../test/results/clientpositive/decimal_3.q.out |8 +-
 .../test/results/clientpositive/decimal_4.q.out |4 +-
 .../test/results/clientpositive/decimal_5.q.out |8 +-
 .../test/results/clientpositive/decimal_6.q.out |   12 +-
 .../clientpositive/decimal_precision.q.out  |  286 ++---
 .../results/clientpositive/decimal_serde.q.out  |4 +-
 .../clientpositive/delete_all_partitioned.q.out |2 +-
 .../clientpositive/distinct_windowing.q.out |6 +-
 .../distinct_windowing_no_cbo.q.out |   10 +-
 .../groupby_grouping_window.q.out   |2 +-
 .../results/clientpositive/input_part7.q.out|4 +-
 .../insert_values_non_partitioned.q.out |2 +-
 .../clientpositive/limit_pushdown2.q.out|   40 +-
 .../clientpositive/llap/acid_no_buckets.q.out   |   16 +-
 .../llap/acid_vectorization_original.q.out  |2 +-
 .../clientpositive/llap/bucketmapjoin7.q.out|2 +-
 .../results/clientpositive/llap/cbo_limit.q.out |8 +-
 .../clientpositive/llap/cbo_rp_limit.q.out  |8 +-
 ...names_with_leading_and_trailing_spaces.q.out |2 +-
 .../llap/delete_all_partitioned.q.out   |2 +-
 .../clientpositive/llap/explainuser_1.q.out |   88 +-
 .../llap/groupby_resolution.q.out   |2 +-
 .../llap/insert_into_with_schema.q.out  |4 +-
 .../llap/insert_values_non_partitioned.q.out|2 +-
 .../clientpositive/llap/limit_pushdown.q.out|8 +-
 .../clientpositive/llap/limit_pushdown3.q.out   |8 +-
 .../results/clientpositive/llap/lineage2.q.out  |2 +-
 .../results/clientpositive/llap/lineage3.q.out  |   10 +-
 .../results/clientpositive/llap/llap_acid.q.out |6 +-
 .../clientpositive/llap/llap_acid_fast.q.out|6 +-
 .../clientpositive/llap/llap_smb_ptf.q.out  |4 +-
 .../llap/offset_limit_ppd_optimizer.q.out   |8 +-
 .../clientpositive/llap/orc_llap_counters.q.out |   82 +-
 .../llap/orc_llap_counters1.q.out   |   10 +-
 .../clientpositive/llap/orc_ppd_basic.q.out |  100 +-
 .../llap/orc_ppd_schema_evol_3a.q.out   |  112 +-
 .../clientpositive/llap/order_null.q.out|4 +-
 .../test/results/clientpositive/llap/ptf.q.out  |  140 +--
 .../clientpositive/llap/ptf_matchpath.q.out |6 +-
 .../clientpositive/llap/ptf_streaming.q.out |   94 +-
 .../clientpositive/llap/sharedworkext.q.out |4 +-
 .../clientpositive/llap/skewjoinopt15.q.out |4 +-
 .../clientpositive/llap/smb_mapjoin_15.q.out|8 +-
 .../clientpositive/llap/subquery_in.q.out   |4 +-
 .../llap/subquery_in_having.q.out   |2 +-
 .../clientpositive/llap/subquery_notin.q.out|   14 +-
 .../clientpositive/llap/subquery_scalar.q.out   |4 +-
 .../llap/tez_dynpart_hashjoin_1.q.out   |4 +-
 .../llap/tez_dynpart_hashjoin_2.q.out   |   12 +-
 .../llap/tez_fixed_bucket_pruning.q.out |   24 +-
 .../llap/tez_vector_dynpart_hashjoin_1.q.out|4 +-
 .../llap/tez_vector_dynpart_hashjoin_2.q.out|   12 +-
 .../llap/update_all_partitioned.q.out   |2 +-
 .../clientpositive/llap/update_tmp_table.q.out  |2 +-
 

[19/20] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-13 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/499d01a3/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out b/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out
index 4cfb1d9..8fadc45 100644
--- a/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out
+++ b/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out
@@ -1314,7 +1314,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-reduceColumnNullOrder: a
+reduceColumnNullOrder: z
 reduceColumnSortOrder: +
 allNative: false
 usesVectorUDFAdaptor: false
@@ -1521,7 +1521,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-reduceColumnNullOrder: a
+reduceColumnNullOrder: z
 reduceColumnSortOrder: +
 allNative: false
 usesVectorUDFAdaptor: false
@@ -1771,7 +1771,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-reduceColumnNullOrder: a
+reduceColumnNullOrder: z
 reduceColumnSortOrder: +
 allNative: false
 usesVectorUDFAdaptor: false
@@ -1794,7 +1794,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-reduceColumnNullOrder: a
+reduceColumnNullOrder: z
 reduceColumnSortOrder: +
 allNative: false
 usesVectorUDFAdaptor: false
@@ -2106,7 +2106,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-reduceColumnNullOrder: a
+reduceColumnNullOrder: z
 reduceColumnSortOrder: +
 allNative: false
 usesVectorUDFAdaptor: false
@@ -2313,7 +2313,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-reduceColumnNullOrder: a
+reduceColumnNullOrder: z
 reduceColumnSortOrder: +
 allNative: false
 usesVectorUDFAdaptor: false
@@ -2564,7 +2564,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-reduceColumnNullOrder: a
+reduceColumnNullOrder: z
 reduceColumnSortOrder: +
 allNative: false
 usesVectorUDFAdaptor: false
@@ -2587,7 +2587,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-reduceColumnNullOrder: a
+reduceColumnNullOrder: z
 reduceColumnSortOrder: +
 allNative: false
 usesVectorUDFAdaptor: false

http://git-wip-us.apache.org/repos/asf/hive/blob/499d01a3/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out b/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out
index 00be86c..9178652 100644
--- a/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out
+++ b/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out
@@ -97,7 +97,6 @@ POSTHOOK: query: select distinct si, si%4 from over10k_n2 order by si
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over10k_n2
 #### A masked pattern was here ####
-NULL	NULL
 256	0
 257	1
 258	2
@@ -350,6 +349,7 @@ NULL	NULL
 509	1
 510	2
 511	3
+NULL	NULL
 PREHOOK: query: insert into over10k_orc_bucketed select * from 

[12/20] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-13 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/499d01a3/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out b/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
index 748dea1..beaf86f 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
@@ -200,7 +200,7 @@ STAGE PLANS:
   Partition table definition
 input alias: ptf_1
 name: noop
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
                    output shape: _col1: string, _col2: string, _col5: int, _col7: double
 partition by: _col2
 raw input shape:
@@ -216,7 +216,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-reduceColumnNullOrder: aa
+reduceColumnNullOrder: az
 reduceColumnSortOrder: ++
 allNative: false
 usesVectorUDFAdaptor: false
@@ -244,7 +244,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 partition by: _col2
 raw input shape:
 window functions:
@@ -515,7 +515,7 @@ STAGE PLANS:
   Partition table definition
 input alias: ptf_1
 name: noop
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 output shape: _col1: string, _col2: string, _col5: int
 partition by: _col2
 raw input shape:
@@ -547,7 +547,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 partition by: _col2
 raw input shape:
 window functions:
@@ -708,7 +708,7 @@ STAGE PLANS:
   Partition table definition
 input alias: ptf_1
 name: noop
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 output shape: _col1: string, _col2: string, _col5: int
 partition by: _col2
 raw input shape:
@@ -867,7 +867,7 @@ STAGE PLANS:
   Partition table definition
 input alias: abc
 name: noop
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 output shape: _col1: string, _col2: string, _col5: 
int, _col7: double
 partition by: _col2
 raw input shape:
@@ -883,7 +883,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-reduceColumnNullOrder: aa
+reduceColumnNullOrder: az
 reduceColumnSortOrder: ++
 allNative: false
 usesVectorUDFAdaptor: false
@@ -911,7 +911,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 partition by: _col2
 raw input shape:
 window functions:
@@ -1119,7 +1119,7 @@ STAGE PLANS:
   Partition table definition
 input alias: ptf_1
 name: noop
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 output shape: _col1: string, _col2: string, _col5: int
 partition by: _col2
 raw input shape:
@@ -1151,7 +1151,7 @@ STAGE PLANS:
   Windowing table definition
   

[08/20] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-13 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/499d01a3/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out 
b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out
index 1a8ee5b..1a8583d 100644
--- a/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out
@@ -2128,7 +2128,7 @@ Stage-0
 Filter Operator [FIL_23] (rows=26 width=491)
   predicate:first_value_window_0 is not null
   PTF Operator [PTF_10] (rows=26 width=491)
-Function 
definitions:[{},{"name:":"windowingtablefunction","order by:":"_col5 ASC NULLS 
FIRST","partition by:":"_col2"}]
+Function 
definitions:[{},{"name:":"windowingtablefunction","order by:":"_col5 ASC NULLS 
LAST","partition by:":"_col2"}]
 Select Operator [SEL_9] (rows=26 width=491)
   Output:["_col1","_col2","_col5"]
 <-Map 4 [PARTITION-LEVEL SORT]
@@ -2558,7 +2558,7 @@ Stage-0
 Select Operator [SEL_4] (rows=20 width=64)
   
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"]
   PTF Operator [PTF_3] (rows=20 width=621)
-Function definitions:[{},{"name:":"windowingtablefunction","order 
by:":"_col1 ASC NULLS FIRST","partition by:":"_col0"}]
+Function definitions:[{},{"name:":"windowingtablefunction","order 
by:":"_col1 ASC NULLS LAST","partition by:":"_col0"}]
 Select Operator [SEL_2] (rows=20 width=621)
   Output:["_col0","_col1","_col2","_col3"]
 <-Map 1 [PARTITION-LEVEL SORT]
@@ -2585,7 +2585,7 @@ Stage-0
 Select Operator [SEL_4] (rows=25 width=179)
   Output:["_col0","_col1","_col2"]
   PTF Operator [PTF_3] (rows=25 width=443)
-Function definitions:[{},{"name:":"windowingtablefunction","order 
by:":"_col1 ASC NULLS FIRST","partition by:":"_col0"}]
+Function definitions:[{},{"name:":"windowingtablefunction","order 
by:":"_col1 ASC NULLS LAST","partition by:":"_col0"}]
 Select Operator [SEL_2] (rows=25 width=443)
   Output:["_col0","_col1"]
 <-Map 1 [PARTITION-LEVEL SORT]
@@ -4057,14 +4057,14 @@ Stage-0
 Select Operator [SEL_7] (rows=26 width=239)
   Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
   PTF Operator [PTF_6] (rows=26 width=499)
-Function definitions:[{},{"name:":"windowingtablefunction","order 
by:":"_col1 ASC NULLS FIRST","partition by:":"_col2"}]
+Function definitions:[{},{"name:":"windowingtablefunction","order 
by:":"_col1 ASC NULLS LAST","partition by:":"_col2"}]
 Select Operator [SEL_5] (rows=26 width=499)
   Output:["_col1","_col2","_col5","_col7"]
 <-Reducer 2 [PARTITION-LEVEL SORT]
   PARTITION-LEVEL SORT [RS_4]
 PartitionCols:_col2
 PTF Operator [PTF_3] (rows=26 width=499)
-  Function definitions:[{},{"Partition table 
definition":{"name:":"noop","order by:":"_col1 ASC NULLS FIRST","partition 
by:":"_col2"}}]
+  Function definitions:[{},{"Partition table 
definition":{"name:":"noop","order by:":"_col1 ASC NULLS LAST","partition 
by:":"_col2"}}]
   Select Operator [SEL_2] (rows=26 width=499)
 Output:["_col1","_col2","_col5","_col7"]
   <-Map 1 [PARTITION-LEVEL SORT]
@@ -4102,14 +4102,14 @@ Stage-0
 Select Operator [SEL_14] (rows=27 width=227)
   Output:["_col0","_col1","_col2","_col3"]
   PTF Operator [PTF_13] (rows=27 width=223)
-Function definitions:[{},{"name:":"windowingtablefunction","order 
by:":"_col1 ASC NULLS FIRST","partition by:":"_col2"}]
+Function definitions:[{},{"name:":"windowingtablefunction","order 
by:":"_col1 ASC NULLS LAST","partition by:":"_col2"}]
 Select Operator [SEL_12] (rows=27 width=223)
   Output:["_col1","_col2","_col5"]
 <-Reducer 2 [PARTITION-LEVEL SORT]
   PARTITION-LEVEL SORT [RS_11]
 PartitionCols:_col2
 PTF Operator [PTF_10] (rows=27 width=223)
-  Function definitions:[{},{"Partition table 
definition":{"name:":"noop","order by:":"_col1 ASC NULLS FIRST","partition 
by:":"_col2"}}]
+  Function definitions:[{},{"Partition table 
definition":{"name:":"noop","order by:":"_col1 ASC NULLS LAST","partition 
by:":"_col2"}}]
   Select Operator [SEL_9] (rows=27 width=223)
 Output:["_col1","_col2","_col5"]
   <-Map 1 [PARTITION-LEVEL SORT]
@@ -4167,14 +4167,14 @@ Stage-0
 Select 

[09/20] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-13 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/499d01a3/ql/src/test/results/clientpositive/spark/parquet_vectorization_div0.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/spark/parquet_vectorization_div0.q.out 
b/ql/src/test/results/clientpositive/spark/parquet_vectorization_div0.q.out
index f448a3e..7de4d38 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_div0.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_div0.q.out
@@ -291,8 +291,8 @@ from alltypesparquet where cbigint > 0 and cbigint < 
1 order by s1, s2 l
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesparquet
  A masked pattern was here 
--985319NULL-0.01217879691754650
 -9853192.0297994862577501E-4   -0.01217879691754650
+-985319NULL-0.01217879691754650
 -63925 0.11256941728588189 -0.18771998435666797
 0  NULLNULL
 0  NULLNULL
@@ -517,8 +517,8 @@ POSTHOOK: Input: default@alltypesparquet
 -273.0 6028764.868131869   1.0 6028764.868131869   
-0.01098901098901099-0.004395604395604396
 -257.0 6404096.533073931.0 6404096.53307393
-0.011673151750972763   -0.004669260700389105
 -250.0 6583411.236 1.0 6583411.236 -0.012  -0.0048
--247.0 NULL1.0 NULL-0.012145748987854251   -0.004858299595141701
 -247.0 -7546669.174089069  1.0 -7546669.174089069  
-0.012145748987854251   -0.004858299595141701
+-247.0 NULL1.0 NULL-0.012145748987854251   -0.004858299595141701
 -246.0 NULL1.0 NULL-0.012195121951219513   -0.004878048780487805
 -237.0 NULL1.0 NULL-0.012658227848101266   -0.005063291139240506
 -236.0 NULL1.0 NULL-0.012711864406779662   -0.005084745762711864
@@ -546,18 +546,18 @@ POSTHOOK: Input: default@alltypesparquet
 -132.0 NULL1.0 NULL-0.022727272727272728   -0.00909090909090909
 -129.0 1.2758548906976745E71.0 1.2758548906976745E7
-0.023255813953488372   -0.009302325581395349
 -128.0 NULL1.0 NULL-0.0234375  -0.009375
--126.0 NULL1.0 NULL-0.023809523809523808   -0.009523809523809523
 -126.0 -1.4793867349206349E7   1.0 -1.4793867349206349E7   
-0.023809523809523808   -0.009523809523809523
+-126.0 NULL1.0 NULL-0.023809523809523808   -0.009523809523809523
 -116.0 NULL1.0 NULL-0.02586206896551724-0.010344827586206896
--113.0 NULL1.0 NULL-0.02654867256637168-0.010619469026548672
 -113.0 -1.6495816690265486E7   1.0 -1.6495816690265486E7   
-0.02654867256637168-0.010619469026548672
+-113.0 NULL1.0 NULL-0.02654867256637168-0.010619469026548672
 -96.0  NULL1.0 NULL-0.03125-0.012499
 -94.0  -1.9830077510638297E7   1.0 -1.9830077510638297E7   
-0.031914893617021274   -0.01276595744680851
 -93.0  NULL1.0 NULL-0.03225806451612903-0.012903225806451613
 -77.0  2.4513789038961038E71.0 2.4513789038961038E7
-0.03896103896103896-0.015584415584415584
 -69.0  2.735596747826087E7 1.0 2.735596747826087E7 
-0.043478260869565216   -0.017391304347826087
--62.0  NULL1.0 NULL-0.04838709677419355-0.01935483870967742
 -62.0  3.0444544451612905E71.0 3.0444544451612905E7
-0.04838709677419355-0.01935483870967742
+-62.0  NULL1.0 NULL-0.04838709677419355-0.01935483870967742
 -60.0  NULL1.0 NULL-0.05   -0.02
 -57.0  -3.27022330877193E7 1.0 -3.27022330877193E7 
-0.05263157894736842-0.021052631578947368
 -49.0  3.35888328367347E7  1.0 3.35888328367347E7  
-0.061224489795918366   -0.024489795918367346

http://git-wip-us.apache.org/repos/asf/hive/blob/499d01a3/ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out 
b/ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out
index b2c0a64..97194a2 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out
@@ -150,7 +150,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-reduceColumnNullOrder: aa
+reduceColumnNullOrder: zz
 reduceColumnSortOrder: ++
 allNative: false
 usesVectorUDFAdaptor: false
@@ -307,7 +307,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS 

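In these plans, reduceColumnNullOrder appears to encode one character per reduce key column: 'a' for NULLS FIRST and 'z' for NULLS LAST, which is why 'aa' becomes 'zz' here and mixed strings such as 'az' appear in the PTF plans earlier. A small decoder written under that assumption (the mapping is inferred from these diffs, not an official Hive API; class name is illustrative):

    public final class NullOrderDecoder {
      // One character per reduce key column, in key order.
      static String decode(String nullOrder) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < nullOrder.length(); i++) {
          sb.append("key").append(i)
            .append(nullOrder.charAt(i) == 'a' ? ": NULLS FIRST" : ": NULLS LAST");
          if (i < nullOrder.length() - 1) sb.append(", ");
        }
        return sb.toString();
      }

      public static void main(String[] args) {
        System.out.println(decode("az")); // key0: NULLS FIRST, key1: NULLS LAST
      }
    }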
[14/20] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-13 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/499d01a3/ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out 
b/ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out
index 5ea866b..d470240 100644
--- a/ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out
@@ -134,7 +134,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col7 ASC NULLS FIRST
+order by: _col7 ASC NULLS LAST
 partition by: _col2
 raw input shape:
 window functions:
@@ -311,7 +311,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-reduceColumnNullOrder: aa
+reduceColumnNullOrder: az
 reduceColumnSortOrder: ++
 allNative: false
 usesVectorUDFAdaptor: false
@@ -339,7 +339,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col7 ASC NULLS FIRST
+order by: _col7 ASC NULLS LAST
 partition by: _col2
 raw input shape:
 window functions:
@@ -520,7 +520,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col6 ASC NULLS FIRST, _col7 ASC NULLS 
FIRST, _col1 ASC NULLS FIRST, _col4 DESC NULLS LAST
+order by: _col6 ASC NULLS LAST, _col7 ASC NULLS LAST, 
_col1 ASC NULLS LAST, _col4 DESC NULLS LAST
 partition by: _col0
 raw input shape:
 window functions:
@@ -743,7 +743,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col2 ASC NULLS FIRST, _col7 ASC NULLS FIRST
+order by: _col2 ASC NULLS LAST, _col7 ASC NULLS LAST
 partition by: _col1
 raw input shape:
 window functions:
@@ -966,7 +966,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col1 ASC NULLS FIRST, _col7 ASC NULLS 
FIRST, _col5 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST, _col7 ASC NULLS LAST, 
_col5 ASC NULLS LAST
 partition by: _col3
 raw input shape:
 window functions:
@@ -1190,7 +1190,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col3 ASC NULLS FIRST
+order by: _col3 ASC NULLS LAST
 partition by: _col4
 raw input shape:
 window functions:
@@ -1397,7 +1397,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-reduceColumnNullOrder: aa
+reduceColumnNullOrder: za
 reduceColumnSortOrder: ++
 allNative: false
 usesVectorUDFAdaptor: false
@@ -1425,7 +1425,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col2 ASC NULLS FIRST
+order by: _col2 ASC NULLS LAST
 partition by: _col2, _col4
 raw input shape:
 window functions:
@@ -1483,32 +1483,32 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@part
  A masked pattern was here 
 p_mfgr avg_window_0
-Manufacturer#1 1753.76
-Manufacturer#3 1410.39
-Manufacturer#4 1620.67
-Manufacturer#5 1018.1
-Manufacturer#5 1788.73
-Manufacturer#1 1173.15

[05/20] hive git commit: HIVE-20423: Set NULLS LAST as the default null ordering (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-09-13 Thread tchoi
http://git-wip-us.apache.org/repos/asf/hive/blob/499d01a3/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out 
b/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
index 0c48310..9eb1a2c 100644
--- a/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
@@ -197,7 +197,7 @@ STAGE PLANS:
   Partition table definition
 input alias: ptf_1
 name: noop
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 output shape: _col1: string, _col2: string, _col5: 
int, _col7: double
 partition by: _col2
 raw input shape:
@@ -213,7 +213,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-reduceColumnNullOrder: aa
+reduceColumnNullOrder: az
 reduceColumnSortOrder: ++
 allNative: false
 usesVectorUDFAdaptor: false
@@ -241,7 +241,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 partition by: _col2
 raw input shape:
 window functions:
@@ -512,7 +512,7 @@ STAGE PLANS:
   Partition table definition
 input alias: ptf_1
 name: noop
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 output shape: _col1: string, _col2: string, _col5: int
 partition by: _col2
 raw input shape:
@@ -543,7 +543,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 partition by: _col2
 raw input shape:
 window functions:
@@ -701,7 +701,7 @@ STAGE PLANS:
   Partition table definition
 input alias: ptf_1
 name: noop
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 output shape: _col1: string, _col2: string, _col5: int
 partition by: _col2
 raw input shape:
@@ -857,7 +857,7 @@ STAGE PLANS:
   Partition table definition
 input alias: abc
 name: noop
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 output shape: _col1: string, _col2: string, _col5: 
int, _col7: double
 partition by: _col2
 raw input shape:
@@ -873,7 +873,7 @@ STAGE PLANS:
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-reduceColumnNullOrder: aa
+reduceColumnNullOrder: az
 reduceColumnSortOrder: ++
 allNative: false
 usesVectorUDFAdaptor: false
@@ -901,7 +901,7 @@ STAGE PLANS:
   Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 partition by: _col2
 raw input shape:
 window functions:
@@ -1106,7 +1106,7 @@ STAGE PLANS:
   Partition table definition
 input alias: ptf_1
 name: noop
-order by: _col1 ASC NULLS FIRST
+order by: _col1 ASC NULLS LAST
 output shape: _col1: string, _col2: string, _col5: int
 partition by: _col2
 raw input shape:
@@ -1137,7 +1137,7 @@ STAGE PLANS:
   Windowing table definition
  

hive git commit: HIVE-20044: Arrow Serde should pad char values and handle empty strings correctly (Teddy Choi, reviewed by Matt McCline)

2018-09-02 Thread tchoi
Repository: hive
Updated Branches:
  refs/heads/master 57f40f71f -> 804535275


HIVE-20044: Arrow Serde should pad char values and handle empty strings 
correctly (Teddy Choi, reviewed by Matt McCline)

Signed-off-by: Teddy Choi 


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/80453527
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/80453527
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/80453527

Branch: refs/heads/master
Commit: 804535275e2950a57d1a3260d6a48290171dd277
Parents: 57f40f7
Author: Teddy Choi 
Authored: Mon Sep 3 11:51:40 2018 +0900
Committer: Teddy Choi 
Committed: Mon Sep 3 11:51:40 2018 +0900

--
 .../hadoop/hive/ql/io/arrow/Serializer.java | 144 +++
 .../io/arrow/TestArrowColumnarBatchSerDe.java   |  29 +++-
 .../ql/exec/vector/expressions/StringExpr.java  |  15 ++
 3 files changed, 130 insertions(+), 58 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/80453527/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java
index 08e0fb2..6b31045 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java
@@ -58,12 +58,14 @@ import 
org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorAssignRow;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
@@ -75,6 +77,7 @@ import org.apache.arrow.memory.BufferAllocator;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 
 import static 
org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ARROW_BATCH_SIZE;
@@ -92,6 +95,7 @@ import static 
org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfoFr
 
 public class Serializer {
   private final int MAX_BUFFERED_ROWS;
+  private final static byte[] EMPTY_BYTES = new byte[0];
 
   // Hive columns
   private final VectorizedRowBatch vectorizedRowBatch;
@@ -393,7 +397,7 @@ public class Serializer {
 case BOOLEAN:
 {
   if(isNative) {
-  writeGeneric(arrowVector, hiveVector, size, 
vectorizedRowBatch.selectedInUse, vectorizedRowBatch.selected, boolNullSetter, 
boolValueSetter);
+  writeGeneric(arrowVector, hiveVector, size, 
vectorizedRowBatch.selectedInUse, vectorizedRowBatch.selected, boolNullSetter, 
boolValueSetter, typeInfo);
 return;
   }
   final BitVector bitVector = (BitVector) arrowVector;
@@ -401,7 +405,7 @@ public class Serializer {
 if (hiveVector.isNull[i]) {
   boolNullSetter.accept(i, arrowVector, hiveVector);
 } else {
-  boolValueSetter.accept(i, i, arrowVector, hiveVector);
+  boolValueSetter.accept(i, i, arrowVector, hiveVector, typeInfo);
 }
   }
 }
@@ -409,7 +413,7 @@ public class Serializer {
 case BYTE:
 {
   if(isNative) {
-writeGeneric(arrowVector, hiveVector, size, 
vectorizedRowBatch.selectedInUse, vectorizedRowBatch.selected, byteNullSetter, 
byteValueSetter);
+writeGeneric(arrowVector, hiveVector, size, 
vectorizedRowBatch.selectedInUse, vectorizedRowBatch.selected, byteNullSetter, 
byteValueSetter, typeInfo);
 return;
   }
   final TinyIntVector tinyIntVector = (TinyIntVector) arrowVector;
@@ -417,7 +421,7 @@ public class Serializer {
 if (hiveVector.isNull[i]) {
   byteNullSetter.accept(i, arrowVector, hiveVector);
 } else {
-  byteValueSetter.accept(i, i, arrowVector, hiveVector);
+  byteValueSetter.accept(i, i, arrowVector, hiveVector, typeInfo);
 }
   }
 }
@@ -425,7 +429,7 @@ public class Serializer {
 case SHORT:
 {
   if(isNative) {
-writeGeneric(arrowVector, 

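The Serializer change above threads typeInfo into each value setter so CHAR(n) values can be padded to their declared length, and the new EMPTY_BYTES constant lets a zero-length string be written as an empty value rather than being confused with SQL NULL. A hedged, self-contained sketch of that padding rule (plain Java with an illustrative class name; the real code goes through StringExpr and Arrow vectors):

    import java.util.Arrays;

    public final class CharPadExample {
      private static final byte[] EMPTY_BYTES = new byte[0];

      // Pad a CHAR(maxLength) value with trailing spaces; keep empty distinct from NULL.
      static byte[] padChar(byte[] value, int start, int length, int maxLength) {
        if (length == 0) {
          return EMPTY_BYTES; // empty string, not SQL NULL
        }
        int copied = Math.min(length, maxLength);
        byte[] padded = new byte[maxLength];
        System.arraycopy(value, start, padded, 0, copied);
        Arrays.fill(padded, copied, maxLength, (byte) ' ');
        return padded;
      }

      public static void main(String[] args) {
        byte[] in = "ab".getBytes();
        System.out.println("[" + new String(padChar(in, 0, in.length, 5)) + "]"); // [ab   ]
      }
    }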
[hive] Git Push Summary

2018-08-26 Thread tchoi
Repository: hive
Updated Branches:
  refs/heads/HIVE-20445 [deleted] a4ef8df46


hive git commit: Working

2018-08-23 Thread tchoi
Repository: hive
Updated Branches:
  refs/heads/HIVE-20445 [created] a4ef8df46


Working


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a4ef8df4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a4ef8df4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a4ef8df4

Branch: refs/heads/HIVE-20445
Commit: a4ef8df46a5b881dde9895aab7c60ff24185b10a
Parents: 611770d
Author: Teddy Choi 
Authored: Fri Aug 24 13:21:35 2018 +0900
Committer: Teddy Choi 
Committed: Fri Aug 24 13:21:35 2018 +0900

--
 .../ql/io/arrow/ArrowColumnarBatchSerDe.java|  17 +-
 .../hadoop/hive/ql/io/arrow/Deserializer.java   |  22 +-
 .../hadoop/hive/ql/io/arrow/Serializer.java |  33 +--
 .../ql/exec/vector/VectorRandomRowSource.java   |  55 -
 .../io/arrow/TestArrowColumnarBatchSerDe.java   | 213 +++
 5 files changed, 134 insertions(+), 206 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/a4ef8df4/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowColumnarBatchSerDe.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowColumnarBatchSerDe.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowColumnarBatchSerDe.java
index ed82d2d..0dd959e 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowColumnarBatchSerDe.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowColumnarBatchSerDe.java
@@ -233,17 +233,16 @@ public class ArrowColumnarBatchSerDe extends 
AbstractSerDe {
   }
 
   static ListColumnVector toStructListVector(MapColumnVector mapVector) {
-final StructColumnVector structVector;
-final ListColumnVector structListVector;
-structVector = new StructColumnVector();
-structVector.fields = new ColumnVector[] {mapVector.keys, 
mapVector.values};
-structListVector = new ListColumnVector();
-structListVector.child = structVector;
-structListVector.childCount = mapVector.childCount;
+final StructColumnVector structVector =
+new StructColumnVector(mapVector.childCount, mapVector.keys, 
mapVector.values);
+final ListColumnVector structListVector =
+new ListColumnVector(mapVector.isNull.length, structVector);
 structListVector.isRepeating = mapVector.isRepeating;
 structListVector.noNulls = mapVector.noNulls;
-System.arraycopy(mapVector.offsets, 0, structListVector.offsets, 0, 
mapVector.childCount);
-System.arraycopy(mapVector.lengths, 0, structListVector.lengths, 0, 
mapVector.childCount);
+
+System.arraycopy(mapVector.offsets, 0, structListVector.offsets, 0, 
mapVector.offsets.length);
+System.arraycopy(mapVector.lengths, 0, structListVector.lengths, 0, 
mapVector.lengths.length);
+System.arraycopy(mapVector.isNull, 0, structListVector.isNull, 0, 
mapVector.isNull.length);
 return structListVector;
   }
 

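The rewritten toStructListVector() above leans on the fact that a map column and a list-of-structs column share the same physical shape: flat key and value children plus per-row offsets, lengths, and a null mask, which is why the diff can copy those arrays wholesale. A hedged sketch of that columnar layout with plain arrays (illustrative, not Hive's ColumnVector classes):

    public final class MapAsStructList {
      public static void main(String[] args) {
        String[] keys   = {"a", "b", "c"};  // flat key child
        int[]    values = { 1,   2,   3 };  // flat value child
        long[] offsets  = {0, 2};           // row 0 -> entries [0,2), row 1 -> [2,3)
        long[] lengths  = {2, 1};

        // Reading the map column as list<struct<key,value>> reuses the same arrays.
        for (int row = 0; row < offsets.length; row++) {
          StringBuilder sb = new StringBuilder("row " + row + ": [");
          for (long i = offsets[row]; i < offsets[row] + lengths[row]; i++) {
            sb.append('{').append(keys[(int) i]).append(',').append(values[(int) i]).append("} ");
          }
          System.out.println(sb.append(']'));
        }
        // row 0: [{a,1} {b,2} ]
        // row 1: [{c,3} ]
      }
    }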
http://git-wip-us.apache.org/repos/asf/hive/blob/a4ef8df4/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Deserializer.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Deserializer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Deserializer.java
index edc4b39..51de786 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Deserializer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Deserializer.java
@@ -136,15 +136,13 @@ class Deserializer {
   case LIST:
 readList(arrowVector, (ListColumnVector) hiveVector, (ListTypeInfo) 
typeInfo);
 break;
-  case MAP:
-readMap(arrowVector, (MapColumnVector) hiveVector, (MapTypeInfo) 
typeInfo);
-break;
   case STRUCT:
 readStruct(arrowVector, (StructColumnVector) hiveVector, 
(StructTypeInfo) typeInfo);
 break;
   case UNION:
 readUnion(arrowVector, (UnionColumnVector) hiveVector, (UnionTypeInfo) 
typeInfo);
 break;
+  case MAP:
   default:
 throw new IllegalArgumentException();
 }
@@ -407,24 +405,6 @@ class Deserializer {
 }
   }
 
-  private void readMap(FieldVector arrowVector, MapColumnVector hiveVector, 
MapTypeInfo typeInfo) {
-final int size = arrowVector.getValueCount();
-final ListTypeInfo mapStructListTypeInfo = toStructListTypeInfo(typeInfo);
-final ListColumnVector mapStructListVector = 
toStructListVector(hiveVector);
-final StructColumnVector mapStructVector = (StructColumnVector) 
mapStructListVector.child;
-
-read(arrowVector, mapStructListVector, mapStructListTypeInfo);
-
-hiveVector.isRepeating = mapStructListVector.isRepeating;
-hiveVector.childCount = mapStructListVector.childCount;
-hiveVector.noNulls = mapStructListVector.noNulls;
-hiveVector.keys = mapStructVector.fields[0];
-

hive git commit: HIVE-20368: Remove VectorTopNKeyOperator lock (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-08-17 Thread tchoi
Repository: hive
Updated Branches:
  refs/heads/branch-3 f0b20bfe6 -> 6bf1d8b64


HIVE-20368: Remove VectorTopNKeyOperator lock (Teddy Choi, reviewed by Jesus 
Camacho Rodriguez)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6bf1d8b6
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6bf1d8b6
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6bf1d8b6

Branch: refs/heads/branch-3
Commit: 6bf1d8b644eadd69db026317ad46430e9f9500f8
Parents: f0b20bf
Author: Teddy Choi 
Authored: Fri Aug 17 22:40:09 2018 +0900
Committer: Teddy Choi 
Committed: Fri Aug 17 22:40:09 2018 +0900

--
 .../hadoop/hive/ql/exec/TopNKeyOperator.java|  62 ++-
 .../ql/exec/vector/VectorTopNKeyOperator.java   | 163 +--
 2 files changed, 23 insertions(+), 202 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/6bf1d8b6/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java
index 3dfeeaf..4734824 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java
@@ -20,7 +20,6 @@ package org.apache.hadoop.hive.ql.exec;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.ql.CompilationOpContext;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.TopNKeyDesc;
@@ -47,21 +46,8 @@ public class TopNKeyOperator extends Operator 
implements Serializab
   // Priority queue that holds occurred keys
   private transient PriorityQueue priorityQueue;
 
-  // Fast key wrapper in input format for fast comparison
   private transient KeyWrapper keyWrapper;
 
-  // Standard key wrapper in standard format for output
-  private transient KeyWrapper standardKeyWrapper;
-
-  // Maximum number of rows
-  private transient int rowLimit;
-
-  // Current number of rows
-  private transient int rowSize;
-
-  // Rows
-  private transient Object[] rows;
-
   /** Kryo ctor. */
   public TopNKeyOperator() {
 super();
@@ -103,7 +89,8 @@ public class TopNKeyOperator extends Operator 
implements Serializab
 }
 
 ObjectInspector rowInspector = inputObjInspectors[0];
-outputObjInspector = 
ObjectInspectorUtils.getStandardObjectInspector(rowInspector);
+ObjectInspector standardObjInspector = 
ObjectInspectorUtils.getStandardObjectInspector(rowInspector);
+outputObjInspector = rowInspector;
 
 // init keyFields
 int numKeys = conf.getKeyColumns().size();
@@ -117,25 +104,26 @@ public class TopNKeyOperator extends 
Operator implements Serializab
   keyFields[i] = ExprNodeEvaluatorFactory.get(key, hconf);
   keyObjectInspectors[i] = keyFields[i].initialize(rowInspector);
   standardKeyFields[i] = ExprNodeEvaluatorFactory.get(key, hconf);
-  standardKeyObjectInspectors[i] = 
standardKeyFields[i].initialize(outputObjInspector);
+  standardKeyObjectInspectors[i] = 
standardKeyFields[i].initialize(standardObjInspector);
 }
 
 priorityQueue = new PriorityQueue<>(topN + 1, new 
TopNKeyOperator.KeyWrapperComparator(
 standardKeyObjectInspectors, standardKeyObjectInspectors, 
columnSortOrderIsDesc));
 
-keyWrapper = new KeyWrapperFactory(keyFields, keyObjectInspectors,
-standardKeyObjectInspectors).getKeyWrapper();
-standardKeyWrapper = new KeyWrapperFactory(standardKeyFields, 
standardKeyObjectInspectors,
-standardKeyObjectInspectors).getKeyWrapper();
-
-rowLimit = VectorizedRowBatch.DEFAULT_SIZE;
-rows = new Object[rowLimit];
-rowSize = 0;
+KeyWrapperFactory keyWrapperFactory = new KeyWrapperFactory(keyFields, 
keyObjectInspectors,
+standardKeyObjectInspectors);
+keyWrapper = keyWrapperFactory.getKeyWrapper();
   }
 
   @Override
   public void process(Object row, int tag) throws HiveException {
-keyWrapper.getNewKey(row, inputObjInspectors[0]);
+if (canProcess(row, tag)) {
+  forward(row, outputObjInspector);
+}
+  }
+
+  protected boolean canProcess(Object row, int tag) throws HiveException {
+keyWrapper.getNewKey(row, inputObjInspectors[tag]);
 keyWrapper.setHashKey();
 
 if (!priorityQueue.contains(keyWrapper)) {
@@ -145,32 +133,12 @@ public class TopNKeyOperator extends 
Operator implements Serializab
   priorityQueue.poll();
 }
 
-rows[rowSize] = ObjectInspectorUtils.copyToStandardObject(row, 
inputObjInspectors[0]);
-rowSize++;
-
-if (rowSize % rowLimit == 0) {
-  

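With the buffering removed, process() forwards a row immediately whenever canProcess() decides its key can still belong to the top N, which it tracks with a bounded priority queue of keys; no row copies are retained, hence no lock or batch flush is needed. A minimal sketch of that filter under the same idea, with single int keys standing in for KeyWrapper (illustrative class name, not Hive code):

    import java.util.Comparator;
    import java.util.PriorityQueue;

    public final class TopNKeyFilter {
      private final int topN;
      // Max-heap: the head is the worst (largest) key currently retained.
      private final PriorityQueue<Integer> heap =
          new PriorityQueue<>(Comparator.reverseOrder());

      TopNKeyFilter(int topN) { this.topN = topN; }

      boolean canForward(int key) {
        if (!heap.contains(key)) {
          heap.offer(key);
          if (heap.size() > topN) {
            heap.poll(); // evict the current worst key
          }
        }
        return heap.contains(key); // forward only if the key survived
      }

      public static void main(String[] args) {
        TopNKeyFilter f = new TopNKeyFilter(2);
        System.out.println(f.canForward(5)); // true
        System.out.println(f.canForward(3)); // true
        System.out.println(f.canForward(9)); // false: 9 is evicted immediately
        System.out.println(f.canForward(4)); // true: 4 evicts 5
      }
    }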
hive git commit: HIVE-20368: Remove VectorTopNKeyOperator lock (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

2018-08-17 Thread tchoi
Repository: hive
Updated Branches:
  refs/heads/master ccdcc5e2e -> 513ee73b7


HIVE-20368: Remove VectorTopNKeyOperator lock (Teddy Choi, reviewed by Jesus 
Camacho Rodriguez)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/513ee73b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/513ee73b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/513ee73b

Branch: refs/heads/master
Commit: 513ee73b77a86c036fbcc424bfc5c70da817c98b
Parents: ccdcc5e
Author: Teddy Choi 
Authored: Fri Aug 17 22:31:09 2018 +0900
Committer: Teddy Choi 
Committed: Fri Aug 17 22:31:09 2018 +0900

--
 .../hadoop/hive/ql/exec/TopNKeyOperator.java|  62 ++-
 .../ql/exec/vector/VectorTopNKeyOperator.java   | 163 +--
 2 files changed, 23 insertions(+), 202 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/513ee73b/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java
index 3dfeeaf..4734824 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java
@@ -20,7 +20,6 @@ package org.apache.hadoop.hive.ql.exec;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.ql.CompilationOpContext;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.TopNKeyDesc;
@@ -47,21 +46,8 @@ public class TopNKeyOperator extends Operator 
implements Serializab
   // Priority queue that holds occurred keys
   private transient PriorityQueue priorityQueue;
 
-  // Fast key wrapper in input format for fast comparison
   private transient KeyWrapper keyWrapper;
 
-  // Standard key wrapper in standard format for output
-  private transient KeyWrapper standardKeyWrapper;
-
-  // Maximum number of rows
-  private transient int rowLimit;
-
-  // Current number of rows
-  private transient int rowSize;
-
-  // Rows
-  private transient Object[] rows;
-
   /** Kryo ctor. */
   public TopNKeyOperator() {
 super();
@@ -103,7 +89,8 @@ public class TopNKeyOperator extends Operator 
implements Serializab
 }
 
 ObjectInspector rowInspector = inputObjInspectors[0];
-outputObjInspector = 
ObjectInspectorUtils.getStandardObjectInspector(rowInspector);
+ObjectInspector standardObjInspector = 
ObjectInspectorUtils.getStandardObjectInspector(rowInspector);
+outputObjInspector = rowInspector;
 
 // init keyFields
 int numKeys = conf.getKeyColumns().size();
@@ -117,25 +104,26 @@ public class TopNKeyOperator extends 
Operator implements Serializab
   keyFields[i] = ExprNodeEvaluatorFactory.get(key, hconf);
   keyObjectInspectors[i] = keyFields[i].initialize(rowInspector);
   standardKeyFields[i] = ExprNodeEvaluatorFactory.get(key, hconf);
-  standardKeyObjectInspectors[i] = 
standardKeyFields[i].initialize(outputObjInspector);
+  standardKeyObjectInspectors[i] = 
standardKeyFields[i].initialize(standardObjInspector);
 }
 
 priorityQueue = new PriorityQueue<>(topN + 1, new 
TopNKeyOperator.KeyWrapperComparator(
 standardKeyObjectInspectors, standardKeyObjectInspectors, 
columnSortOrderIsDesc));
 
-keyWrapper = new KeyWrapperFactory(keyFields, keyObjectInspectors,
-standardKeyObjectInspectors).getKeyWrapper();
-standardKeyWrapper = new KeyWrapperFactory(standardKeyFields, 
standardKeyObjectInspectors,
-standardKeyObjectInspectors).getKeyWrapper();
-
-rowLimit = VectorizedRowBatch.DEFAULT_SIZE;
-rows = new Object[rowLimit];
-rowSize = 0;
+KeyWrapperFactory keyWrapperFactory = new KeyWrapperFactory(keyFields, 
keyObjectInspectors,
+standardKeyObjectInspectors);
+keyWrapper = keyWrapperFactory.getKeyWrapper();
   }
 
   @Override
   public void process(Object row, int tag) throws HiveException {
-keyWrapper.getNewKey(row, inputObjInspectors[0]);
+if (canProcess(row, tag)) {
+  forward(row, outputObjInspector);
+}
+  }
+
+  protected boolean canProcess(Object row, int tag) throws HiveException {
+keyWrapper.getNewKey(row, inputObjInspectors[tag]);
 keyWrapper.setHashKey();
 
 if (!priorityQueue.contains(keyWrapper)) {
@@ -145,32 +133,12 @@ public class TopNKeyOperator extends 
Operator implements Serializab
   priorityQueue.poll();
 }
 
-rows[rowSize] = ObjectInspectorUtils.copyToStandardObject(row, 
inputObjInspectors[0]);
-rowSize++;
-
-if (rowSize % rowLimit == 0) {
-  processRows();

hive git commit: HIVE-20203: Arrow SerDe leaks a DirectByteBuffer (Eric Wohlstadter, reviewed by Teddy Choi)

2018-07-25 Thread tchoi
Repository: hive
Updated Branches:
  refs/heads/master 9d78fac36 -> 2820fc4c6


HIVE-20203: Arrow SerDe leaks a DirectByteBuffer (Eric Wohlstadter, reviewed by 
Teddy Choi)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2820fc4c
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2820fc4c
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2820fc4c

Branch: refs/heads/master
Commit: 2820fc4c6d576bb9543bb627ab6f182f17a5c771
Parents: 9d78fac
Author: Teddy Choi 
Authored: Thu Jul 26 12:03:03 2018 +0900
Committer: Teddy Choi 
Committed: Thu Jul 26 12:03:03 2018 +0900

--
 .../org/apache/hadoop/hive/conf/HiveConf.java   |  2 ++
 .../hadoop/hive/llap/LlapArrowRecordWriter.java | 25 
 .../hive/llap/LlapOutputFormatService.java  |  4 +---
 .../hive/llap/WritableByteChannelAdapter.java   | 13 ++
 .../hive/ql/io/arrow/ArrowWrapperWritable.java  | 19 +++
 .../hadoop/hive/ql/io/arrow/Serializer.java | 17 ++---
 6 files changed, 65 insertions(+), 15 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/2820fc4c/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 18696ad..15217e7 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2657,6 +2657,8 @@ public class HiveConf extends Configuration {
 // For Arrow SerDe
 HIVE_ARROW_ROOT_ALLOCATOR_LIMIT("hive.arrow.root.allocator.limit", 
Long.MAX_VALUE,
 "Arrow root allocator memory size limitation in bytes."),
+HIVE_ARROW_BATCH_ALLOCATOR_LIMIT("hive.arrow.batch.allocator.limit", 
10_000_000_000L,
+"Max bytes per arrow batch. This is a threshold, the memory is not 
pre-allocated."),
 HIVE_ARROW_BATCH_SIZE("hive.arrow.batch.size", 1000, "The number of rows 
sent in one Arrow batch."),
 
 // For Druid storage handler

http://git-wip-us.apache.org/repos/asf/hive/blob/2820fc4c/ql/src/java/org/apache/hadoop/hive/llap/LlapArrowRecordWriter.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/llap/LlapArrowRecordWriter.java 
b/ql/src/java/org/apache/hadoop/hive/llap/LlapArrowRecordWriter.java
index 1b3a3eb..9ee1048 100644
--- a/ql/src/java/org/apache/hadoop/hive/llap/LlapArrowRecordWriter.java
+++ b/ql/src/java/org/apache/hadoop/hive/llap/LlapArrowRecordWriter.java
@@ -20,11 +20,12 @@ package org.apache.hadoop.hive.llap;
 
 import java.io.IOException;
 
+import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.vector.VectorSchemaRoot;
 import org.apache.arrow.vector.ipc.ArrowStreamWriter;
 import org.apache.hadoop.hive.ql.io.arrow.ArrowWrapperWritable;
+import org.apache.arrow.vector.complex.NullableMapVector;
 import org.apache.hadoop.io.Writable;
-import java.nio.channels.WritableByteChannel;
 import org.apache.hadoop.mapred.RecordWriter;
 import org.apache.hadoop.mapred.Reporter;
 import org.slf4j.Logger;
@@ -47,15 +48,28 @@ public class LlapArrowRecordWriter
   public static final Logger LOG = 
LoggerFactory.getLogger(LlapArrowRecordWriter.class);
 
   ArrowStreamWriter arrowStreamWriter;
-  WritableByteChannel out;
+  WritableByteChannelAdapter out;
+  BufferAllocator allocator;
+  NullableMapVector rootVector;
 
-  public LlapArrowRecordWriter(WritableByteChannel out) {
+  public LlapArrowRecordWriter(WritableByteChannelAdapter out) {
 this.out = out;
   }
 
   @Override
   public void close(Reporter reporter) throws IOException {
-arrowStreamWriter.close();
+try {
+  arrowStreamWriter.close();
+} finally {
+  rootVector.close();
+  //bytesLeaked should always be 0
+  long bytesLeaked = allocator.getAllocatedMemory();
+  if(bytesLeaked != 0) {
+LOG.error("Arrow memory leaked bytes: {}", bytesLeaked);
+throw new IllegalStateException("Arrow memory leaked bytes:" + 
bytesLeaked);
+  }
+  allocator.close();
+}
   }
 
   @Override
@@ -64,6 +78,9 @@ public class LlapArrowRecordWriter
 if (arrowStreamWriter == null) {
   VectorSchemaRoot vectorSchemaRoot = 
arrowWrapperWritable.getVectorSchemaRoot();
   arrowStreamWriter = new ArrowStreamWriter(vectorSchemaRoot, null, out);
+  allocator = arrowWrapperWritable.getAllocator();
+  this.out.setAllocator(allocator);
+  rootVector = arrowWrapperWritable.getRootVector();
 }
 arrowStreamWriter.writeBatch();
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/2820fc4c/ql/src/java/org/apache/hadoop/hive/llap/LlapOutputFormatService.java

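The close() logic above is the leak guard this fix introduces: close the Arrow stream writer, then the root vector, then verify the allocator holds zero bytes before closing it, so a leaked DirectByteBuffer fails loudly instead of silently pinning native memory. A hedged sketch of the same check against Arrow's public allocator API (what gets allocated between the braces is up to the caller; class name is illustrative):

    import org.apache.arrow.memory.BufferAllocator;
    import org.apache.arrow.memory.RootAllocator;

    public final class AllocatorLeakCheck {
      public static void main(String[] args) {
        try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE)) {
          // ... allocate vectors, write batches, and close them here ...
          long leaked = allocator.getAllocatedMemory();
          if (leaked != 0) {
            // Mirrors the diff: surface the leak instead of ignoring it.
            throw new IllegalStateException("Arrow memory leaked bytes: " + leaked);
          }
        } // RootAllocator.close() also complains if memory is still outstanding
      }
    }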
hive git commit: HIVE-20076: ACID: Fix Synthetic ROW__ID generation for vectorized orc readers (Teddy Choi, reviewed by Eugene Koifman) (addendum)

2018-07-11 Thread tchoi
Repository: hive
Updated Branches:
  refs/heads/branch-3.1 7e649028c -> c8796addd


HIVE-20076: ACID: Fix Synthetic ROW__ID generation for vectorized orc readers 
(Teddy Choi, reviewed by Eugene Koifman) (addendum)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c8796add
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c8796add
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c8796add

Branch: refs/heads/branch-3.1
Commit: c8796a27ca94382871d2f838050cf5e4d454
Parents: 7e64902
Author: Teddy Choi 
Authored: Thu Jul 12 08:13:27 2018 +0900
Committer: Teddy Choi 
Committed: Thu Jul 12 08:15:05 2018 +0900

--
 .../queries/clientpositive/vector_delete_orig_table.q|  2 ++
 .../clientpositive/vector_delete_orig_table.q.out| 11 ++-
 2 files changed, 12 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/c8796add/ql/src/test/queries/clientpositive/vector_delete_orig_table.q
--
diff --git a/ql/src/test/queries/clientpositive/vector_delete_orig_table.q 
b/ql/src/test/queries/clientpositive/vector_delete_orig_table.q
index f914408..48ef5e2 100644
--- a/ql/src/test/queries/clientpositive/vector_delete_orig_table.q
+++ b/ql/src/test/queries/clientpositive/vector_delete_orig_table.q
@@ -29,6 +29,8 @@ select count(*) from acid_dot;
 
 select count(*) from acid_dot;
 
+select count(*) from acid_dot where cint < -1070551679;
+
 delete from acid_dot where cint < -1070551679;
 
 select count(*) from acid_dot;

http://git-wip-us.apache.org/repos/asf/hive/blob/c8796add/ql/src/test/results/clientpositive/vector_delete_orig_table.q.out
--
diff --git a/ql/src/test/results/clientpositive/vector_delete_orig_table.q.out 
b/ql/src/test/results/clientpositive/vector_delete_orig_table.q.out
index 5d7f310..60d3c4d 100644
--- a/ql/src/test/results/clientpositive/vector_delete_orig_table.q.out
+++ b/ql/src/test/results/clientpositive/vector_delete_orig_table.q.out
@@ -132,6 +132,15 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@acid_dot
  A masked pattern was here 
 12288
+PREHOOK: query: select count(*) from acid_dot where cint < -1070551679
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid_dot
+ A masked pattern was here 
+POSTHOOK: query: select count(*) from acid_dot where cint < -1070551679
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid_dot
+ A masked pattern was here 
+8
 PREHOOK: query: delete from acid_dot where cint < -1070551679
 PREHOOK: type: QUERY
 PREHOOK: Input: default@acid_dot
@@ -148,5 +157,5 @@ POSTHOOK: query: select count(*) from acid_dot
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@acid_dot
  A masked pattern was here 
-12192
+12280
  A masked pattern was here 

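For context on the expectation change: the probe added above shows that 8 of the 12288 original rows match the delete predicate, so the post-delete count should be 12288 - 8 = 12280; the previously recorded 12192 was inconsistent with that arithmetic, which is what this addendum corrects.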


hive git commit: HIVE-20076: ACID: Fix Synthetic ROW__ID generation for vectorized orc readers (Teddy Choi, reviewed by Eugene Koifman) (addendum)

2018-07-11 Thread tchoi
Repository: hive
Updated Branches:
  refs/heads/branch-3 c829f61cb -> b7834e679


HIVE-20076: ACID: Fix Synthetic ROW__ID generation for vectorized orc readers 
(Teddy Choi, reviewed by Eugene Koifman) (addendum)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b7834e67
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b7834e67
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b7834e67

Branch: refs/heads/branch-3
Commit: b7834e679dbef5774b74d05b0136b888d1ff3ff3
Parents: c829f61
Author: Teddy Choi 
Authored: Thu Jul 12 08:13:27 2018 +0900
Committer: Teddy Choi 
Committed: Thu Jul 12 08:14:27 2018 +0900

--
 .../queries/clientpositive/vector_delete_orig_table.q|  2 ++
 .../clientpositive/vector_delete_orig_table.q.out| 11 ++-
 2 files changed, 12 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/b7834e67/ql/src/test/queries/clientpositive/vector_delete_orig_table.q
--
diff --git a/ql/src/test/queries/clientpositive/vector_delete_orig_table.q 
b/ql/src/test/queries/clientpositive/vector_delete_orig_table.q
index f914408..48ef5e2 100644
--- a/ql/src/test/queries/clientpositive/vector_delete_orig_table.q
+++ b/ql/src/test/queries/clientpositive/vector_delete_orig_table.q
@@ -29,6 +29,8 @@ select count(*) from acid_dot;
 
 select count(*) from acid_dot;
 
+select count(*) from acid_dot where cint < -1070551679;
+
 delete from acid_dot where cint < -1070551679;
 
 select count(*) from acid_dot;

http://git-wip-us.apache.org/repos/asf/hive/blob/b7834e67/ql/src/test/results/clientpositive/vector_delete_orig_table.q.out
--
diff --git a/ql/src/test/results/clientpositive/vector_delete_orig_table.q.out 
b/ql/src/test/results/clientpositive/vector_delete_orig_table.q.out
index 5d7f310..60d3c4d 100644
--- a/ql/src/test/results/clientpositive/vector_delete_orig_table.q.out
+++ b/ql/src/test/results/clientpositive/vector_delete_orig_table.q.out
@@ -132,6 +132,15 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@acid_dot
  A masked pattern was here 
 12288
+PREHOOK: query: select count(*) from acid_dot where cint < -1070551679
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid_dot
+ A masked pattern was here 
+POSTHOOK: query: select count(*) from acid_dot where cint < -1070551679
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid_dot
+ A masked pattern was here 
+8
 PREHOOK: query: delete from acid_dot where cint < -1070551679
 PREHOOK: type: QUERY
 PREHOOK: Input: default@acid_dot
@@ -148,5 +157,5 @@ POSTHOOK: query: select count(*) from acid_dot
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@acid_dot
  A masked pattern was here 
-12192
+12280
  A masked pattern was here 



hive git commit: HIVE-20076: ACID: Fix Synthetic ROW__ID generation for vectorized orc readers (Teddy Choi, reviewed by Eugene Koifman) (addendum)

2018-07-11 Thread tchoi
Repository: hive
Updated Branches:
  refs/heads/master 6340a81f4 -> 19c2ec485


HIVE-20076: ACID: Fix Synthetic ROW__ID generation for vectorized orc readers 
(Teddy Choi, reviewed by Eugene Koifman) (addendum)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/19c2ec48
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/19c2ec48
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/19c2ec48

Branch: refs/heads/master
Commit: 19c2ec4855d4dd5baf512ef23262555431b6c538
Parents: 6340a81
Author: Teddy Choi 
Authored: Thu Jul 12 08:13:27 2018 +0900
Committer: Teddy Choi 
Committed: Thu Jul 12 08:13:27 2018 +0900

--
 .../queries/clientpositive/vector_delete_orig_table.q|  2 ++
 .../clientpositive/vector_delete_orig_table.q.out| 11 ++-
 2 files changed, 12 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/19c2ec48/ql/src/test/queries/clientpositive/vector_delete_orig_table.q
--
diff --git a/ql/src/test/queries/clientpositive/vector_delete_orig_table.q 
b/ql/src/test/queries/clientpositive/vector_delete_orig_table.q
index f914408..48ef5e2 100644
--- a/ql/src/test/queries/clientpositive/vector_delete_orig_table.q
+++ b/ql/src/test/queries/clientpositive/vector_delete_orig_table.q
@@ -29,6 +29,8 @@ select count(*) from acid_dot;
 
 select count(*) from acid_dot;
 
+select count(*) from acid_dot where cint < -1070551679;
+
 delete from acid_dot where cint < -1070551679;
 
 select count(*) from acid_dot;

http://git-wip-us.apache.org/repos/asf/hive/blob/19c2ec48/ql/src/test/results/clientpositive/vector_delete_orig_table.q.out
--
diff --git a/ql/src/test/results/clientpositive/vector_delete_orig_table.q.out 
b/ql/src/test/results/clientpositive/vector_delete_orig_table.q.out
index 5d7f310..60d3c4d 100644
--- a/ql/src/test/results/clientpositive/vector_delete_orig_table.q.out
+++ b/ql/src/test/results/clientpositive/vector_delete_orig_table.q.out
@@ -132,6 +132,15 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@acid_dot
  A masked pattern was here 
 12288
+PREHOOK: query: select count(*) from acid_dot where cint < -1070551679
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid_dot
+ A masked pattern was here 
+POSTHOOK: query: select count(*) from acid_dot where cint < -1070551679
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid_dot
+ A masked pattern was here 
+8
 PREHOOK: query: delete from acid_dot where cint < -1070551679
 PREHOOK: type: QUERY
 PREHOOK: Input: default@acid_dot
@@ -148,5 +157,5 @@ POSTHOOK: query: select count(*) from acid_dot
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@acid_dot
  A masked pattern was here 
-12192
+12280
  A masked pattern was here 



hive git commit: HIVE-20076: ACID: Fix Synthetic ROW__ID generation for vectorized orc readers (Teddy Choi, reviewed by Eugene Koifman)

2018-07-11 Thread tchoi
Repository: hive
Updated Branches:
  refs/heads/branch-3.1 7e46905c6 -> 7e649028c


HIVE-20076: ACID: Fix Synthetic ROW__ID generation for vectorized orc readers 
(Teddy Choi, reviewed by Eugene Koifman)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7e649028
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7e649028
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7e649028

Branch: refs/heads/branch-3.1
Commit: 7e649028c343c3e5491d5fc5b3d271cb897f3f21
Parents: 7e46905
Author: Teddy Choi 
Authored: Thu Jul 12 06:13:40 2018 +0900
Committer: Teddy Choi 
Committed: Thu Jul 12 06:26:00 2018 +0900

--
 .../org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java | 6 ++
 .../apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java  | 4 
 2 files changed, 10 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/7e649028/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
index d177e3f..889bd58 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
@@ -147,6 +147,12 @@ public class RecordReaderImpl extends 
org.apache.orc.impl.RecordReaderImpl
   public boolean nextBatch(VectorizedRowBatch theirBatch) throws IOException {
 // If the user hasn't been reading by row, use the fast path.
 if (rowInBatch >= batch.size) {
+  if (batch.size > 0) {
+// the local batch has been consumed entirely, reset it
+batch.reset();
+  }
+  baseRow = super.getRowNumber();
+  rowInBatch = 0;
   return super.nextBatch(theirBatch);
 }
 copyIntoBatch(theirBatch, batch, rowInBatch);

http://git-wip-us.apache.org/repos/asf/hive/blob/7e649028/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
--
diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java 
b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
index 2071d13..aa99e57 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
@@ -154,9 +154,13 @@ public class TestVectorizedORCReader {
 VectorizedRowBatch batch = reader.getSchema().createRowBatchV2();
 OrcStruct row = null;
 
+long lastRowNumber = -1;
 // Check Vectorized ORC reader against ORC row reader
 while (vrr.nextBatch(batch)) {
+  Assert.assertEquals(lastRowNumber + 1, vrr.getRowNumber());
   for (int i = 0; i < batch.size; i++) {
+Assert.assertEquals(rr.getRowNumber(), vrr.getRowNumber()+i);
+lastRowNumber = rr.getRowNumber();
 row = (OrcStruct) rr.next(row);
 for (int j = 0; j < batch.cols.length; j++) {
   Object a = (row.getFieldValue(j));

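The RecordReaderImpl fix above keeps getRowNumber() consistent across the reader's two paths: once the locally buffered batch is exhausted it is reset, baseRow is re-anchored to the delegate's position, and rowInBatch returns to 0 before the fast path runs, which is exactly what the new assertions in TestVectorizedORCReader verify. A hedged sketch of that invariant with a hypothetical buffered reader (not Hive's class):

    final class BufferedBatchReader {
      private int batchSize;     // rows still buffered locally
      private int rowInBatch;    // next unread row within the local batch
      private long baseRow;      // absolute row number of local row 0
      private long delegatePos;  // stand-in for super.getRowNumber()

      long getRowNumber() { return baseRow + rowInBatch; } // first unread row

      void nextBatch(int rowsWanted) {
        if (rowInBatch >= batchSize) {
          // Local batch fully consumed: reset it and re-anchor to the delegate
          // before taking the fast path (this is what the fix adds).
          batchSize = 0;
          baseRow = delegatePos;
          rowInBatch = 0;
          delegatePos += rowsWanted; // fast path reads straight through
        } else {
          rowInBatch = batchSize;    // slow path drains the buffer
        }
      }

      public static void main(String[] args) {
        BufferedBatchReader r = new BufferedBatchReader();
        r.nextBatch(1024);
        r.nextBatch(1024);
        System.out.println(r.getRowNumber()); // 1024: first row of batch 2, still in sync
      }
    }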


  1   2   >