[1/2] hive git commit: Revert "HIVE-14540. Add support in ptest to create batches for non qfile tests."

2016-09-14 Thread sseth
Repository: hive
Updated Branches:
  refs/heads/master 94dcf4abb -> 0e91e28a9


http://git-wip-us.apache.org/repos/asf/hive/blob/0e91e28a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testBatch.approved.txt
--
diff --git a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testBatch.approved.txt b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testBatch.approved.txt
index e165240..3270167 100644
--- a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testBatch.approved.txt
+++ b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testBatch.approved.txt
@@ -15,7 +15,6 @@
 # limitations under the License.
 
 set -x
-date +"%Y-%m-%d %T.%3N"
 umask 0022
 echo $$
 ps -e -o pid,pgrp,user,args
@@ -38,8 +37,6 @@ export M2_OPTS="-Xmx1g -XX:MaxPermSize=256m -Djava.io.tmpdir=/some/log/dir/tmp $
 export HADOOP_ROOT_LOGGER=INFO,console
 export HADOOP_OPTS="-Dhive.log.dir=/some/log/dir -Dhive.query.id=hadoop -Djava.io.tmpdir=/some/log/dir/tmp"
 cd /some/local/dir/instance-1/apache-source || exit 1
-date +"%Y-%m-%d %T.%3N"
-echo "Pre test cleanup"
 if [[ -s batch.pid ]]
 then
   while read pid
@@ -55,11 +52,10 @@ echo "$$" > batch.pid
 find ./ -name 'TEST-*.xml' -delete
 find ./ -name 'hive.log' -delete
 find ./ -name junit_metastore_db | xargs -r rm -rf
-date +"%Y-%m-%d %T.%3N"
-echo "Pre test cleanup done"
 ret=0
 if [[ "maven" == "maven" ]]
 then
+  testModule=$(find ./ -name 'TestCliDriver.java' | awk -F'/' '{print $2}')
   if [[ -z "$testModule" ]]
   then
 testModule=./
@@ -67,7 +63,6 @@ then
   pushd $testModule
   timeout 40m mvn -B test -Dmaven.repo.local=/some/local/dir/instance-1/maven \
 -Dant=arg1 $mavenTestArgs -Dtest=arg1 1>/some/log/dir/maven-test.txt 2>&1 
> batch.pid
 wait $pid
 ret=$?
-date +"%Y-%m-%d %T.%3N"
 find ./ -type f -name hive.log -o -name spark.log -o -name derby.log | \
  xargs -I {} sh -c 'f=$(basename {}); test -f /some/log/dir/$f && f=$f-$(uuidgen); mv {} /some/log/dir/$f'
 find ./ -type f -name 'TEST-*.xml' | \
@@ -99,7 +93,7 @@ find ./ -path "*/spark/work" | \
   xargs -I {} sh -c 'mv {} /some/log/dir/spark-log'
 find ./ -type f -name 'syslog*' | \
  xargs -I {} sh -c 'mkdir -p /some/log/dir/syslogs; mv {} /some/log/dir/syslogs'
-date +"%Y-%m-%d %T.%3N"
+
 
 if [[ -f /some/log/dir/.log ]]
 then

http://git-wip-us.apache.org/repos/asf/hive/blob/0e91e28a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepGit.approved.txt
--
diff --git a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepGit.approved.txt b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepGit.approved.txt
index 29d2413..d58d910 100644
--- a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepGit.approved.txt
+++ b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepGit.approved.txt
@@ -15,7 +15,6 @@
 
 set -e
 set -x
-date +"%Y-%m-%d %T.%3N"
 if [[ -n "/usr/java/jdk1.7" ]]
 then
   export JAVA_HOME=/usr/java/jdk1.7
@@ -64,15 +63,13 @@ cd /some/working/dir/
 then
   git clone git:///repo1 apache-source
 fi
-date +"%Y-%m-%d %T.%3N"
 cd apache-source
 git fetch origin
 git reset --hard HEAD && git clean -f -d
 git checkout branch-1 || git checkout -b branch-1 origin/branch-1
 git reset --hard origin/branch-1
 git merge --ff-only origin/branch-1
-#git gc
-date +"%Y-%m-%d %T.%3N"
+git gc
   else
 echo "Unknown repository type 'git'"
 exit 1
@@ -88,11 +85,8 @@ cd /some/working/dir/
   then
 rm -rf /some/working/dir/maven/org/apache/hive
 mvn -B clean install -DskipTests -Dmaven.repo.local=/some/working/dir/maven -X -Phadoop-2
-if [[ -d "itests" ]]
-then
-  cd itests
-  mvn -B clean install -DskipTests -Dmaven.repo.local=/some/working/dir/maven -X -Phadoop-2
-fi
+cd itests
+mvn -B clean install -DskipTests -Dmaven.repo.local=/some/working/dir/maven -X -Phadoop-2
   elif [[ "${buildTool}" == "ant" ]]
   then
 ant -Dant=arg1 -Divy.default.ivy.user.dir=/some/working/dir/ivy \
@@ -102,6 +96,5 @@ cd /some/working/dir/
  echo "Unknown build tool ${buildTool}"
  exit 127
fi
-   date +"%Y-%m-%d %T.%3N"
 ) 2>&1 | tee /some/log/dir/source-prep.txt
 exit ${PIPESTATUS[0]}

http://git-wip-us.apache.org/repos/asf/hive/blob/0e91e28a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepHadoop1.approved.txt
--
diff --git a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepHadoop1.approved.txt

[2/2] hive git commit: Revert "HIVE-14540. Add support in ptest to create batches for non qfile tests."

2016-09-14 Thread sseth
Revert "HIVE-14540. Add support in ptest to create batches for non qfile tests."

This reverts commit 94dcf4abb360a0571eb54d220a1fb0b3130211cf.


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0e91e28a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0e91e28a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0e91e28a

Branch: refs/heads/master
Commit: 0e91e28a9b81916259b8ec8d82445e398d33d4c3
Parents: 94dcf4a
Author: Siddharth Seth 
Authored: Wed Sep 14 14:42:14 2016 -0700
Committer: Siddharth Seth 
Committed: Wed Sep 14 14:42:14 2016 -0700

--
 .../hive/ptest/execution/ExecutionPhase.java|   8 -
 .../hive/ptest/execution/HostExecutor.java  |   7 +-
 .../ptest/execution/conf/FileListProvider.java  |  31 -
 .../ptest/execution/conf/QFileTestBatch.java|  11 +-
 .../hive/ptest/execution/conf/TestBatch.java|  38 +-
 .../ptest/execution/conf/TestConfiguration.java |   4 -
 .../hive/ptest/execution/conf/TestParser.java   |  54 +-
 .../ptest/execution/conf/UnitTestBatch.java |  88 +--
 .../conf/UnitTestPropertiesParser.java  | 662 --
 .../ptest2/src/main/resources/batch-exec.vm |  10 +-
 .../ptest2/src/main/resources/source-prep.vm|  20 +-
 .../execution/MockRSyncCommandExecutor.java |   7 -
 .../ptest/execution/MockSSHCommandExecutor.java |   7 -
 .../ptest/execution/TestExecutionPhase.java |   9 +-
 ...cutionPhase.testFailingUnitTest.approved.txt |   6 +-
 ...cutionPhase.testPassingUnitTest.approved.txt |   6 +-
 .../hive/ptest/execution/TestHostExecutor.java  |  51 +-
 .../TestHostExecutor.testBasic.approved.txt |  24 +-
 ...xecutor.testIsolatedFailsOnExec.approved.txt |  10 +-
 ...tor.testIsolatedFailsOnRsyncOne.approved.txt |   8 +-
 ...testIsolatedFailsOnRsyncUnknown.approved.txt |   8 +-
 ...xecutor.testParallelFailsOnExec.approved.txt |  10 +-
 ...ecutor.testParallelFailsOnRsync.approved.txt |   8 +-
 ...tScripts.testAlternativeTestJVM.approved.txt |  10 +-
 .../TestScripts.testBatch.approved.txt  |  10 +-
 .../TestScripts.testPrepGit.approved.txt|  13 +-
 .../TestScripts.testPrepHadoop1.approved.txt|  20 +-
 .../TestScripts.testPrepNone.approved.txt   |  13 +-
 .../TestScripts.testPrepSvn.approved.txt|  13 +-
 .../execution/conf/TestQFileTestBatch.java  |   2 +-
 .../ptest/execution/conf/TestTestParser.java|   3 -
 .../conf/TestUnitTestPropertiesParser.java  | 671 ---
 .../ptest2/src/test/resources/log4j2.properties |  62 --
 .../resources/test-configuration2.properties| 135 
 34 files changed, 166 insertions(+), 1873 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/0e91e28a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java
--
diff --git a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java
index 65af6fa..6063afc 100644
--- a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java
+++ b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java
@@ -88,14 +88,6 @@ public class ExecutionPhase extends Phase {
 }
 logger.info("ParallelWorkQueueSize={}, IsolatedWorkQueueSize={}", 
parallelWorkQueue.size(),
 isolatedWorkQueue.size());
-if (logger.isDebugEnabled()) {
-  for (TestBatch testBatch : parallelWorkQueue) {
-    logger.debug("PBatch: {}", testBatch);
-  }
-  for (TestBatch testBatch : isolatedWorkQueue) {
-    logger.debug("IBatch: {}", testBatch);
-  }
-}
 try {
   int expectedNumHosts = hostExecutors.size();
   initalizeHosts();

http://git-wip-us.apache.org/repos/asf/hive/blob/0e91e28a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/HostExecutor.java
--
diff --git a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/HostExecutor.java b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/HostExecutor.java
index 123e310..5f84f00 100644
--- a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/HostExecutor.java
+++ b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/HostExecutor.java
@@ -30,7 +30,6 @@ import java.util.concurrent.CopyOnWriteArrayList;
 import java.util.concurrent.TimeUnit;
 
 import com.google.common.base.Stopwatch;
-import org.apache.commons.lang.StringUtils;
 import org.apache.hive.ptest.execution.conf.Host;
 import org.apache.hive.ptest.execution.conf.TestBatch;
 import org.apache.hive.ptest.execution.ssh.RSyncCommand;
@@ -41,6 

[2/2] hive git commit: HIVE-14540. Add support in ptest to create batches for non qfile tests. (Siddharth Seth, reviewed by Prasanth Jayachandran)

2016-09-14 Thread sseth
HIVE-14540. Add support in ptest to create batches for non qfile tests.
(Siddharth Seth, reviewed by Prasanth Jayachandran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/94dcf4ab
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/94dcf4ab
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/94dcf4ab

Branch: refs/heads/master
Commit: 94dcf4abb360a0571eb54d220a1fb0b3130211cf
Parents: 76fe9e7
Author: Siddharth Seth 
Authored: Wed Sep 14 14:27:46 2016 -0700
Committer: Siddharth Seth 
Committed: Wed Sep 14 14:27:46 2016 -0700

--
 .../hive/ptest/execution/ExecutionPhase.java|   8 +
 .../hive/ptest/execution/HostExecutor.java  |   7 +-
 .../ptest/execution/conf/FileListProvider.java  |  31 +
 .../ptest/execution/conf/QFileTestBatch.java|  11 +-
 .../hive/ptest/execution/conf/TestBatch.java|  38 +-
 .../ptest/execution/conf/TestConfiguration.java |   4 +
 .../hive/ptest/execution/conf/TestParser.java   |  54 +-
 .../ptest/execution/conf/UnitTestBatch.java |  88 ++-
 .../conf/UnitTestPropertiesParser.java  | 662 ++
 .../ptest2/src/main/resources/batch-exec.vm |  10 +-
 .../ptest2/src/main/resources/source-prep.vm|  20 +-
 .../execution/MockRSyncCommandExecutor.java |   7 +
 .../ptest/execution/MockSSHCommandExecutor.java |   7 +
 .../ptest/execution/TestExecutionPhase.java |   9 +-
 ...cutionPhase.testFailingUnitTest.approved.txt |   6 +-
 ...cutionPhase.testPassingUnitTest.approved.txt |   6 +-
 .../hive/ptest/execution/TestHostExecutor.java  |  51 +-
 .../TestHostExecutor.testBasic.approved.txt |  24 +-
 ...xecutor.testIsolatedFailsOnExec.approved.txt |  10 +-
 ...tor.testIsolatedFailsOnRsyncOne.approved.txt |   8 +-
 ...testIsolatedFailsOnRsyncUnknown.approved.txt |   8 +-
 ...xecutor.testParallelFailsOnExec.approved.txt |  10 +-
 ...ecutor.testParallelFailsOnRsync.approved.txt |   8 +-
 ...tScripts.testAlternativeTestJVM.approved.txt |  10 +-
 .../TestScripts.testBatch.approved.txt  |  10 +-
 .../TestScripts.testPrepGit.approved.txt|  13 +-
 .../TestScripts.testPrepHadoop1.approved.txt|  20 +-
 .../TestScripts.testPrepNone.approved.txt   |  13 +-
 .../TestScripts.testPrepSvn.approved.txt|  13 +-
 .../execution/conf/TestQFileTestBatch.java  |   2 +-
 .../ptest/execution/conf/TestTestParser.java|   3 +
 .../conf/TestUnitTestPropertiesParser.java  | 671 +++
 .../ptest2/src/test/resources/log4j2.properties |  62 ++
 .../resources/test-configuration2.properties| 135 
 34 files changed, 1873 insertions(+), 166 deletions(-)
--
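
The batching support summarized above centers on UnitTestBatch and the new UnitTestPropertiesParser, which group non-qfile test classes into batches. As a rough illustration of the batching idea only (hypothetical names; the actual parser reads module, exclude, and isolate settings from test-configuration2.properties):

    // Hypothetical sketch: split test class names into batches of at most batchSize.
    import java.util.ArrayList;
    import java.util.List;

    final class BatchingSketch {
      static List<List<String>> toBatches(List<String> testClasses, int batchSize) {
        List<List<String>> batches = new ArrayList<>();
        for (int i = 0; i < testClasses.size(); i += batchSize) {
          int end = Math.min(i + batchSize, testClasses.size());
          batches.add(new ArrayList<>(testClasses.subList(i, end)));
        }
        return batches;
      }
    }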


http://git-wip-us.apache.org/repos/asf/hive/blob/94dcf4ab/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java
--
diff --git a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java
index 6063afc..65af6fa 100644
--- a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java
+++ b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java
@@ -88,6 +88,14 @@ public class ExecutionPhase extends Phase {
 }
 logger.info("ParallelWorkQueueSize={}, IsolatedWorkQueueSize={}", 
parallelWorkQueue.size(),
 isolatedWorkQueue.size());
+if (logger.isDebugEnabled()) {
+  for (TestBatch testBatch : parallelWorkQueue) {
+    logger.debug("PBatch: {}", testBatch);
+  }
+  for (TestBatch testBatch : isolatedWorkQueue) {
+    logger.debug("IBatch: {}", testBatch);
+  }
+}
 try {
   int expectedNumHosts = hostExecutors.size();
   initalizeHosts();

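The hunk above adds per-batch debug logging behind a logger.isDebugEnabled() guard, the usual SLF4J idiom for skipping loop work when debug output is off. A minimal standalone sketch of the same pattern (class and batch names assumed):

    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;

    final class DebugLoggingSketch {
      private static final Logger LOG = LoggerFactory.getLogger(DebugLoggingSketch.class);

      static void logBatches(Iterable<String> parallel, Iterable<String> isolated) {
        // The guard skips both loops entirely unless debug logging is enabled.
        if (LOG.isDebugEnabled()) {
          for (String b : parallel) {
            LOG.debug("PBatch: {}", b);
          }
          for (String b : isolated) {
            LOG.debug("IBatch: {}", b);
          }
        }
      }
    }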
http://git-wip-us.apache.org/repos/asf/hive/blob/94dcf4ab/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/HostExecutor.java
--
diff --git a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/HostExecutor.java b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/HostExecutor.java
index 5f84f00..123e310 100644
--- a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/HostExecutor.java
+++ b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/HostExecutor.java
@@ -30,6 +30,7 @@ import java.util.concurrent.CopyOnWriteArrayList;
 import java.util.concurrent.TimeUnit;
 
 import com.google.common.base.Stopwatch;
+import org.apache.commons.lang.StringUtils;
 import org.apache.hive.ptest.execution.conf.Host;
 import org.apache.hive.ptest.execution.conf.TestBatch;
 import org.apache.hive.ptest.execution.ssh.RSyncCommand;
@@ -40,7 +41,6 @@ import 

[1/2] hive git commit: HIVE-14540. Add support in ptest to create batches for non qfile tests. (Siddharth Seth, reviewed by Prasanth Jayachandran)

2016-09-14 Thread sseth
Repository: hive
Updated Branches:
  refs/heads/master 76fe9e783 -> 94dcf4abb


http://git-wip-us.apache.org/repos/asf/hive/blob/94dcf4ab/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testBatch.approved.txt
--
diff --git a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testBatch.approved.txt b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testBatch.approved.txt
index 3270167..e165240 100644
--- a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testBatch.approved.txt
+++ b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testBatch.approved.txt
@@ -15,6 +15,7 @@
 # limitations under the License.
 
 set -x
+date +"%Y-%m-%d %T.%3N"
 umask 0022
 echo $$
 ps -e -o pid,pgrp,user,args
@@ -37,6 +38,8 @@ export M2_OPTS="-Xmx1g -XX:MaxPermSize=256m -Djava.io.tmpdir=/some/log/dir/tmp $
 export HADOOP_ROOT_LOGGER=INFO,console
 export HADOOP_OPTS="-Dhive.log.dir=/some/log/dir -Dhive.query.id=hadoop -Djava.io.tmpdir=/some/log/dir/tmp"
 cd /some/local/dir/instance-1/apache-source || exit 1
+date +"%Y-%m-%d %T.%3N"
+echo "Pre test cleanup"
 if [[ -s batch.pid ]]
 then
   while read pid
@@ -52,10 +55,11 @@ echo "$$" > batch.pid
 find ./ -name 'TEST-*.xml' -delete
 find ./ -name 'hive.log' -delete
 find ./ -name junit_metastore_db | xargs -r rm -rf
+date +"%Y-%m-%d %T.%3N"
+echo "Pre test cleanup done"
 ret=0
 if [[ "maven" == "maven" ]]
 then
-  testModule=$(find ./ -name 'TestCliDriver.java' | awk -F'/' '{print $2}')
   if [[ -z "$testModule" ]]
   then
 testModule=./
@@ -63,6 +67,7 @@ then
   pushd $testModule
   timeout 40m mvn -B test -Dmaven.repo.local=/some/local/dir/instance-1/maven \
 -Dant=arg1 $mavenTestArgs -Dtest=arg1 1>/some/log/dir/maven-test.txt 2>&1 
> batch.pid
 wait $pid
 ret=$?
+date +"%Y-%m-%d %T.%3N"
 find ./ -type f -name hive.log -o -name spark.log -o -name derby.log | \
  xargs -I {} sh -c 'f=$(basename {}); test -f /some/log/dir/$f && f=$f-$(uuidgen); mv {} /some/log/dir/$f'
 find ./ -type f -name 'TEST-*.xml' | \
@@ -93,7 +99,7 @@ find ./ -path "*/spark/work" | \
   xargs -I {} sh -c 'mv {} /some/log/dir/spark-log'
 find ./ -type f -name 'syslog*' | \
  xargs -I {} sh -c 'mkdir -p /some/log/dir/syslogs; mv {} /some/log/dir/syslogs'
-
+date +"%Y-%m-%d %T.%3N"
 
 if [[ -f /some/log/dir/.log ]]
 then

http://git-wip-us.apache.org/repos/asf/hive/blob/94dcf4ab/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepGit.approved.txt
--
diff --git a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepGit.approved.txt b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepGit.approved.txt
index d58d910..29d2413 100644
--- a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepGit.approved.txt
+++ b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepGit.approved.txt
@@ -15,6 +15,7 @@
 
 set -e
 set -x
+date +"%Y-%m-%d %T.%3N"
 if [[ -n "/usr/java/jdk1.7" ]]
 then
   export JAVA_HOME=/usr/java/jdk1.7
@@ -63,13 +64,15 @@ cd /some/working/dir/
 then
   git clone git:///repo1 apache-source
 fi
+date +"%Y-%m-%d %T.%3N"
 cd apache-source
 git fetch origin
 git reset --hard HEAD && git clean -f -d
 git checkout branch-1 || git checkout -b branch-1 origin/branch-1
 git reset --hard origin/branch-1
 git merge --ff-only origin/branch-1
-git gc
+#git gc
+date +"%Y-%m-%d %T.%3N"
   else
 echo "Unknown repository type 'git'"
 exit 1
@@ -85,8 +88,11 @@ cd /some/working/dir/
   then
 rm -rf /some/working/dir/maven/org/apache/hive
 mvn -B clean install -DskipTests -Dmaven.repo.local=/some/working/dir/maven -X -Phadoop-2
-cd itests
-mvn -B clean install -DskipTests -Dmaven.repo.local=/some/working/dir/maven -X -Phadoop-2
+if [[ -d "itests" ]]
+then
+  cd itests
+  mvn -B clean install -DskipTests -Dmaven.repo.local=/some/working/dir/maven -X -Phadoop-2
+fi
   elif [[ "${buildTool}" == "ant" ]]
   then
 ant -Dant=arg1 -Divy.default.ivy.user.dir=/some/working/dir/ivy \
@@ -96,5 +102,6 @@ cd /some/working/dir/
  echo "Unknown build tool ${buildTool}"
  exit 127
fi
+   date +"%Y-%m-%d %T.%3N"
 ) 2>&1 | tee /some/log/dir/source-prep.txt
 exit ${PIPESTATUS[0]}

http://git-wip-us.apache.org/repos/asf/hive/blob/94dcf4ab/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepHadoop1.approved.txt
--
diff --git a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepHadoop1.approved.txt

[1/2] hive git commit: HIVE-14251: Union All of different types resolves to incorrect data (Aihua Xu, reviewed by Mohit Sabharwal)

2016-09-14 Thread aihuaxu
Repository: hive
Updated Branches:
  refs/heads/master 0b62e6f38 -> 76fe9e783


http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/results/clientpositive/spark/union32.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/union32.q.out b/ql/src/test/results/clientpositive/spark/union32.q.out
index 16cb243..755e936 100644
--- a/ql/src/test/results/clientpositive/spark/union32.q.out
+++ b/ql/src/test/results/clientpositive/spark/union32.q.out
@@ -429,14 +429,14 @@ POSTHOOK: Input: default@t2
 PREHOOK: query: -- Test union with join on the left selecting multiple columns
 EXPLAIN
 SELECT * FROM 
-(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key
+(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS CHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key
 UNION ALL
 SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a
 PREHOOK: type: QUERY
 POSTHOOK: query: -- Test union with join on the left selecting multiple columns
 EXPLAIN
 SELECT * FROM 
-(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key
+(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS CHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key
 UNION ALL
 SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a
 POSTHOOK: type: QUERY
@@ -509,7 +509,7 @@ STAGE PLANS:
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
 Select Operator
-  expressions: UDFToDouble(UDFToLong(_col0)) (type: double), UDFToString(UDFToDouble(_col1)) (type: string)
+  expressions: UDFToDouble(UDFToLong(_col0)) (type: double), UDFToString(CAST( _col1 AS CHAR(20))) (type: string)
   outputColumnNames: _col0, _col1
   Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
   File Output Operator
@@ -527,7 +527,7 @@ STAGE PLANS:
 ListSink
 
 PREHOOK: query: SELECT * FROM 
-(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key
+(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS VARCHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key
 UNION ALL
 SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a
 PREHOOK: type: QUERY
@@ -535,7 +535,7 @@ PREHOOK: Input: default@t1
 PREHOOK: Input: default@t2
  A masked pattern was here 
 POSTHOOK: query: SELECT * FROM 
-(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key
+(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS VARCHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key
 UNION ALL
 SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a
 POSTHOOK: type: QUERY
@@ -545,48 +545,48 @@ POSTHOOK: Input: default@t2
 0.0	0
 0.0	0
 0.0	0
-0.0	0.0
-0.0	0.0
-0.0	0.0
-0.0	0.0
-0.0	0.0
-0.0	0.0
-0.0	0.0
-0.0	0.0
-0.0	0.0
+0.0	0
+0.0	0
+0.0	0
+0.0	0
+0.0	0
+0.0	0
+0.0	0
+0.0	0
+0.0	0
 2.0	2
-2.0	2.0
+2.0	2
+4.0	4
 4.0	4
-4.0	4.0
 5.0	5
 5.0	5
 5.0	5
-5.0	5.0
-5.0	5.0
-5.0	5.0
-5.0	5.0
-5.0	5.0
-5.0	5.0
-5.0	5.0
-5.0	5.0
-5.0	5.0
+5.0	5
+5.0	5
+5.0	5
+5.0	5
+5.0	5
+5.0	5
+5.0	5
+5.0	5
+5.0	5
+8.0	8
 8.0	8
-8.0	8.0
 9.0	9
-9.0	9.0
+9.0	9
 PREHOOK: query: -- Test union with join on the right selecting multiple columns
 EXPLAIN
 SELECT * FROM 
 (SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2
 UNION ALL
-SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
+SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS CHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
 PREHOOK: type: QUERY
POSTHOOK: query: -- Test union with join on the right selecting multiple columns
 EXPLAIN
 SELECT * FROM 
 (SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2
 UNION ALL
-SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
+SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS CHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -605,7 +605,7 @@ STAGE PLANS:
   alias: t2
   Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
   Select Operator
-expressions: UDFToDouble(key) (type: double), UDFToDouble(key) (type: double)
+expressions: UDFToDouble(key) (type: double), key (type: string)
 outputColumnNames: _col0, 

[1/2] hive git commit: HIVE-13878: Vectorization: Column pruning for Text vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-09-14 Thread mmccline
Repository: hive
Updated Branches:
  refs/heads/master ff67cdda1 -> 0b62e6f38


http://git-wip-us.apache.org/repos/asf/hive/blob/0b62e6f3/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
--
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
index 472ace7..ee945d4 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
@@ -39,7 +39,7 @@ import org.apache.hadoop.io.WritableUtils;
 * Directly deserialize with the caller reading field-by-field the LazyBinary serialization format.
  *
 * The caller is responsible for calling the read method for the right type of each field
- * (after calling readCheckNull).
+ * (after calling readNextField).
  *
 * Reading some fields require a results object to receive value information.  A separate
 * results object is created by the caller at initialization per different field even for the same
@@ -65,17 +65,12 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
   private VInt tempVInt;
   private VLong tempVLong;
 
-  private boolean readBeyondConfiguredFieldsWarned;
-  private boolean bufferRangeHasExtraDataWarned;
-
   public LazyBinaryDeserializeRead(TypeInfo[] typeInfos, boolean useExternalBuffer) {
 super(typeInfos, useExternalBuffer);
 fieldCount = typeInfos.length;
 tempVInt = new VInt();
 tempVLong = new VLong();
 currentExternalBufferNeeded = false;
-readBeyondConfiguredFieldsWarned = false;
-bufferRangeHasExtraDataWarned = false;
   }
 
   // Not public since we must have the field count so every 8 fields NULL bytes can be navigated.
@@ -122,22 +117,19 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
   }
 
   /*
-   * Reads the NULL information for a field.
+   * Reads the next field.
+   *
+   * Afterwards, reading is positioned to the next field.
+   *
+   * @return  Return true when the field was not null and data is put in the appropriate
+   *  current* member.
+   *  Otherwise, false when the field is null.
   *
-   * @return Returns true when the field is NULL; reading is positioned to the next field.
-   * Otherwise, false when the field is NOT NULL; reading is positioned to the field data.
*/
   @Override
-  public boolean readCheckNull() throws IOException {
+  public boolean readNextField() throws IOException {
 if (fieldIndex >= fieldCount) {
-  // Reading beyond the specified field count produces NULL.
-  if (!readBeyondConfiguredFieldsWarned) {
-// Warn only once.
-LOG.info("Reading beyond configured fields! Configured " + fieldCount 
+ " fields but "
-+ " reading more (NULLs returned).  Ignoring similar problems.");
-readBeyondConfiguredFieldsWarned = true;
-  }
-  return true;
+  return false;
 }
 
 fieldStart = offset;
@@ -151,12 +143,24 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
   nullByte = bytes[offset++];
 }
 
-// NOTE: The bit is set to 1 if a field is NOT NULL.
-boolean isNull;
+// NOTE: The bit is set to 1 if a field is NOT NULL.
 if ((nullByte & (1 << (fieldIndex % 8))) == 0) {
-  isNull = true;
+
+  // Logically move past this field.
+  fieldIndex++;
+
+  // Every 8 fields we read a new NULL byte.
+  if (fieldIndex < fieldCount) {
+if ((fieldIndex % 8) == 0) {
+  // Get next null byte.
+  if (offset >= end) {
+throw new EOFException();
+  }
+  nullByte = bytes[offset++];
+}
+  }
+  return false;
 } else {
-  isNull = false;// Assume.
 
   // Make sure there is at least one byte that can be read for a value.
   if (offset >= end) {
@@ -336,24 +340,30 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
 
   HiveDecimal decimal = currentHiveDecimalWritable.getHiveDecimal(precision, scale);
   if (decimal == null) {
-isNull = true;
-  } else {
-// Put value back into writable.
-currentHiveDecimalWritable.set(decimal);
+
+// Logically move past this field.
+fieldIndex++;
+
+// Every 8 fields we read a new NULL byte.
+if (fieldIndex < fieldCount) {
+  if ((fieldIndex % 8) == 0) {
+// Get next null byte.
+if (offset >= end) {
+  throw new EOFException();
+}
+nullByte = bytes[offset++];
+  }
+}
+return false;
 

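Per the revised Javadoc earlier in this diff, callers drive LazyBinaryDeserializeRead field by field: readNextField() returns true when the field is non-null and its value has been placed in the matching current* member. A hedged caller sketch of that contract (initialization and the exact current* accessors are assumed, not shown in this diff):

    // Hypothetical caller loop for the readNextField contract described above;
    // 'read' is assumed to be an initialized LazyBinaryDeserializeRead set over a row.
    for (int i = 0; i < fieldCount; i++) {
      if (read.readNextField()) {
        // Not null: consume the value from the appropriate current* member
        // for field i's declared type.
      } else {
        // Null: per the Javadoc, reading is already positioned at the next field.
      }
    }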
[2/2] hive git commit: HIVE-13878: Vectorization: Column pruning for Text vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-09-14 Thread mmccline
HIVE-13878: Vectorization: Column pruning for Text vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0b62e6f3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0b62e6f3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0b62e6f3

Branch: refs/heads/master
Commit: 0b62e6f38788de81816abacf025d61bbc80d75fa
Parents: ff67cdd
Author: Matt McCline 
Authored: Tue Sep 13 23:15:56 2016 -0700
Committer: Matt McCline 
Committed: Tue Sep 13 23:15:56 2016 -0700

--
 .../ql/exec/vector/VectorDeserializeRow.java| 238 +++---
 .../hive/ql/exec/vector/VectorMapOperator.java  |  22 +-
 .../fast/VectorMapJoinFastLongHashTable.java|   2 +-
 .../fast/VectorMapJoinFastStringCommon.java |   2 +-
 .../VectorMapJoinOptimizedLongCommon.java   |  56 --
 .../VectorMapJoinOptimizedStringCommon.java |  26 -
 .../hive/ql/optimizer/physical/Vectorizer.java  |  25 +-
 .../hive/ql/exec/vector/TestVectorSerDeRow.java |  14 +-
 .../mapjoin/fast/CheckFastRowHashMap.java   |  10 +-
 .../exec/vector/mapjoin/fast/VerifyFastRow.java |   2 +-
 .../fast/BinarySortableDeserializeRead.java | 132 ++--
 .../hive/serde2/fast/DeserializeRead.java   |  71 +-
 .../lazy/fast/LazySimpleDeserializeRead.java| 770 ++-
 .../fast/LazyBinaryDeserializeRead.java | 119 +--
 .../apache/hadoop/hive/serde2/VerifyFast.java   |   2 +-
 .../binarysortable/TestBinarySortableFast.java  |  35 +-
 .../hive/serde2/lazy/TestLazySimpleFast.java|  31 +-
 .../serde2/lazybinary/TestLazyBinaryFast.java   |  32 +-
 18 files changed, 787 insertions(+), 802 deletions(-)
--
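
The diffstat shows the pruning plumbing: VectorDeserializeRow appears to pass a columnsToIncludeTruncated mask down to the DeserializeRead implementations so excluded columns can be skipped. A hedged sketch of the mask semantics only (hypothetical helper; the actual handling lives in VectorDeserializeRow and the *DeserializeRead classes):

    // Hypothetical include-mask check: a truncated boolean mask where any column
    // at or beyond mask.length is treated as excluded (left to default/null).
    static boolean isIncluded(boolean[] mask, int column) {
      // A null mask is assumed to mean no pruning: every column is included.
      if (mask == null) {
        return true;
      }
      return column < mask.length && mask[column];
    }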


http://git-wip-us.apache.org/repos/asf/hive/blob/0b62e6f3/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
index 47bef43..d31d338 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.exec.vector;
 
 import java.io.EOFException;
 import java.io.IOException;
+import java.util.Arrays;
 import java.util.List;
 
 import org.slf4j.Logger;
@@ -97,20 +98,27 @@ public final class VectorDeserializeRow {
* We say "source" because when there is conversion we are converting th 
deserialized source into
* a target data type.
*/
-  boolean[] isConvert;
+
+  private boolean useReadField;
+// True when the (random access) readField method of DeserializeRead are being used.
+
+  private int[] readFieldLogicalIndices;
+// The logical indices for reading with readField.
+
+  private boolean[] isConvert;
 // For each column, are we converting the row column?
 
-  int[] projectionColumnNums;
+  private int[] projectionColumnNums;
 // Assigning can be a subset of columns, so this is the projection --
 // the batch column numbers.
 
-  Category[] sourceCategories;
+  private Category[] sourceCategories;
 // The data type category of each column being deserialized.
 
-  PrimitiveCategory[] sourcePrimitiveCategories;
+  private PrimitiveCategory[] sourcePrimitiveCategories;
 //The data type primitive category of each column being deserialized.
 
-  int[] maxLengths;
+  private int[] maxLengths;
 // For the CHAR and VARCHAR data types, the maximum character length of
 // the columns.  Otherwise, 0.
 
@@ -131,6 +139,7 @@ public final class VectorDeserializeRow {
   private void allocateArrays(int count) {
 isConvert = new boolean[count];
 projectionColumnNums = new int[count];
+Arrays.fill(projectionColumnNums, -1);
 sourceCategories = new Category[count];
 sourcePrimitiveCategories = new PrimitiveCategory[count];
 maxLengths = new int[count];
@@ -231,14 +240,18 @@ public final class VectorDeserializeRow {
 
   public void init(boolean[] columnsToIncludeTruncated) throws HiveException {
 
-if (columnsToIncludeTruncated != null) {
-  deserializeRead.setColumnsToInclude(columnsToIncludeTruncated);
-}
+// When truncated included is used, its length must be at least the number of source type infos.
+// When longer, we assume the caller will default with nulls, etc.
+Preconditions.checkState(
+columnsToIncludeTruncated == null ||
+columnsToIncludeTruncated.length == sourceTypeInfos.length);
 
-final int