[1/2] hive git commit: Revert "HIVE-14540. Add support in ptest to create batches for non qfile tests."
Repository: hive Updated Branches: refs/heads/master 94dcf4abb -> 0e91e28a9 http://git-wip-us.apache.org/repos/asf/hive/blob/0e91e28a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testBatch.approved.txt -- diff --git a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testBatch.approved.txt b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testBatch.approved.txt index e165240..3270167 100644 --- a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testBatch.approved.txt +++ b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testBatch.approved.txt @@ -15,7 +15,6 @@ # limitations under the License. set -x -date +"%Y-%m-%d %T.%3N" umask 0022 echo $$ ps -e -o pid,pgrp,user,args @@ -38,8 +37,6 @@ export M2_OPTS="-Xmx1g -XX:MaxPermSize=256m -Djava.io.tmpdir=/some/log/dir/tmp $ export HADOOP_ROOT_LOGGER=INFO,console export HADOOP_OPTS="-Dhive.log.dir=/some/log/dir -Dhive.query.id=hadoop -Djava.io.tmpdir=/some/log/dir/tmp" cd /some/local/dir/instance-1/apache-source || exit 1 -date +"%Y-%m-%d %T.%3N" -echo "Pre test cleanup" if [[ -s batch.pid ]] then while read pid @@ -55,11 +52,10 @@ echo "$$" > batch.pid find ./ -name 'TEST-*.xml' -delete find ./ -name 'hive.log' -delete find ./ -name junit_metastore_db | xargs -r rm -rf -date +"%Y-%m-%d %T.%3N" -echo "Pre test cleanup done" ret=0 if [[ "maven" == "maven" ]] then + testModule=$(find ./ -name 'TestCliDriver.java' | awk -F'/' '{print $2}') if [[ -z "$testModule" ]] then testModule=./ @@ -67,7 +63,6 @@ then pushd $testModule timeout 40m mvn -B test -Dmaven.repo.local=/some/local/dir/instance-1/maven \ -Dant=arg1 $mavenTestArgs -Dtest=arg1 1>/some/log/dir/maven-test.txt 2>&1 > batch.pid wait $pid ret=$? 
-date +"%Y-%m-%d %T.%3N" find ./ -type f -name hive.log -o -name spark.log -o -name derby.log | \ xargs -I {} sh -c 'f=$(basename {}); test -f /some/log/dir/$f && f=$f-$(uuidgen); mv {} /some/log/dir/$f' find ./ -type f -name 'TEST-*.xml' | \ @@ -99,7 +93,7 @@ find ./ -path "*/spark/work" | \ xargs -I {} sh -c 'mv {} /some/log/dir/spark-log' find ./ -type f -name 'syslog*' | \ xargs -I {} sh -c 'mkdir -p /some/log/dir/syslogs; mv {} /some/log/dir/syslogs' -date +"%Y-%m-%d %T.%3N" + if [[ -f /some/log/dir/.log ]] then http://git-wip-us.apache.org/repos/asf/hive/blob/0e91e28a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepGit.approved.txt -- diff --git a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepGit.approved.txt b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepGit.approved.txt index 29d2413..d58d910 100644 --- a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepGit.approved.txt +++ b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepGit.approved.txt @@ -15,7 +15,6 @@ set -e set -x -date +"%Y-%m-%d %T.%3N" if [[ -n "/usr/java/jdk1.7" ]] then export JAVA_HOME=/usr/java/jdk1.7 @@ -64,15 +63,13 @@ cd /some/working/dir/ then git clone git:///repo1 apache-source fi -date +"%Y-%m-%d %T.%3N" cd apache-source git fetch origin git reset --hard HEAD && git clean -f -d git checkout branch-1 || git checkout -b branch-1 origin/branch-1 git reset --hard origin/branch-1 git merge --ff-only origin/branch-1 -#git gc -date +"%Y-%m-%d %T.%3N" +git gc else echo "Unknown repository type 'git'" exit 1 @@ -88,11 +85,8 @@ cd /some/working/dir/ then rm -rf /some/working/dir/maven/org/apache/hive mvn -B clean install -DskipTests -Dmaven.repo.local=/some/working/dir/maven -X -Phadoop-2 -if [[ -d "itests" ]] -then - cd itests - mvn -B clean install -DskipTests -Dmaven.repo.local=/some/working/dir/maven -X -Phadoop-2 -fi 
+cd itests +mvn -B clean install -DskipTests -Dmaven.repo.local=/some/working/dir/maven -X -Phadoop-2 elif [[ "${buildTool}" == "ant" ]] then ant -Dant=arg1 -Divy.default.ivy.user.dir=/some/working/dir/ivy \ @@ -102,6 +96,5 @@ cd /some/working/dir/ echo "Unknown build tool ${buildTool}" exit 127 fi - date +"%Y-%m-%d %T.%3N" ) 2>&1 | tee /some/log/dir/source-prep.txt exit ${PIPESTATUS[0]} http://git-wip-us.apache.org/repos/asf/hive/blob/0e91e28a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepHadoop1.approved.txt -- diff --git a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepHadoop1.approved.txt
[2/2] hive git commit: Revert "HIVE-14540. Add support in ptest to create batches for non qfile tests."
Revert "HIVE-14540. Add support in ptest to create batches for non qfile tests." This reverts commit 94dcf4abb360a0571eb54d220a1fb0b3130211cf. Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0e91e28a Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0e91e28a Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0e91e28a Branch: refs/heads/master Commit: 0e91e28a9b81916259b8ec8d82445e398d33d4c3 Parents: 94dcf4a Author: Siddharth Seth Authored: Wed Sep 14 14:42:14 2016 -0700 Committer: Siddharth Seth Committed: Wed Sep 14 14:42:14 2016 -0700 -- .../hive/ptest/execution/ExecutionPhase.java| 8 - .../hive/ptest/execution/HostExecutor.java | 7 +- .../ptest/execution/conf/FileListProvider.java | 31 - .../ptest/execution/conf/QFileTestBatch.java| 11 +- .../hive/ptest/execution/conf/TestBatch.java| 38 +- .../ptest/execution/conf/TestConfiguration.java | 4 - .../hive/ptest/execution/conf/TestParser.java | 54 +- .../ptest/execution/conf/UnitTestBatch.java | 88 +-- .../conf/UnitTestPropertiesParser.java | 662 -- .../ptest2/src/main/resources/batch-exec.vm | 10 +- .../ptest2/src/main/resources/source-prep.vm| 20 +- .../execution/MockRSyncCommandExecutor.java | 7 - .../ptest/execution/MockSSHCommandExecutor.java | 7 - .../ptest/execution/TestExecutionPhase.java | 9 +- ...cutionPhase.testFailingUnitTest.approved.txt | 6 +- ...cutionPhase.testPassingUnitTest.approved.txt | 6 +- .../hive/ptest/execution/TestHostExecutor.java | 51 +- .../TestHostExecutor.testBasic.approved.txt | 24 +- ...xecutor.testIsolatedFailsOnExec.approved.txt | 10 +- ...tor.testIsolatedFailsOnRsyncOne.approved.txt | 8 +- ...testIsolatedFailsOnRsyncUnknown.approved.txt | 8 +- ...xecutor.testParallelFailsOnExec.approved.txt | 10 +- ...ecutor.testParallelFailsOnRsync.approved.txt | 8 +- ...tScripts.testAlternativeTestJVM.approved.txt | 10 +- .../TestScripts.testBatch.approved.txt | 10 +- .../TestScripts.testPrepGit.approved.txt| 13 +- 
.../TestScripts.testPrepHadoop1.approved.txt| 20 +- .../TestScripts.testPrepNone.approved.txt | 13 +- .../TestScripts.testPrepSvn.approved.txt| 13 +- .../execution/conf/TestQFileTestBatch.java | 2 +- .../ptest/execution/conf/TestTestParser.java| 3 - .../conf/TestUnitTestPropertiesParser.java | 671 --- .../ptest2/src/test/resources/log4j2.properties | 62 -- .../resources/test-configuration2.properties| 135 34 files changed, 166 insertions(+), 1873 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/0e91e28a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java -- diff --git a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java index 65af6fa..6063afc 100644 --- a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java +++ b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java @@ -88,14 +88,6 @@ public class ExecutionPhase extends Phase { } logger.info("ParallelWorkQueueSize={}, IsolatedWorkQueueSize={}", parallelWorkQueue.size(), isolatedWorkQueue.size()); -if (logger.isDebugEnabled()) { - for (TestBatch testBatch : parallelWorkQueue) { -logger.debug("PBatch: {}", testBatch); - } - for (TestBatch testBatch : isolatedWorkQueue) { -logger.debug("IBatch: {}", testBatch); - } -} try { int expectedNumHosts = hostExecutors.size(); initalizeHosts(); http://git-wip-us.apache.org/repos/asf/hive/blob/0e91e28a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/HostExecutor.java -- diff --git a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/HostExecutor.java b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/HostExecutor.java index 123e310..5f84f00 100644 --- a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/HostExecutor.java +++ b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/HostExecutor.java 
@@ -30,7 +30,6 @@ import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.TimeUnit; import com.google.common.base.Stopwatch; -import org.apache.commons.lang.StringUtils; import org.apache.hive.ptest.execution.conf.Host; import org.apache.hive.ptest.execution.conf.TestBatch; import org.apache.hive.ptest.execution.ssh.RSyncCommand; @@ -41,6
[2/2] hive git commit: HIVE-14540. Add support in ptest to create batches for non qfile tests. (Siddharth Seth, reviewed by Prasanth Jayachandran)
HIVE-14540. Add support in ptest to create batches for non qfile tests. (Siddharth Seth, reviewed by Prasanth Jayachandran) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/94dcf4ab Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/94dcf4ab Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/94dcf4ab Branch: refs/heads/master Commit: 94dcf4abb360a0571eb54d220a1fb0b3130211cf Parents: 76fe9e7 Author: Siddharth Seth Authored: Wed Sep 14 14:27:46 2016 -0700 Committer: Siddharth Seth Committed: Wed Sep 14 14:27:46 2016 -0700 -- .../hive/ptest/execution/ExecutionPhase.java| 8 + .../hive/ptest/execution/HostExecutor.java | 7 +- .../ptest/execution/conf/FileListProvider.java | 31 + .../ptest/execution/conf/QFileTestBatch.java| 11 +- .../hive/ptest/execution/conf/TestBatch.java| 38 +- .../ptest/execution/conf/TestConfiguration.java | 4 + .../hive/ptest/execution/conf/TestParser.java | 54 +- .../ptest/execution/conf/UnitTestBatch.java | 88 ++- .../conf/UnitTestPropertiesParser.java | 662 ++ .../ptest2/src/main/resources/batch-exec.vm | 10 +- .../ptest2/src/main/resources/source-prep.vm| 20 +- .../execution/MockRSyncCommandExecutor.java | 7 + .../ptest/execution/MockSSHCommandExecutor.java | 7 + .../ptest/execution/TestExecutionPhase.java | 9 +- ...cutionPhase.testFailingUnitTest.approved.txt | 6 +- ...cutionPhase.testPassingUnitTest.approved.txt | 6 +- .../hive/ptest/execution/TestHostExecutor.java | 51 +- .../TestHostExecutor.testBasic.approved.txt | 24 +- ...xecutor.testIsolatedFailsOnExec.approved.txt | 10 +- ...tor.testIsolatedFailsOnRsyncOne.approved.txt | 8 +- ...testIsolatedFailsOnRsyncUnknown.approved.txt | 8 +- ...xecutor.testParallelFailsOnExec.approved.txt | 10 +- ...ecutor.testParallelFailsOnRsync.approved.txt | 8 +- ...tScripts.testAlternativeTestJVM.approved.txt | 10 +- .../TestScripts.testBatch.approved.txt | 10 +- .../TestScripts.testPrepGit.approved.txt| 13 +- 
.../TestScripts.testPrepHadoop1.approved.txt| 20 +- .../TestScripts.testPrepNone.approved.txt | 13 +- .../TestScripts.testPrepSvn.approved.txt| 13 +- .../execution/conf/TestQFileTestBatch.java | 2 +- .../ptest/execution/conf/TestTestParser.java| 3 + .../conf/TestUnitTestPropertiesParser.java | 671 +++ .../ptest2/src/test/resources/log4j2.properties | 62 ++ .../resources/test-configuration2.properties| 135 34 files changed, 1873 insertions(+), 166 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/94dcf4ab/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java -- diff --git a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java index 6063afc..65af6fa 100644 --- a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java +++ b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java @@ -88,6 +88,14 @@ public class ExecutionPhase extends Phase { } logger.info("ParallelWorkQueueSize={}, IsolatedWorkQueueSize={}", parallelWorkQueue.size(), isolatedWorkQueue.size()); +if (logger.isDebugEnabled()) { + for (TestBatch testBatch : parallelWorkQueue) { +logger.debug("PBatch: {}", testBatch); + } + for (TestBatch testBatch : isolatedWorkQueue) { +logger.debug("IBatch: {}", testBatch); + } +} try { int expectedNumHosts = hostExecutors.size(); initalizeHosts(); http://git-wip-us.apache.org/repos/asf/hive/blob/94dcf4ab/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/HostExecutor.java -- diff --git a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/HostExecutor.java b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/HostExecutor.java index 5f84f00..123e310 100644 --- a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/HostExecutor.java +++ b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/HostExecutor.java 
@@ -30,6 +30,7 @@ import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.TimeUnit; import com.google.common.base.Stopwatch; +import org.apache.commons.lang.StringUtils; import org.apache.hive.ptest.execution.conf.Host; import org.apache.hive.ptest.execution.conf.TestBatch; import org.apache.hive.ptest.execution.ssh.RSyncCommand; @@ -40,7 +41,6 @@ import
[1/2] hive git commit: HIVE-14540. Add support in ptest to create batches for non qfile tests. (Siddharth Seth, reviewed by Prasanth Jayachandran)
Repository: hive Updated Branches: refs/heads/master 76fe9e783 -> 94dcf4abb http://git-wip-us.apache.org/repos/asf/hive/blob/94dcf4ab/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testBatch.approved.txt -- diff --git a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testBatch.approved.txt b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testBatch.approved.txt index 3270167..e165240 100644 --- a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testBatch.approved.txt +++ b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testBatch.approved.txt @@ -15,6 +15,7 @@ # limitations under the License. set -x +date +"%Y-%m-%d %T.%3N" umask 0022 echo $$ ps -e -o pid,pgrp,user,args @@ -37,6 +38,8 @@ export M2_OPTS="-Xmx1g -XX:MaxPermSize=256m -Djava.io.tmpdir=/some/log/dir/tmp $ export HADOOP_ROOT_LOGGER=INFO,console export HADOOP_OPTS="-Dhive.log.dir=/some/log/dir -Dhive.query.id=hadoop -Djava.io.tmpdir=/some/log/dir/tmp" cd /some/local/dir/instance-1/apache-source || exit 1 +date +"%Y-%m-%d %T.%3N" +echo "Pre test cleanup" if [[ -s batch.pid ]] then while read pid @@ -52,10 +55,11 @@ echo "$$" > batch.pid find ./ -name 'TEST-*.xml' -delete find ./ -name 'hive.log' -delete find ./ -name junit_metastore_db | xargs -r rm -rf +date +"%Y-%m-%d %T.%3N" +echo "Pre test cleanup done" ret=0 if [[ "maven" == "maven" ]] then - testModule=$(find ./ -name 'TestCliDriver.java' | awk -F'/' '{print $2}') if [[ -z "$testModule" ]] then testModule=./ @@ -63,6 +67,7 @@ then pushd $testModule timeout 40m mvn -B test -Dmaven.repo.local=/some/local/dir/instance-1/maven \ -Dant=arg1 $mavenTestArgs -Dtest=arg1 1>/some/log/dir/maven-test.txt 2>&1 > batch.pid wait $pid ret=$? 
+date +"%Y-%m-%d %T.%3N" find ./ -type f -name hive.log -o -name spark.log -o -name derby.log | \ xargs -I {} sh -c 'f=$(basename {}); test -f /some/log/dir/$f && f=$f-$(uuidgen); mv {} /some/log/dir/$f' find ./ -type f -name 'TEST-*.xml' | \ @@ -93,7 +99,7 @@ find ./ -path "*/spark/work" | \ xargs -I {} sh -c 'mv {} /some/log/dir/spark-log' find ./ -type f -name 'syslog*' | \ xargs -I {} sh -c 'mkdir -p /some/log/dir/syslogs; mv {} /some/log/dir/syslogs' - +date +"%Y-%m-%d %T.%3N" if [[ -f /some/log/dir/.log ]] then http://git-wip-us.apache.org/repos/asf/hive/blob/94dcf4ab/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepGit.approved.txt -- diff --git a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepGit.approved.txt b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepGit.approved.txt index d58d910..29d2413 100644 --- a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepGit.approved.txt +++ b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepGit.approved.txt @@ -15,6 +15,7 @@ set -e set -x +date +"%Y-%m-%d %T.%3N" if [[ -n "/usr/java/jdk1.7" ]] then export JAVA_HOME=/usr/java/jdk1.7 @@ -63,13 +64,15 @@ cd /some/working/dir/ then git clone git:///repo1 apache-source fi +date +"%Y-%m-%d %T.%3N" cd apache-source git fetch origin git reset --hard HEAD && git clean -f -d git checkout branch-1 || git checkout -b branch-1 origin/branch-1 git reset --hard origin/branch-1 git merge --ff-only origin/branch-1 -git gc +#git gc +date +"%Y-%m-%d %T.%3N" else echo "Unknown repository type 'git'" exit 1 @@ -85,8 +88,11 @@ cd /some/working/dir/ then rm -rf /some/working/dir/maven/org/apache/hive mvn -B clean install -DskipTests -Dmaven.repo.local=/some/working/dir/maven -X -Phadoop-2 -cd itests -mvn -B clean install -DskipTests -Dmaven.repo.local=/some/working/dir/maven -X -Phadoop-2 +if [[ -d "itests" ]] +then + cd 
itests + mvn -B clean install -DskipTests -Dmaven.repo.local=/some/working/dir/maven -X -Phadoop-2 +fi elif [[ "${buildTool}" == "ant" ]] then ant -Dant=arg1 -Divy.default.ivy.user.dir=/some/working/dir/ivy \ @@ -96,5 +102,6 @@ cd /some/working/dir/ echo "Unknown build tool ${buildTool}" exit 127 fi + date +"%Y-%m-%d %T.%3N" ) 2>&1 | tee /some/log/dir/source-prep.txt exit ${PIPESTATUS[0]} http://git-wip-us.apache.org/repos/asf/hive/blob/94dcf4ab/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepHadoop1.approved.txt -- diff --git a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepHadoop1.approved.txt
[1/2] hive git commit: HIVE-14251: Union All of different types resolves to incorrect data (Aihua Xu, reviewed by Mohit Sabharwal)
Repository: hive Updated Branches: refs/heads/master 0b62e6f38 -> 76fe9e783 http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/results/clientpositive/spark/union32.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/union32.q.out b/ql/src/test/results/clientpositive/spark/union32.q.out index 16cb243..755e936 100644 --- a/ql/src/test/results/clientpositive/spark/union32.q.out +++ b/ql/src/test/results/clientpositive/spark/union32.q.out @@ -429,14 +429,14 @@ POSTHOOK: Input: default@t2 PREHOOK: query: -- Test union with join on the left selecting multiple columns EXPLAIN SELECT * FROM -(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key +(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS CHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key UNION ALL SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a PREHOOK: type: QUERY POSTHOOK: query: -- Test union with join on the left selecting multiple columns EXPLAIN SELECT * FROM -(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key +(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS CHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key UNION ALL SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a POSTHOOK: type: QUERY @@ -509,7 +509,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToDouble(UDFToLong(_col0)) (type: double), UDFToString(UDFToDouble(_col1)) (type: string) + expressions: UDFToDouble(UDFToLong(_col0)) (type: double), UDFToString(CAST( _col1 AS CHAR(20)) (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -527,7 +527,7 @@ STAGE PLANS: ListSink PREHOOK: query: SELECT * FROM -(SELECT CAST(a.key AS BIGINT) AS key, 
CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key +(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS VARCHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key UNION ALL SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a PREHOOK: type: QUERY @@ -535,7 +535,7 @@ PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 A masked pattern was here POSTHOOK: query: SELECT * FROM -(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key +(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS VARCHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key UNION ALL SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a POSTHOOK: type: QUERY @@ -545,48 +545,48 @@ POSTHOOK: Input: default@t2 0.00 0.00 0.00 -0.00.0 -0.00.0 -0.00.0 -0.00.0 -0.00.0 -0.00.0 -0.00.0 -0.00.0 -0.00.0 +0.00 +0.00 +0.00 +0.00 +0.00 +0.00 +0.00 +0.00 +0.00 2.02 -2.02.0 +2.02 +4.04 4.04 -4.04.0 5.05 5.05 5.05 -5.05.0 -5.05.0 -5.05.0 -5.05.0 -5.05.0 -5.05.0 -5.05.0 -5.05.0 -5.05.0 +5.05 +5.05 +5.05 +5.05 +5.05 +5.05 +5.05 +5.05 +5.05 +8.08 8.08 -8.08.0 9.09 -9.09.0 +9.09 PREHOOK: query: -- Test union with join on the right selecting multiple columns EXPLAIN SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2 UNION ALL -SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a +SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS CHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a PREHOOK: type: QUERY POSTHOOK: query: -- Test union with join on the right selecting multiple columns EXPLAIN SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2 UNION ALL -SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a +SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS CHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a POSTHOOK: type: 
QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -605,7 +605,7 @@ STAGE PLANS: alias: t2 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: UDFToDouble(key) (type: double), UDFToDouble(key) (type: double) +expressions: UDFToDouble(key) (type: double), key (type: string) outputColumnNames: _col0,
[1/2] hive git commit: HIVE-13878: Vectorization: Column pruning for Text vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)
Repository: hive Updated Branches: refs/heads/master ff67cdda1 -> 0b62e6f38 http://git-wip-us.apache.org/repos/asf/hive/blob/0b62e6f3/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java -- diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java index 472ace7..ee945d4 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java @@ -39,7 +39,7 @@ import org.apache.hadoop.io.WritableUtils; * Directly deserialize with the caller reading field-by-field the LazyBinary serialization format. * * The caller is responsible for calling the read method for the right type of each field - * (after calling readCheckNull). + * (after calling readNextField). * * Reading some fields require a results object to receive value information. A separate * results object is created by the caller at initialization per different field even for the same @@ -65,17 +65,12 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead { private VInt tempVInt; private VLong tempVLong; - private boolean readBeyondConfiguredFieldsWarned; - private boolean bufferRangeHasExtraDataWarned; - public LazyBinaryDeserializeRead(TypeInfo[] typeInfos, boolean useExternalBuffer) { super(typeInfos, useExternalBuffer); fieldCount = typeInfos.length; tempVInt = new VInt(); tempVLong = new VLong(); currentExternalBufferNeeded = false; -readBeyondConfiguredFieldsWarned = false; -bufferRangeHasExtraDataWarned = false; } // Not public since we must have the field count so every 8 fields NULL bytes can be navigated. @@ -122,22 +117,19 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead { } /* - * Reads the NULL information for a field. + * Reads the the next field. 
+ * + * Afterwards, reading is positioned to the next field. + * + * @return Return true when the field was not null and data is put in the appropriate + * current* member. + * Otherwise, false when the field is null. * - * @return Returns true when the field is NULL; reading is positioned to the next field. - * Otherwise, false when the field is NOT NULL; reading is positioned to the field data. */ @Override - public boolean readCheckNull() throws IOException { + public boolean readNextField() throws IOException { if (fieldIndex >= fieldCount) { - // Reading beyond the specified field count produces NULL. - if (!readBeyondConfiguredFieldsWarned) { -// Warn only once. -LOG.info("Reading beyond configured fields! Configured " + fieldCount + " fields but " -+ " reading more (NULLs returned). Ignoring similar problems."); -readBeyondConfiguredFieldsWarned = true; - } - return true; + return false; } fieldStart = offset; @@ -151,12 +143,24 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead { nullByte = bytes[offset++]; } -// NOTE: The bit is set to 1 if a field is NOT NULL. -boolean isNull; +// NOTE: The bit is set to 1 if a field is NOT NULL.boolean isNull; if ((nullByte & (1 << (fieldIndex % 8))) == 0) { - isNull = true; + + // Logically move past this field. + fieldIndex++; + + // Every 8 fields we read a new NULL byte. + if (fieldIndex < fieldCount) { +if ((fieldIndex % 8) == 0) { + // Get next null byte. + if (offset >= end) { +throw new EOFException(); + } + nullByte = bytes[offset++]; +} + } + return false; } else { - isNull = false;// Assume. // Make sure there is at least one byte that can be read for a value. if (offset >= end) { @@ -336,24 +340,30 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead { HiveDecimal decimal = currentHiveDecimalWritable.getHiveDecimal(precision, scale); if (decimal == null) { -isNull = true; - } else { -// Put value back into writable. 
-currentHiveDecimalWritable.set(decimal); + +// Logically move past this field. +fieldIndex++; + +// Every 8 fields we read a new NULL byte. +if (fieldIndex < fieldCount) { + if ((fieldIndex % 8) == 0) { +// Get next null byte. +if (offset >= end) { + throw new EOFException(); +} +nullByte = bytes[offset++]; + } +} +return false;
[2/2] hive git commit: HIVE-13878: Vectorization: Column pruning for Text vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)
HIVE-13878: Vectorization: Column pruning for Text vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0b62e6f3 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0b62e6f3 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0b62e6f3 Branch: refs/heads/master Commit: 0b62e6f38788de81816abacf025d61bbc80d75fa Parents: ff67cdd Author: Matt McCline Authored: Tue Sep 13 23:15:56 2016 -0700 Committer: Matt McCline Committed: Tue Sep 13 23:15:56 2016 -0700 -- .../ql/exec/vector/VectorDeserializeRow.java| 238 +++--- .../hive/ql/exec/vector/VectorMapOperator.java | 22 +- .../fast/VectorMapJoinFastLongHashTable.java| 2 +- .../fast/VectorMapJoinFastStringCommon.java | 2 +- .../VectorMapJoinOptimizedLongCommon.java | 56 -- .../VectorMapJoinOptimizedStringCommon.java | 26 - .../hive/ql/optimizer/physical/Vectorizer.java | 25 +- .../hive/ql/exec/vector/TestVectorSerDeRow.java | 14 +- .../mapjoin/fast/CheckFastRowHashMap.java | 10 +- .../exec/vector/mapjoin/fast/VerifyFastRow.java | 2 +- .../fast/BinarySortableDeserializeRead.java | 132 ++-- .../hive/serde2/fast/DeserializeRead.java | 71 +- .../lazy/fast/LazySimpleDeserializeRead.java| 770 ++- .../fast/LazyBinaryDeserializeRead.java | 119 +-- .../apache/hadoop/hive/serde2/VerifyFast.java | 2 +- .../binarysortable/TestBinarySortableFast.java | 35 +- .../hive/serde2/lazy/TestLazySimpleFast.java| 31 +- .../serde2/lazybinary/TestLazyBinaryFast.java | 32 +- 18 files changed, 787 insertions(+), 802 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/0b62e6f3/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java index 47bef43..d31d338 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.exec.vector; import java.io.EOFException; import java.io.IOException; +import java.util.Arrays; import java.util.List; import org.slf4j.Logger; @@ -97,20 +98,27 @@ public final class VectorDeserializeRow { * We say "source" because when there is conversion we are converting th deserialized source into * a target data type. */ - boolean[] isConvert; + + private boolean useReadField; +// True when the (random access) readField method of DeserializeRead are being used. + + private int[] readFieldLogicalIndices; +// The logical indices for reading with readField. + + private boolean[] isConvert; // For each column, are we converting the row column? - int[] projectionColumnNums; + private int[] projectionColumnNums; // Assigning can be a subset of columns, so this is the projection -- // the batch column numbers. - Category[] sourceCategories; + private Category[] sourceCategories; // The data type category of each column being deserialized. - PrimitiveCategory[] sourcePrimitiveCategories; + private PrimitiveCategory[] sourcePrimitiveCategories; //The data type primitive category of each column being deserialized. - int[] maxLengths; + private int[] maxLengths; // For the CHAR and VARCHAR data types, the maximum character length of // the columns. Otherwise, 0. 
@@ -131,6 +139,7 @@ public final class VectorDeserializeRow { private void allocateArrays(int count) { isConvert = new boolean[count]; projectionColumnNums = new int[count]; +Arrays.fill(projectionColumnNums, -1); sourceCategories = new Category[count]; sourcePrimitiveCategories = new PrimitiveCategory[count]; maxLengths = new int[count]; @@ -231,14 +240,18 @@ public final class VectorDeserializeRow { public void init(boolean[] columnsToIncludeTruncated) throws HiveException { -if (columnsToIncludeTruncated != null) { - deserializeRead.setColumnsToInclude(columnsToIncludeTruncated); -} +// When truncated included is used, its length must be at least the number of source type infos. +// When longer, we assume the caller will default with nulls, etc. +Preconditions.checkState( +columnsToIncludeTruncated == null || +columnsToIncludeTruncated.length == sourceTypeInfos.length); -final int