[1/3] hive git commit: HIVE-14913 : addendum patch
Repository: hive Updated Branches: refs/heads/master f2efa6a2b -> 6cca9911b http://git-wip-us.apache.org/repos/asf/hive/blob/6cca9911/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out b/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out index f50d28b..8eb6e35 100644 --- a/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out @@ -221,11 +221,6 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd A masked pattern was here Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 16711 - HDFS_BYTES_WRITTEN: 104 - HDFS_READ_OPS: 5 - HDFS_LARGE_READ_OPS: 0 - HDFS_WRITE_OPS: 2 Stage-1 HIVE COUNTERS: CREATED_FILES: 1 DESERIALIZE_ERRORS: 0 @@ -245,11 +240,6 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd A masked pattern was here Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 0 - HDFS_BYTES_WRITTEN: 101 - HDFS_READ_OPS: 2 - HDFS_LARGE_READ_OPS: 0 - HDFS_WRITE_OPS: 2 Stage-1 HIVE COUNTERS: CREATED_FILES: 1 RECORDS_OUT_0: 1 @@ -260,11 +250,6 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd A masked pattern was here Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 638 - HDFS_BYTES_WRITTEN: 101 - HDFS_READ_OPS: 4 - HDFS_LARGE_READ_OPS: 0 - HDFS_WRITE_OPS: 2 Stage-1 HIVE COUNTERS: CREATED_FILES: 1 DESERIALIZE_ERRORS: 0 @@ -287,11 +272,6 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd A masked pattern was here Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 0 - HDFS_BYTES_WRITTEN: 102 - HDFS_READ_OPS: 2 - HDFS_LARGE_READ_OPS: 0 - HDFS_WRITE_OPS: 2 Stage-1 HIVE COUNTERS: CREATED_FILES: 1 DESERIALIZE_ERRORS: 0 @@ -312,11 +292,6 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd A masked pattern was here Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 0 - HDFS_BYTES_WRITTEN: 102 - HDFS_READ_OPS: 2 - HDFS_LARGE_READ_OPS: 0 - HDFS_WRITE_OPS: 2 Stage-1 HIVE COUNTERS: CREATED_FILES: 1 
DESERIALIZE_ERRORS: 0 @@ -338,11 +313,6 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd A masked pattern was here Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 0 - HDFS_BYTES_WRITTEN: 102 - HDFS_READ_OPS: 2 - HDFS_LARGE_READ_OPS: 0 - HDFS_WRITE_OPS: 2 Stage-1 HIVE COUNTERS: CREATED_FILES: 1 DESERIALIZE_ERRORS: 0 @@ -364,11 +334,6 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd A masked pattern was here Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 0 - HDFS_BYTES_WRITTEN: 101 - HDFS_READ_OPS: 2 - HDFS_LARGE_READ_OPS: 0 - HDFS_WRITE_OPS: 2 Stage-1 HIVE COUNTERS: CREATED_FILES: 1 DESERIALIZE_ERRORS: 0 @@ -390,11 +355,6 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd A masked pattern was here Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 0 - HDFS_BYTES_WRITTEN: 102 - HDFS_READ_OPS: 2 - HDFS_LARGE_READ_OPS: 0 - HDFS_WRITE_OPS: 2 Stage-1 HIVE COUNTERS: CREATED_FILES: 1 DESERIALIZE_ERRORS: 0 @@ -415,11 +375,6 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd A masked pattern was here Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 0 - HDFS_BYTES_WRITTEN: 102 - HDFS_READ_OPS: 2 - HDFS_LARGE_READ_OPS: 0 - HDFS_WRITE_OPS: 2 Stage-1 HIVE COUNTERS: CREATED_FILES: 1 DESERIALIZE_ERRORS: 0 @@ -441,11 +396,6 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd A masked pattern was here Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 0 - HDFS_BYTES_WRITTEN: 104 - HDFS_READ_OPS: 2 - HDFS_LARGE_READ_OPS: 0 - HDFS_WRITE_OPS: 2 Stage-1 HIVE COUNTERS: CREATED_FILES: 1 DESERIALIZE_ERRORS: 0 @@ -467,11 +417,6 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd A masked pattern was here Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 0 - HDFS_BYTES_WRITTEN: 102 - HDFS_READ_OPS: 2 - HDFS_LARGE_READ_OPS: 0 - HDFS_WRITE_OPS: 2 Stage-1 HIVE COUNTERS: CREATED_FILES: 1 DESERIALIZE_ERRORS: 0 @@ -493,11 +438,6 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd A masked pattern was here Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 0 - 
HDFS_BYTES_WRITTEN: 104 - HDFS_READ_OPS: 2 - HDFS_LARGE_READ_OPS: 0 - HDFS_WRITE_OPS: 2 Stage-1 HIVE COUNTERS: CREATED_FILES: 1 DESERIALIZE_ERRORS: 0 @@ -519,11 +459,6 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd A masked pattern was here Stage-1 FILE SYSTEM COUNTERS: - HDFS_BYTES_READ: 0 - HDFS_BYTES_WRITTEN: 101 - HDFS_READ_OPS: 2 - HDFS_LARGE_READ_OPS: 0 - HDFS_WRITE_OPS: 2 Stage-1 HIVE COUNTERS: CREATED_FILES: 1 DESERIALIZE_ERRORS: 0 @@ -545,11 +480,6 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orc_ppd
[3/3] hive git commit: HIVE-14913 : addendum patch
HIVE-14913 : addendum patch Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6cca9911 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6cca9911 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6cca9911 Branch: refs/heads/master Commit: 6cca9911bde22dbe9dfacbfa188dd7a56062f202 Parents: f2efa6a Author: Ashutosh Chauhan Authored: Fri Oct 21 06:51:44 2016 -0700 Committer: Ashutosh Chauhan Committed: Fri Oct 21 06:51:44 2016 -0700 -- .../test/queries/clientpositive/orc_ppd_basic.q |6 + .../clientpositive/current_date_timestamp.q.out | 376 -- .../llap/current_date_timestamp.q.out | 4780 ++ .../clientpositive/llap/orc_ppd_basic.q.out | 467 +- 4 files changed, 4884 insertions(+), 745 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/6cca9911/ql/src/test/queries/clientpositive/orc_ppd_basic.q -- diff --git a/ql/src/test/queries/clientpositive/orc_ppd_basic.q b/ql/src/test/queries/clientpositive/orc_ppd_basic.q index c3ba24a..41134a0 100644 --- a/ql/src/test/queries/clientpositive/orc_ppd_basic.q +++ b/ql/src/test/queries/clientpositive/orc_ppd_basic.q @@ -191,6 +191,12 @@ set hive.optimize.index.filter=true; select count(*) from orc_ppd where f=74.72; +RESET; +set hive.compute.query.using.stats=false; +set hive.mapred.mode=nonstrict; +SET hive.fetch.task.conversion=none; +SET hive.optimize.index.filter=true; +SET hive.cbo.enable=false; create temporary table tmp_orcppd stored as orc as select ctinyint, csmallint, cint , cbigint, cfloat, cdouble, http://git-wip-us.apache.org/repos/asf/hive/blob/6cca9911/ql/src/test/results/clientpositive/current_date_timestamp.q.out -- diff --git a/ql/src/test/results/clientpositive/current_date_timestamp.q.out b/ql/src/test/results/clientpositive/current_date_timestamp.q.out deleted file mode 100644 index 93e5104..000 --- a/ql/src/test/results/clientpositive/current_date_timestamp.q.out +++ /dev/null @@ -1,376 +0,0 @@ -PREHOOK: query: select
current_timestamp = current_timestamp(), current_date = current_date() from src limit 5 -PREHOOK: type: QUERY -PREHOOK: Input: default@src - A masked pattern was here -POSTHOOK: query: select current_timestamp = current_timestamp(), current_date = current_date() from src limit 5 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src - A masked pattern was here -true true -true true -true true -true true -true true -PREHOOK: query: --ensure that timestamp is same for all the rows while using current_timestamp() query should return single row -select count(*) from (select current_timestamp() from alltypesorc union select current_timestamp() from src limit 5 ) subq -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesorc -PREHOOK: Input: default@src - A masked pattern was here -POSTHOOK: query: --ensure that timestamp is same for all the rows while using current_timestamp() query should return single row -select count(*) from (select current_timestamp() from alltypesorc union select current_timestamp() from src limit 5 ) subq -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesorc -POSTHOOK: Input: default@src - A masked pattern was here -1 -PREHOOK: query: select count(*) from (select current_timestamp() from alltypesorc -union - select current_timestamp() from src - limit 5 ) subqr -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesorc -PREHOOK: Input: default@src - A masked pattern was here -POSTHOOK: query: select count(*) from (select current_timestamp() from alltypesorc -union - select current_timestamp() from src - limit 5 ) subqr -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesorc -POSTHOOK: Input: default@src - A masked pattern was here -1 -PREHOOK: query: --current_timestamp() should appear as expression -explain extended select current_timestamp() from alltypesorc -PREHOOK: type: QUERY -POSTHOOK: query: --current_timestamp() should appear as expression -explain extended select current_timestamp() from alltypesorc -POSTHOOK: type: QUERY 
-STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 -Fetch Operator - limit: -1 - Processor Tree: -TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Select Operator -expressions:
[2/3] hive git commit: HIVE-14913 : addendum patch
http://git-wip-us.apache.org/repos/asf/hive/blob/6cca9911/ql/src/test/results/clientpositive/llap/current_date_timestamp.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/current_date_timestamp.q.out b/ql/src/test/results/clientpositive/llap/current_date_timestamp.q.out new file mode 100644 index 000..27e0896 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/current_date_timestamp.q.out @@ -0,0 +1,4780 @@ +PREHOOK: query: select current_timestamp = current_timestamp(), current_date = current_date() from src limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src + A masked pattern was here +POSTHOOK: query: select current_timestamp = current_timestamp(), current_date = current_date() from src limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src + A masked pattern was here +true true +true true +true true +true true +true true +PREHOOK: query: --ensure that timestamp is same for all the rows while using current_timestamp() query should return single row +select count(*) from (select current_timestamp() from alltypesorc union select current_timestamp() from src limit 5 ) subq +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Input: default@src + A masked pattern was here +POSTHOOK: query: --ensure that timestamp is same for all the rows while using current_timestamp() query should return single row +select count(*) from (select current_timestamp() from alltypesorc union select current_timestamp() from src limit 5 ) subq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Input: default@src + A masked pattern was here +1 +PREHOOK: query: select count(*) from (select current_timestamp() from alltypesorc +union + select current_timestamp() from src + limit 5 ) subqr +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Input: default@src + A masked pattern was here +POSTHOOK: query: select count(*) from (select current_timestamp() from alltypesorc +union + select current_timestamp() 
from src + limit 5 ) subqr +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Input: default@src + A masked pattern was here +1 +PREHOOK: query: --current_timestamp() should appear as expression +explain extended select current_timestamp() from alltypesorc +PREHOOK: type: QUERY +POSTHOOK: query: --current_timestamp() should appear as expression +explain extended select current_timestamp() from alltypesorc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 +Fetch Operator + limit: -1 + Processor Tree: +TableScan + alias: alltypesorc + GatherStats: false + Select Operator +expressions: 2012-01-01 01:02:03.0 (type: timestamp) +outputColumnNames: _col0 +ListSink + +PREHOOK: query: --current_timestamp() + insert +create temporary table tmp_runtimeconstant( + ts1 timestamp, + ts2 timestamp, + dt date, + s string, + v varchar(50), + c char(50) +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tmp_runtimeconstant +POSTHOOK: query: --current_timestamp() + insert +create temporary table tmp_runtimeconstant( + ts1 timestamp, + ts2 timestamp, + dt date, + s string, + v varchar(50), + c char(50) +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tmp_runtimeconstant +PREHOOK: query: insert into table tmp_runtimeconstant + select current_timestamp(), + cast(current_timestamp() as timestamp), + cast(current_timestamp() as date), + cast(current_timestamp() as string), + cast(current_timestamp() as varchar(50)), + cast(current_timestamp() as char(50)) + from alltypesorc limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@tmp_runtimeconstant +POSTHOOK: query: insert into table tmp_runtimeconstant + select current_timestamp(), + cast(current_timestamp() as timestamp), + cast(current_timestamp() as date), + cast(current_timestamp() as string), + cast(current_timestamp() as varchar(50)), +
hive git commit: HIVE-14920: S3: Optimize SimpleFetchOptimizer::checkThreshold() (Rajesh Balamohan reviewed by Ashutosh Chauhan)
Repository: hive Updated Branches: refs/heads/master 1dad8 -> f2efa6a2b HIVE-14920: S3: Optimize SimpleFetchOptimizer::checkThreshold() (Rajesh Balamohan reviewed by Ashutosh Chauhan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f2efa6a2 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f2efa6a2 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f2efa6a2 Branch: refs/heads/master Commit: f2efa6a2be52f09e700c931a293c816a446bf619 Parents: 1da Author: Prasanth Jayachandran Authored: Fri Oct 21 00:38:36 2016 -0700 Committer: Prasanth Jayachandran Committed: Fri Oct 21 00:38:36 2016 -0700 -- .../hive/ql/optimizer/SimpleFetchOptimizer.java | 115 +-- 1 file changed, 82 insertions(+), 33 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/f2efa6a2/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java index eb0ba7b..0481110 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java @@ -26,7 +26,16 @@ import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; - +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.atomic.AtomicLong; + +import com.google.common.collect.Lists; +import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.RemoteIterator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.ContentSummary; @@ -162,13 +171,7 @@ public class SimpleFetchOptimizer extends Transform { return true; } }
-long remaining = threshold; -remaining -= data.getInputLength(pctx, remaining); -if (remaining < 0) { - LOG.info("Threshold " + remaining + " exceeded for pseudoMR mode"); - return false; -} -return true; +return data.isDataLengthWitInThreshold(pctx, threshold); } // all we can handle is LimitOperator, FilterOperator SelectOperator and final FS @@ -414,18 +417,16 @@ public class SimpleFetchOptimizer extends Transform { return replaceFSwithLS(fileSink, work.getSerializationNullFormat()); } -private long getInputLength(ParseContext pctx, long remaining) throws Exception { +private boolean isDataLengthWitInThreshold(ParseContext pctx, final long threshold) +throws Exception { if (splitSample != null && splitSample.getTotalLength() != null) { -return splitSample.getTotalLength(); - } - if (splitSample != null) { -return splitSample.getTargetSize(calculateLength(pctx, splitSample.estimateSourceSize(remaining))); +if (LOG.isDebugEnabled()) { + LOG.debug("Threshold " + splitSample.getTotalLength() + " exceeded for pseudoMR mode"); +} +return (threshold - splitSample.getTotalLength()) > 0; } - return calculateLength(pctx, remaining); -} -private long calculateLength(ParseContext pctx, long remaining) throws Exception { - JobConf jobConf = new JobConf(pctx.getConf()); + final JobConf jobConf = new JobConf(pctx.getConf()); Utilities.setColumnNameList(jobConf, scanOp, true); Utilities.setColumnTypeList(jobConf, scanOp, true); HiveStorageHandler handler = table.getStorageHandler(); @@ -434,41 +435,89 @@ public class SimpleFetchOptimizer extends Transform { TableDesc tableDesc = Utilities.getTableDesc(table); PlanUtils.configureInputJobPropertiesForStorageHandler(tableDesc); Utilities.copyTableJobPropertiesToConf(tableDesc, jobConf); -return estimator.estimate(jobConf, scanOp, remaining).getTotalLength(); +long len = estimator.estimate(jobConf, scanOp, threshold).getTotalLength(); +if (LOG.isDebugEnabled()) { + LOG.debug("Threshold " + len + " exceeded for pseudoMR mode"); +} 
+return (threshold - len) > 0; } if (table.isNonNative()) { -return 0; // nothing can be done +return true; // nothing can be done } if (!table.isPartitioned()) { -return getFileLength(jobConf, table.getPath(), table.getInputFormatClass()); +long len = getPathLength(jobConf, table.getPath(), table.getInputFormatClass(), threshold); +if