[1/3] hive git commit: HIVE-14913 : addendum patch

2016-10-21 Thread hashutosh
Repository: hive
Updated Branches:
  refs/heads/master f2efa6a2b -> 6cca9911b


http://git-wip-us.apache.org/repos/asf/hive/blob/6cca9911/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out 
b/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out
index f50d28b..8eb6e35 100644
--- a/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out
+++ b/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out
@@ -221,11 +221,6 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_ppd
  A masked pattern was here 
 Stage-1 FILE SYSTEM COUNTERS:
-   HDFS_BYTES_READ: 16711
-   HDFS_BYTES_WRITTEN: 104
-   HDFS_READ_OPS: 5
-   HDFS_LARGE_READ_OPS: 0
-   HDFS_WRITE_OPS: 2
 Stage-1 HIVE COUNTERS:
CREATED_FILES: 1
DESERIALIZE_ERRORS: 0
@@ -245,11 +240,6 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_ppd
  A masked pattern was here 
 Stage-1 FILE SYSTEM COUNTERS:
-   HDFS_BYTES_READ: 0
-   HDFS_BYTES_WRITTEN: 101
-   HDFS_READ_OPS: 2
-   HDFS_LARGE_READ_OPS: 0
-   HDFS_WRITE_OPS: 2
 Stage-1 HIVE COUNTERS:
CREATED_FILES: 1
RECORDS_OUT_0: 1
@@ -260,11 +250,6 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_ppd
  A masked pattern was here 
 Stage-1 FILE SYSTEM COUNTERS:
-   HDFS_BYTES_READ: 638
-   HDFS_BYTES_WRITTEN: 101
-   HDFS_READ_OPS: 4
-   HDFS_LARGE_READ_OPS: 0
-   HDFS_WRITE_OPS: 2
 Stage-1 HIVE COUNTERS:
CREATED_FILES: 1
DESERIALIZE_ERRORS: 0
@@ -287,11 +272,6 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_ppd
  A masked pattern was here 
 Stage-1 FILE SYSTEM COUNTERS:
-   HDFS_BYTES_READ: 0
-   HDFS_BYTES_WRITTEN: 102
-   HDFS_READ_OPS: 2
-   HDFS_LARGE_READ_OPS: 0
-   HDFS_WRITE_OPS: 2
 Stage-1 HIVE COUNTERS:
CREATED_FILES: 1
DESERIALIZE_ERRORS: 0
@@ -312,11 +292,6 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_ppd
  A masked pattern was here 
 Stage-1 FILE SYSTEM COUNTERS:
-   HDFS_BYTES_READ: 0
-   HDFS_BYTES_WRITTEN: 102
-   HDFS_READ_OPS: 2
-   HDFS_LARGE_READ_OPS: 0
-   HDFS_WRITE_OPS: 2
 Stage-1 HIVE COUNTERS:
CREATED_FILES: 1
DESERIALIZE_ERRORS: 0
@@ -338,11 +313,6 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_ppd
  A masked pattern was here 
 Stage-1 FILE SYSTEM COUNTERS:
-   HDFS_BYTES_READ: 0
-   HDFS_BYTES_WRITTEN: 102
-   HDFS_READ_OPS: 2
-   HDFS_LARGE_READ_OPS: 0
-   HDFS_WRITE_OPS: 2
 Stage-1 HIVE COUNTERS:
CREATED_FILES: 1
DESERIALIZE_ERRORS: 0
@@ -364,11 +334,6 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_ppd
  A masked pattern was here 
 Stage-1 FILE SYSTEM COUNTERS:
-   HDFS_BYTES_READ: 0
-   HDFS_BYTES_WRITTEN: 101
-   HDFS_READ_OPS: 2
-   HDFS_LARGE_READ_OPS: 0
-   HDFS_WRITE_OPS: 2
 Stage-1 HIVE COUNTERS:
CREATED_FILES: 1
DESERIALIZE_ERRORS: 0
@@ -390,11 +355,6 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_ppd
  A masked pattern was here 
 Stage-1 FILE SYSTEM COUNTERS:
-   HDFS_BYTES_READ: 0
-   HDFS_BYTES_WRITTEN: 102
-   HDFS_READ_OPS: 2
-   HDFS_LARGE_READ_OPS: 0
-   HDFS_WRITE_OPS: 2
 Stage-1 HIVE COUNTERS:
CREATED_FILES: 1
DESERIALIZE_ERRORS: 0
@@ -415,11 +375,6 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_ppd
  A masked pattern was here 
 Stage-1 FILE SYSTEM COUNTERS:
-   HDFS_BYTES_READ: 0
-   HDFS_BYTES_WRITTEN: 102
-   HDFS_READ_OPS: 2
-   HDFS_LARGE_READ_OPS: 0
-   HDFS_WRITE_OPS: 2
 Stage-1 HIVE COUNTERS:
CREATED_FILES: 1
DESERIALIZE_ERRORS: 0
@@ -441,11 +396,6 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_ppd
  A masked pattern was here 
 Stage-1 FILE SYSTEM COUNTERS:
-   HDFS_BYTES_READ: 0
-   HDFS_BYTES_WRITTEN: 104
-   HDFS_READ_OPS: 2
-   HDFS_LARGE_READ_OPS: 0
-   HDFS_WRITE_OPS: 2
 Stage-1 HIVE COUNTERS:
CREATED_FILES: 1
DESERIALIZE_ERRORS: 0
@@ -467,11 +417,6 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_ppd
  A masked pattern was here 
 Stage-1 FILE SYSTEM COUNTERS:
-   HDFS_BYTES_READ: 0
-   HDFS_BYTES_WRITTEN: 102
-   HDFS_READ_OPS: 2
-   HDFS_LARGE_READ_OPS: 0
-   HDFS_WRITE_OPS: 2
 Stage-1 HIVE COUNTERS:
CREATED_FILES: 1
DESERIALIZE_ERRORS: 0
@@ -493,11 +438,6 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_ppd
  A masked pattern was here 
 Stage-1 FILE SYSTEM COUNTERS:
-   HDFS_BYTES_READ: 0
-   HDFS_BYTES_WRITTEN: 104
-   HDFS_READ_OPS: 2
-   HDFS_LARGE_READ_OPS: 0
-   HDFS_WRITE_OPS: 2
 Stage-1 HIVE COUNTERS:
CREATED_FILES: 1
DESERIALIZE_ERRORS: 0
@@ -519,11 +459,6 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_ppd
  A masked pattern was here 
 Stage-1 FILE SYSTEM COUNTERS:
-   HDFS_BYTES_READ: 0
-   HDFS_BYTES_WRITTEN: 101
-   HDFS_READ_OPS: 2
-   HDFS_LARGE_READ_OPS: 0
-   HDFS_WRITE_OPS: 2
 Stage-1 HIVE COUNTERS:
CREATED_FILES: 1
DESERIALIZE_ERRORS: 0
@@ -545,11 +480,6 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_ppd
 

[3/3] hive git commit: HIVE-14913 : addendum patch

2016-10-21 Thread hashutosh
HIVE-14913 : addendum patch


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6cca9911
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6cca9911
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6cca9911

Branch: refs/heads/master
Commit: 6cca9911bde22dbe9dfacbfa188dd7a56062f202
Parents: f2efa6a
Author: Ashutosh Chauhan 
Authored: Fri Oct 21 06:51:44 2016 -0700
Committer: Ashutosh Chauhan 
Committed: Fri Oct 21 06:51:44 2016 -0700

--
 .../test/queries/clientpositive/orc_ppd_basic.q |6 +
 .../clientpositive/current_date_timestamp.q.out |  376 --
 .../llap/current_date_timestamp.q.out   | 4780 ++
 .../clientpositive/llap/orc_ppd_basic.q.out |  467 +-
 4 files changed, 4884 insertions(+), 745 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/6cca9911/ql/src/test/queries/clientpositive/orc_ppd_basic.q
--
diff --git a/ql/src/test/queries/clientpositive/orc_ppd_basic.q 
b/ql/src/test/queries/clientpositive/orc_ppd_basic.q
index c3ba24a..41134a0 100644
--- a/ql/src/test/queries/clientpositive/orc_ppd_basic.q
+++ b/ql/src/test/queries/clientpositive/orc_ppd_basic.q
@@ -191,6 +191,12 @@ set hive.optimize.index.filter=true;
 select count(*) from orc_ppd where f=74.72;
 
 
+RESET;
+set hive.compute.query.using.stats=false;
+set hive.mapred.mode=nonstrict;
+SET hive.fetch.task.conversion=none;
+SET hive.optimize.index.filter=true;
+SET hive.cbo.enable=false;
 create temporary table tmp_orcppd
 stored as orc
 as select ctinyint, csmallint, cint , cbigint, cfloat, 
cdouble,

http://git-wip-us.apache.org/repos/asf/hive/blob/6cca9911/ql/src/test/results/clientpositive/current_date_timestamp.q.out
--
diff --git a/ql/src/test/results/clientpositive/current_date_timestamp.q.out 
b/ql/src/test/results/clientpositive/current_date_timestamp.q.out
deleted file mode 100644
index 93e5104..0000000
--- a/ql/src/test/results/clientpositive/current_date_timestamp.q.out
+++ /dev/null
@@ -1,376 +0,0 @@
-PREHOOK: query: select current_timestamp = current_timestamp(), current_date = 
current_date() from src limit 5
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
- A masked pattern was here 
-POSTHOOK: query: select current_timestamp = current_timestamp(), current_date 
= current_date() from src limit 5
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
- A masked pattern was here 
-true   true
-true   true
-true   true
-true   true
-true   true
-PREHOOK: query: --ensure that timestamp is same for all the rows while using 
current_timestamp() query should return single row
-select count(*) from (select current_timestamp() from alltypesorc union select 
current_timestamp() from src limit 5 ) subq
-PREHOOK: type: QUERY
-PREHOOK: Input: default@alltypesorc
-PREHOOK: Input: default@src
- A masked pattern was here 
-POSTHOOK: query: --ensure that timestamp is same for all the rows while using 
current_timestamp() query should return single row
-select count(*) from (select current_timestamp() from alltypesorc union select 
current_timestamp() from src limit 5 ) subq
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@alltypesorc
-POSTHOOK: Input: default@src
- A masked pattern was here 
-1
-PREHOOK: query: select count(*) from (select current_timestamp() from 
alltypesorc
-union
-  select current_timestamp() from src
-  limit 5 ) subqr
-PREHOOK: type: QUERY
-PREHOOK: Input: default@alltypesorc
-PREHOOK: Input: default@src
- A masked pattern was here 
-POSTHOOK: query: select count(*) from (select current_timestamp() from 
alltypesorc
-union
-  select current_timestamp() from src
-  limit 5 ) subqr
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@alltypesorc
-POSTHOOK: Input: default@src
- A masked pattern was here 
-1
-PREHOOK: query: --current_timestamp() should appear as expression
-explain extended select current_timestamp() from alltypesorc
-PREHOOK: type: QUERY
-POSTHOOK: query: --current_timestamp() should appear as expression
-explain extended select current_timestamp() from alltypesorc
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-0 is a root stage
-
-STAGE PLANS:
-  Stage: Stage-0
-Fetch Operator
-  limit: -1
-  Processor Tree:
-TableScan
-  alias: alltypesorc
-  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE 
Column stats: COMPLETE
-  GatherStats: false
-  Select Operator
-expressions: 

[2/3] hive git commit: HIVE-14913 : addendum patch

2016-10-21 Thread hashutosh
http://git-wip-us.apache.org/repos/asf/hive/blob/6cca9911/ql/src/test/results/clientpositive/llap/current_date_timestamp.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/current_date_timestamp.q.out 
b/ql/src/test/results/clientpositive/llap/current_date_timestamp.q.out
new file mode 100644
index 0000000..27e0896
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/current_date_timestamp.q.out
@@ -0,0 +1,4780 @@
+PREHOOK: query: select current_timestamp = current_timestamp(), current_date = 
current_date() from src limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+ A masked pattern was here 
+POSTHOOK: query: select current_timestamp = current_timestamp(), current_date 
= current_date() from src limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+ A masked pattern was here 
+true   true
+true   true
+true   true
+true   true
+true   true
+PREHOOK: query: --ensure that timestamp is same for all the rows while using 
current_timestamp() query should return single row
+select count(*) from (select current_timestamp() from alltypesorc union select 
current_timestamp() from src limit 5 ) subq
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Input: default@src
+ A masked pattern was here 
+POSTHOOK: query: --ensure that timestamp is same for all the rows while using 
current_timestamp() query should return single row
+select count(*) from (select current_timestamp() from alltypesorc union select 
current_timestamp() from src limit 5 ) subq
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Input: default@src
+ A masked pattern was here 
+1
+PREHOOK: query: select count(*) from (select current_timestamp() from 
alltypesorc
+union
+  select current_timestamp() from src
+  limit 5 ) subqr
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Input: default@src
+ A masked pattern was here 
+POSTHOOK: query: select count(*) from (select current_timestamp() from 
alltypesorc
+union
+  select current_timestamp() from src
+  limit 5 ) subqr
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Input: default@src
+ A masked pattern was here 
+1
+PREHOOK: query: --current_timestamp() should appear as expression
+explain extended select current_timestamp() from alltypesorc
+PREHOOK: type: QUERY
+POSTHOOK: query: --current_timestamp() should appear as expression
+explain extended select current_timestamp() from alltypesorc
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+Fetch Operator
+  limit: -1
+  Processor Tree:
+TableScan
+  alias: alltypesorc
+  GatherStats: false
+  Select Operator
+expressions: 2012-01-01 01:02:03.0 (type: timestamp)
+outputColumnNames: _col0
+ListSink
+
+PREHOOK: query: --current_timestamp() + insert
+create temporary table tmp_runtimeconstant(
+  ts1 timestamp,
+  ts2 timestamp,
+  dt date,
+  s string,
+  v varchar(50),
+  c char(50)
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tmp_runtimeconstant
+POSTHOOK: query: --current_timestamp() + insert
+create temporary table tmp_runtimeconstant(
+  ts1 timestamp,
+  ts2 timestamp,
+  dt date,
+  s string,
+  v varchar(50),
+  c char(50)
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tmp_runtimeconstant
+PREHOOK: query: insert into table tmp_runtimeconstant
+  select current_timestamp(),
+ cast(current_timestamp() as timestamp),
+ cast(current_timestamp() as date),
+ cast(current_timestamp() as string),
+ cast(current_timestamp() as varchar(50)),
+ cast(current_timestamp() as char(50))
+  from alltypesorc limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@tmp_runtimeconstant
+POSTHOOK: query: insert into table tmp_runtimeconstant
+  select current_timestamp(),
+ cast(current_timestamp() as timestamp),
+ cast(current_timestamp() as date),
+ cast(current_timestamp() as string),
+ cast(current_timestamp() as varchar(50)),
+ 

hive git commit: HIVE-14920: S3: Optimize SimpleFetchOptimizer::checkThreshold() (Rajesh Balamohan reviewed by Ashutosh Chauhan)

2016-10-21 Thread prasanthj
Repository: hive
Updated Branches:
  refs/heads/master 1dad8 -> f2efa6a2b


HIVE-14920: S3: Optimize SimpleFetchOptimizer::checkThreshold() (Rajesh 
Balamohan reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f2efa6a2
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f2efa6a2
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f2efa6a2

Branch: refs/heads/master
Commit: f2efa6a2be52f09e700c931a293c816a446bf619
Parents: 1da
Author: Prasanth Jayachandran 
Authored: Fri Oct 21 00:38:36 2016 -0700
Committer: Prasanth Jayachandran 
Committed: Fri Oct 21 00:38:36 2016 -0700

--
 .../hive/ql/optimizer/SimpleFetchOptimizer.java | 115 +--
 1 file changed, 82 insertions(+), 33 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/f2efa6a2/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
index eb0ba7b..0481110 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
@@ -26,7 +26,16 @@ import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
-
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.atomic.AtomicLong;
+
+import com.google.common.collect.Lists;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.RemoteIterator;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.fs.ContentSummary;
@@ -162,13 +171,7 @@ public class SimpleFetchOptimizer extends Transform {
 return true;
   }
 }
-long remaining = threshold;
-remaining -= data.getInputLength(pctx, remaining);
-if (remaining < 0) {
-  LOG.info("Threshold " + remaining + " exceeded for pseudoMR mode");
-  return false;
-}
-return true;
+return data.isDataLengthWitInThreshold(pctx, threshold);
   }
 
   // all we can handle is LimitOperator, FilterOperator SelectOperator and 
final FS
@@ -414,18 +417,16 @@ public class SimpleFetchOptimizer extends Transform {
   return replaceFSwithLS(fileSink, work.getSerializationNullFormat());
 }
 
-private long getInputLength(ParseContext pctx, long remaining) throws 
Exception {
+private boolean isDataLengthWitInThreshold(ParseContext pctx, final long 
threshold)
+throws Exception {
   if (splitSample != null && splitSample.getTotalLength() != null) {
-return splitSample.getTotalLength();
-  }
-  if (splitSample != null) {
-return splitSample.getTargetSize(calculateLength(pctx, 
splitSample.estimateSourceSize(remaining)));
+if (LOG.isDebugEnabled()) {
+  LOG.debug("Threshold " + splitSample.getTotalLength() + " exceeded 
for pseudoMR mode");
+}
+return (threshold - splitSample.getTotalLength()) > 0;
   }
-  return calculateLength(pctx, remaining);
-}
 
-private long calculateLength(ParseContext pctx, long remaining) throws 
Exception {
-  JobConf jobConf = new JobConf(pctx.getConf());
+  final JobConf jobConf = new JobConf(pctx.getConf());
   Utilities.setColumnNameList(jobConf, scanOp, true);
   Utilities.setColumnTypeList(jobConf, scanOp, true);
   HiveStorageHandler handler = table.getStorageHandler();
@@ -434,41 +435,89 @@ public class SimpleFetchOptimizer extends Transform {
 TableDesc tableDesc = Utilities.getTableDesc(table);
 PlanUtils.configureInputJobPropertiesForStorageHandler(tableDesc);
 Utilities.copyTableJobPropertiesToConf(tableDesc, jobConf);
-return estimator.estimate(jobConf, scanOp, remaining).getTotalLength();
+long len = estimator.estimate(jobConf, scanOp, 
threshold).getTotalLength();
+if (LOG.isDebugEnabled()) {
+  LOG.debug("Threshold " + len + " exceeded for pseudoMR mode");
+}
+return (threshold - len) > 0;
   }
   if (table.isNonNative()) {
-return 0; // nothing can be done
+return true; // nothing can be done
   }
   if (!table.isPartitioned()) {
-return getFileLength(jobConf, table.getPath(), 
table.getInputFormatClass());
+long len = getPathLength(jobConf, table.getPath(), 
table.getInputFormatClass(), threshold);
+if