date:20181112

[2/2] hive git commit: HIVE-20807 : Refactor LlapStatusServiceDriver (Miklos Gergely via Sergey Shelukhin)

2018-11-12 Thread hashutosh

HIVE-20807 : Refactor LlapStatusServiceDriver (Miklos Gergely via Sergey Shelukhin)

Signed-off-by: Ashutosh Chauhan 


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/af401702
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/af401702
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/af401702

Branch: refs/heads/master
Commit: af401702847391ab41fcf2ef8216a94a1b7bfc76
Parents: bc39c49
Author: Miklos Gergely 
Authored: Thu Oct 25 13:03:00 2018 -0700
Committer: Ashutosh Chauhan 
Committed: Mon Nov 12 15:28:18 2018 -0800

--
 bin/ext/llapstatus.sh   |   4 +-
 .../hadoop/hive/llap/cli/LlapSliderUtils.java   |  55 +-
 .../llap/cli/LlapStatusOptionsProcessor.java| 278 ---
 .../hive/llap/cli/LlapStatusServiceDriver.java  | 811 ---
 .../hadoop/hive/llap/cli/status/AmInfo.java |  93 +++
 .../hive/llap/cli/status/AppStatusBuilder.java  | 231 ++
 .../hadoop/hive/llap/cli/status/ExitCode.java   |  44 +
 .../hive/llap/cli/status/LlapInstance.java  | 134 +++
 .../llap/cli/status/LlapStatusCliException.java |  40 +
 .../hive/llap/cli/status/LlapStatusHelpers.java | 449 --
 .../status/LlapStatusServiceCommandLine.java| 302 +++
 .../cli/status/LlapStatusServiceDriver.java | 775 ++
 .../hadoop/hive/llap/cli/status/State.java  |  31 +
 .../hive/llap/cli/status/package-info.java  |  24 +
 .../llap/cli/TestLlapStatusServiceDriver.java   |  98 ---
 .../TestLlapStatusServiceCommandLine.java   |  91 +++
 .../hive/llap/cli/status/package-info.java  |  23 +
 .../java/org/apache/hive/http/LlapServlet.java  |   9 +-
 18 files changed, 1799 insertions(+), 1693 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/af401702/bin/ext/llapstatus.sh
--
diff --git a/bin/ext/llapstatus.sh b/bin/ext/llapstatus.sh
index 2d2c8f4..23e6be6 100644
--- a/bin/ext/llapstatus.sh
+++ b/bin/ext/llapstatus.sh
@@ -17,7 +17,7 @@ THISSERVICE=llapstatus
 export SERVICE_LIST="${SERVICE_LIST}${THISSERVICE} "
 
 llapstatus () {
-  CLASS=org.apache.hadoop.hive.llap.cli.LlapStatusServiceDriver;
+  CLASS=org.apache.hadoop.hive.llap.cli.status.LlapStatusServiceDriver;
   if [ ! -f ${HIVE_LIB}/hive-cli-*.jar ]; then
 echo "Missing Hive CLI Jar"
 exit 3;
@@ -36,7 +36,7 @@ llapstatus () {
 }
 
 llapstatus_help () {
-  CLASS=org.apache.hadoop.hive.llap.cli.LlapStatusServiceDriver;
+  CLASS=org.apache.hadoop.hive.llap.cli.status.LlapStatusServiceDriver;
   execHiveCmd $CLASS "--help"
 } 
 

http://git-wip-us.apache.org/repos/asf/hive/blob/af401702/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapSliderUtils.java
--
diff --git 
a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapSliderUtils.java 
b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapSliderUtils.java
index af47b26..5ec9e1d 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapSliderUtils.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapSliderUtils.java
@@ -24,69 +24,24 @@ import java.io.IOException;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.yarn.api.records.ApplicationId;
-import org.apache.hadoop.yarn.api.records.ApplicationReport;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.service.api.records.Service;
 import org.apache.hadoop.yarn.service.client.ServiceClient;
 import org.apache.hadoop.yarn.service.utils.CoreFileSystem;
-import org.apache.hadoop.yarn.util.Clock;
-import org.apache.hadoop.yarn.util.SystemClock;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 public class LlapSliderUtils {
-  private static final Logger LOG = LoggerFactory
-  .getLogger(LlapSliderUtils.class);
+  private static final Logger LOG = 
LoggerFactory.getLogger(LlapSliderUtils.class);
   private static final String LLAP_PACKAGE_DIR = ".yarn/package/LLAP/";
 
-  public static ServiceClient createServiceClient(
-  Configuration conf) throws Exception {
+  public static ServiceClient createServiceClient(Configuration conf) throws 
Exception {
 ServiceClient serviceClient = new ServiceClient();
 serviceClient.init(conf);
 serviceClient.start();
 return serviceClient;
   }
 
-  public static ApplicationReport getAppReport(String appName, ServiceClient 
serviceClient,
-   long timeoutMs) throws
-  LlapStatusServiceDriver.LlapStatusCliException {
-Clock clock = SystemClock.getInstance();
-long startTime = clock.getTime();
-long timeoutTime =

[1/2] hive git commit: HIVE-20807 : Refactor LlapStatusServiceDriver (Miklos Gergely via Sergey Shelukhin)

2018-11-12 Thread hashutosh

Repository: hive
Updated Branches:
  refs/heads/master bc39c4998 -> af4017028


http://git-wip-us.apache.org/repos/asf/hive/blob/af401702/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/LlapStatusServiceCommandLine.java
--
diff --git 
a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/LlapStatusServiceCommandLine.java
 
b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/LlapStatusServiceCommandLine.java
new file mode 100644
index 000..bee5079
--- /dev/null
+++ 
b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/LlapStatusServiceCommandLine.java
@@ -0,0 +1,302 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.llap.cli.status;
+
+import java.util.Arrays;
+import java.util.Properties;
+
+import jline.TerminalFactory;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.GnuParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.annotations.VisibleForTesting;
+
+/**
+ * Parses, verifies, prints and provides the command line arguments of the 
Llap Status program.
+ */
+public class LlapStatusServiceCommandLine {
+  private static final Logger LOGGER = 
LoggerFactory.getLogger("LlapStatusServiceDriverConsole");
+
+  @VisibleForTesting
+  static final long DEFAULT_FIND_YARN_APP_TIMEOUT_MS = 20 * 1000L;
+  @VisibleForTesting
+  static final long DEFAULT_STATUS_REFRESH_INTERVAL_MS = 1 * 1000L;
+  @VisibleForTesting
+  static final long DEFAULT_WATCH_MODE_TIMEOUT_MS = 5 * 60 * 1000L;
+  @VisibleForTesting
+  static final float DEFAULT_RUNNING_NODES_THRESHOLD = 1.0f;
+
+  @SuppressWarnings("static-access")
+  private static final Option NAME = OptionBuilder
+  .withLongOpt("name")
+  .withDescription("LLAP cluster name")
+  .withArgName("name")
+  .hasArg()
+  .create('n');
+
+  @SuppressWarnings("static-access")
+  private static final Option FIND_APP_TIMEOUT = OptionBuilder
+  .withLongOpt("findAppTimeout")
+  .withDescription("Amount of time(s) that the tool will sleep to wait for 
the YARN application to start." +
+  "negative values=wait forever, 0=Do not wait. default=" + 
(DEFAULT_FIND_YARN_APP_TIMEOUT_MS / 1000) + "s")
+  .withArgName("findAppTimeout")
+  .hasArg()
+  .create('f');
+
+  @SuppressWarnings("static-access")
+  private static final Option OUTPUT_FILE = OptionBuilder
+  .withLongOpt("outputFile")
+  .withDescription("File to which output should be written (Default 
stdout)")
+  .withArgName("outputFile")
+  .hasArg()
+  .create('o');
+
+  @SuppressWarnings("static-access")
+  private static final Option WATCH_MODE = OptionBuilder
+  .withLongOpt("watch")
+  .withDescription("Watch mode waits until all LLAP daemons are running or 
subset of the nodes are running " +
+  "(threshold can be specified via -r option) (Default wait until all 
nodes are running)")
+  .withArgName("watch")
+  .create('w');
+
+  @SuppressWarnings("static-access")
+  private static final Option NOT_LAUNCHED = OptionBuilder
+  .withLongOpt("notLaunched")
+  .withDescription("In watch mode, do not assume that the application was 
already launched if there's doubt " +
+  "(e.g. if the last application instance has failed).")
+  .withArgName("notLaunched")
+  .create('l');
+
+  @SuppressWarnings("static-access")
+  private static final Option RUNNING_NODES_THRESHOLD = OptionBuilder
+  .withLongOpt("runningNodesThreshold")
+  .withDescription("When watch mode is enabled (-w), wait until the 
specified threshold of nodes are running " +
+  "(Default 1.0 which means 100% nodes are running)")
+  .withArgName("runningNodesThreshold")
+  .hasArg()
+  .create('r');
+
+  @SuppressWarnings("static-access")
+  private static final Option REFRESH_INTERVAL = OptionBuilder
+  .withLongOpt("refreshInterval")
+

[4/6] hive git commit: HIVE-20903: Some minor refactor to the Druid Storage Handler without any change in logic (Slim B reviewed by Ashutosh Chauhan)

2018-11-12 Thread bslim

http://git-wip-us.apache.org/repos/asf/hive/blob/dca389b0/druid-handler/src/java/org/apache/hadoop/hive/druid/io/HiveDruidSplit.java
--
diff --git 
a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/HiveDruidSplit.java 
b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/HiveDruidSplit.java
index 2783016..89d74e3 100644
--- a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/HiveDruidSplit.java
+++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/HiveDruidSplit.java
@@ -36,17 +36,16 @@ public class HiveDruidSplit extends FileSplit implements 
org.apache.hadoop.mapre
 
   // required for deserialization
   public HiveDruidSplit() {
-super((Path) null, 0, 0, (String[]) null);
+super(null, 0, 0, (String[]) null);
   }
 
-  public HiveDruidSplit(String druidQuery, Path dummyPath, String hosts[]) {
+  public HiveDruidSplit(String druidQuery, Path dummyPath, String[] hosts) {
 super(dummyPath, 0, 0, hosts);
 this.druidQuery = druidQuery;
 this.hosts = hosts;
   }
 
-  @Override
-  public void write(DataOutput out) throws IOException {
+  @Override public void write(DataOutput out) throws IOException {
 super.write(out);
 out.writeUTF(druidQuery);
 out.writeInt(hosts.length);
@@ -55,8 +54,7 @@ public class HiveDruidSplit extends FileSplit implements 
org.apache.hadoop.mapre
 }
   }
 
-  @Override
-  public void readFields(DataInput in) throws IOException {
+  @Override public void readFields(DataInput in) throws IOException {
 super.readFields(in);
 druidQuery = in.readUTF();
 int length = in.readInt();
@@ -71,15 +69,12 @@ public class HiveDruidSplit extends FileSplit implements 
org.apache.hadoop.mapre
 return druidQuery;
   }
 
-  @Override
-  public String[] getLocations() throws IOException {
+  @Override public String[] getLocations() throws IOException {
 return hosts;
   }
 
-  @Override
-  public String toString() {
-return "HiveDruidSplit{" + druidQuery + ", "
-+ (hosts == null ? "empty hosts" : Arrays.toString(hosts)) + "}";
+  @Override public String toString() {
+return "HiveDruidSplit{" + druidQuery + ", " + (hosts == null ? "empty 
hosts" : Arrays.toString(hosts)) + "}";
   }
 
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/dca389b0/druid-handler/src/java/org/apache/hadoop/hive/druid/json/KafkaSupervisorIOConfig.java
--
diff --git 
a/druid-handler/src/java/org/apache/hadoop/hive/druid/json/KafkaSupervisorIOConfig.java
 
b/druid-handler/src/java/org/apache/hadoop/hive/druid/json/KafkaSupervisorIOConfig.java
index 425a5bb..c1b3bf8 100644
--- 
a/druid-handler/src/java/org/apache/hadoop/hive/druid/json/KafkaSupervisorIOConfig.java
+++ 
b/druid-handler/src/java/org/apache/hadoop/hive/druid/json/KafkaSupervisorIOConfig.java
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -17,13 +17,11 @@
  */
 package org.apache.hadoop.hive.druid.json;
 
-import io.druid.java.util.common.StringUtils;
-
 import com.fasterxml.jackson.annotation.JsonCreator;
 import com.fasterxml.jackson.annotation.JsonProperty;
 import com.google.common.base.Optional;
 import com.google.common.base.Preconditions;
-
+import io.druid.java.util.common.StringUtils;
 import org.joda.time.Duration;
 import org.joda.time.Period;
 
@@ -33,8 +31,7 @@ import java.util.Map;
  * This class is copied from druid source code
  * in order to avoid adding additional dependencies on druid-indexing-service.
  */
-public class KafkaSupervisorIOConfig
-{
+public class KafkaSupervisorIOConfig {
   public static final String BOOTSTRAP_SERVERS_KEY = "bootstrap.servers";
 
   private final String topic;
@@ -46,13 +43,11 @@ public class KafkaSupervisorIOConfig
   private final Duration period;
   private final boolean useEarliestOffset;
   private final Duration completionTimeout;
-  private final Optional lateMessageRejectionPeriod;
-  private final Optional earlyMessageRejectionPeriod;
+  @SuppressWarnings("OptionalUsedAsFieldOrParameterType") private final 
Optional lateMessageRejectionPeriod;
+  @SuppressWarnings("OptionalUsedAsFieldOrParameterType") private final 
Optional earlyMessageRejectionPeriod;
   private final boolean skipOffsetGaps;
 
-  @JsonCreator
-  public KafkaSupervisorIOConfig(
-  @JsonProperty("topic") String topic,
+  @JsonCreator public KafkaSupervisorIOConfig(@JsonProperty("topic") String 
topic,

[2/6] hive git commit: HIVE-20903: Some minor refactor to the Druid Storage Handler without any change in logic (Slim B reviewed by Ashutosh Chauhan)

2018-11-12 Thread bslim

http://git-wip-us.apache.org/repos/asf/hive/blob/dca389b0/druid-handler/src/test/org/apache/hadoop/hive/druid/serde/TestDruidSerDe.java
--
diff --git 
a/druid-handler/src/test/org/apache/hadoop/hive/druid/serde/TestDruidSerDe.java 
b/druid-handler/src/test/org/apache/hadoop/hive/druid/serde/TestDruidSerDe.java
index acde239..e27f8cf 100644
--- 
a/druid-handler/src/test/org/apache/hadoop/hive/druid/serde/TestDruidSerDe.java
+++ 
b/druid-handler/src/test/org/apache/hadoop/hive/druid/serde/TestDruidSerDe.java
@@ -33,11 +33,11 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.Map.Entry;
 import java.util.Properties;
+import java.util.stream.Collectors;
 
 import io.druid.java.util.http.client.HttpClient;
 import io.druid.java.util.http.client.response.HttpResponseHandler;
 import io.druid.query.scan.ScanResultValue;
-import io.druid.query.select.EventHolder;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.type.HiveChar;
@@ -77,12 +77,8 @@ import org.junit.Before;
 import org.junit.Rule;
 import org.junit.Test;
 
-import com.fasterxml.jackson.core.JsonParseException;
 import com.fasterxml.jackson.core.type.TypeReference;
-import com.fasterxml.jackson.databind.JsonMappingException;
-import com.google.common.base.Function;
 import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Lists;
 import com.google.common.util.concurrent.SettableFuture;
 
 import io.druid.data.input.Row;
@@ -97,52 +93,54 @@ import org.junit.rules.ExpectedException;
  * Basic tests for Druid SerDe. The examples are taken from Druid 0.9.1.1
  * documentation.
  */
-public class TestDruidSerDe {
+@SuppressWarnings({ "SameParameterValue", "SpellCheckingInspection" }) public 
class TestDruidSerDe {
   // Timeseries query
-  private static final String TIMESERIES_QUERY =
-  "{  \"queryType\": \"timeseries\", "
-  + " \"dataSource\": \"sample_datasource\", "
-  + " \"granularity\": \"day\", "
-  + " \"descending\": \"true\", "
-  + " \"filter\": {  "
-  + "  \"type\": \"and\",  "
-  + "  \"fields\": [   "
-  + "   { \"type\": \"selector\", \"dimension\": 
\"sample_dimension1\", \"value\": \"sample_value1\" },   "
-  + "   { \"type\": \"or\","
-  + "\"fields\": [ "
-  + " { \"type\": \"selector\", \"dimension\": 
\"sample_dimension2\", \"value\": \"sample_value2\" }, "
-  + " { \"type\": \"selector\", \"dimension\": 
\"sample_dimension3\", \"value\": \"sample_value3\" }"
-  + "]   "
-  + "   }  "
-  + "  ] "
-  + " }, "
-  + " \"aggregations\": [  "
-  + "  { \"type\": \"longSum\", \"name\": \"sample_name1\", 
\"fieldName\": \"sample_fieldName1\" },  "
-  + "  { \"type\": \"doubleSum\", \"name\": \"sample_name2\", 
\"fieldName\": \"sample_fieldName2\" } "
-  + " ], "
-  + " \"postAggregations\": [  "
-  + "  { \"type\": \"arithmetic\",  "
-  + "\"name\": \"sample_divide\",  "
-  + "\"fn\": \"/\",  "
-  + "\"fields\": [   "
-  + " { \"type\": \"fieldAccess\", \"name\": 
\"postAgg__sample_name1\", \"fieldName\": \"sample_name1\" },   "
-  + " { \"type\": \"fieldAccess\", \"name\": 
\"postAgg__sample_name2\", \"fieldName\": \"sample_name2\" }  "
-  + "]  "
-  + "  } "
-  + " ], "
-  + " \"intervals\": [ 
\"2012-01-01T00:00:00.000/2012-01-03T00:00:00.000\" ]}";
+  private static final String
+  TIMESERIES_QUERY =
+  "{  \"queryType\": \"timeseries\", "
+  + " \"dataSource\": \"sample_datasource\", "
+  + " \"granularity\": \"day\", "
+  + " \"descending\": \"true\", "
+  + " \"filter\": {  "
+  + "  \"type\": \"and\",  "
+  + "  \"fields\": [   "
+  + "   { \"type\": \"selector\", \"dimension\": 
\"sample_dimension1\", \"value\": \"sample_value1\" },   "
+  + "   { \"type\": \"or\","
+  + "\"fields\": [ "
+  + " { \"type\": \"selector\", \"dimension\": 
\"sample_dimension2\", \"value\": \"sample_value2\" }, "
+  + " { \"type\": \"selector\", \"dimension\": 
\"sample_dimension3\", \"value\": \"sample_value3\" }"
+  + "]   "
+  + "   }  "
+  + "  ] "
+  + " }, "
+  + " \"aggregations\": [  "
+  + "  { \"type\": \"longSum\", \"name\": \"sample_name1\", 
\"fieldName\": \"sample_fieldName1\" },  "
+  + "  { \"type\": \"doubleSum\", \"name\": \"sample_name2\",

[5/6] hive git commit: HIVE-20903: Some minor refactor to the Druid Storage Handler without any change in logic (Slim B reviewed by Ashutosh Chauhan)

2018-11-12 Thread bslim

http://git-wip-us.apache.org/repos/asf/hive/blob/dca389b0/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java
--
diff --git 
a/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java
 
b/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java
index c3e7e5d..8fcadea 100644
--- 
a/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java
+++ 
b/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -30,7 +30,6 @@ import com.google.common.collect.Interners;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Ordering;
-import com.google.common.io.CharStreams;
 import io.druid.data.input.impl.DimensionSchema;
 import io.druid.data.input.impl.StringDimensionSchema;
 import io.druid.jackson.DefaultObjectMapper;
@@ -69,6 +68,7 @@ import io.druid.query.spec.MultipleIntervalSegmentSpec;
 import io.druid.segment.IndexIO;
 import io.druid.segment.IndexMergerV9;
 import io.druid.segment.IndexSpec;
+import io.druid.segment.data.BitmapSerdeFactory;
 import io.druid.segment.data.ConciseBitmapSerdeFactory;
 import io.druid.segment.data.RoaringBitmapSerdeFactory;
 import io.druid.segment.indexing.granularity.GranularitySpec;
@@ -92,6 +92,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.Constants;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
@@ -105,6 +106,7 @@ import org.jboss.netty.handler.codec.http.HttpMethod;
 import org.jboss.netty.handler.codec.http.HttpResponseStatus;
 import org.joda.time.DateTime;
 import org.joda.time.Interval;
+import org.joda.time.Period;
 import org.joda.time.chrono.ISOChronology;
 import org.skife.jdbi.v2.Folder3;
 import org.skife.jdbi.v2.Handle;
@@ -117,37 +119,52 @@ import org.skife.jdbi.v2.util.ByteArrayMapper;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import javax.annotation.Nullable;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.InputStreamReader;
 import java.io.OutputStream;
-import java.io.Reader;
 import java.net.InetAddress;
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.net.UnknownHostException;
 import java.sql.SQLException;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 import java.util.Properties;
 import java.util.Set;
 import java.util.TimeZone;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.TimeUnit;
-
+import java.util.stream.Collectors;
 
 /**
  * Utils class for Druid storage handler.
  */
 public final class DruidStorageHandlerUtils {
+  private DruidStorageHandlerUtils () {
+
+  }
 
   private static final Logger LOG = 
LoggerFactory.getLogger(DruidStorageHandlerUtils.class);
 
+  private static final String DRUID_ROLLUP = "druid.rollup";
+  private static final String DRUID_QUERY_GRANULARITY = 
"druid.query.granularity";
+  public static final String DRUID_QUERY_FETCH = "druid.query.fetch";
+  static final String DRUID_SEGMENT_DIRECTORY = 
"druid.storage.storageDirectory";
+  public static final String DRUID_SEGMENT_INTERMEDIATE_DIRECTORY = 
"druid.storage.storageDirectory.intermediate";
+  public static final String DRUID_SEGMENT_VERSION = "druid.segment.version";
+  public static final String DRUID_JOB_WORKING_DIRECTORY = 
"druid.job.workingDirectory";
+  static final String KAFKA_TOPIC = "kafka.topic";
+  static final String KAFKA_BOOTSTRAP_SERVERS = "kafka.bootstrap.servers";
+  static final String DRUID_KAFKA_INGESTION_PROPERTY_PREFIX = 
"druid.kafka.ingestion.";
+  static final String DRUID_KAFKA_CONSUMER_PROPERTY_PREFIX = 
DRUID_KAFKA_INGESTION_PROPERTY_PREFIX + "consumer.";
+  /* Kafka Ingestion state - valid values - START/STOP/RESET */
+  static final String DRUID_KAFKA_INGESTION = "druid.kafka.ingestion";
   private static final int NUM_RETRIES = 8;

[6/6] hive git commit: HIVE-20903: Some minor refactor to the Druid Storage Handler without any change in logic (Slim B reviewed by Ashutosh Chauhan)

2018-11-12 Thread bslim

HIVE-20903: Some minor refactor to the Druid Storage Handler without any change in logic (Slim B reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/dca389b0
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/dca389b0
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/dca389b0

Branch: refs/heads/master
Commit: dca389b067a36f0a3bf28743fceacdc144c34bcb
Parents: d22fc5b
Author: Slim Bouguerra 
Authored: Mon Nov 12 07:54:06 2018 -0800
Committer: Slim Bouguerra 
Committed: Mon Nov 12 07:54:06 2018 -0800

--
 .../org/apache/hadoop/hive/conf/Constants.java  |   17 -
 .../hadoop/hive/druid/DruidKafkaUtils.java  |  167 +++
 .../hadoop/hive/druid/DruidStorageHandler.java  |  879 +
 .../hive/druid/DruidStorageHandlerInfo.java |   53 +-
 .../hive/druid/DruidStorageHandlerUtils.java|  883 ++---
 .../hadoop/hive/druid/io/DruidOutputFormat.java |   32 +-
 .../druid/io/DruidQueryBasedInputFormat.java|   63 +-
 .../hadoop/hive/druid/io/DruidRecordWriter.java |  212 ++-
 .../hadoop/hive/druid/io/HiveDruidSplit.java|   19 +-
 .../druid/json/KafkaSupervisorIOConfig.java |  199 ++-
 .../hive/druid/json/KafkaSupervisorReport.java  |  157 +--
 .../hive/druid/json/KafkaSupervisorSpec.java|  119 +-
 .../druid/json/KafkaSupervisorTuningConfig.java |  152 +--
 .../hive/druid/json/KafkaTuningConfig.java  |  175 +--
 .../hadoop/hive/druid/json/TaskReportData.java  |   68 +-
 .../hive/druid/security/DruidKerberosUtil.java  |   58 +-
 .../hive/druid/security/KerberosHttpClient.java |   86 +-
 .../druid/security/ResponseCookieHandler.java   |   44 +-
 .../RetryIfUnauthorizedResponseHandler.java |   62 +-
 .../druid/security/RetryResponseHolder.java |   23 +-
 .../serde/DruidGroupByQueryRecordReader.java|   19 +-
 .../druid/serde/DruidQueryRecordReader.java |  171 +--
 .../druid/serde/DruidScanQueryRecordReader.java |   35 +-
 .../serde/DruidSelectQueryRecordReader.java |   34 +-
 .../hadoop/hive/druid/serde/DruidSerDe.java |  185 +--
 .../hive/druid/serde/DruidSerDeUtils.java   |   48 +-
 .../serde/DruidTimeseriesQueryRecordReader.java |3 +-
 .../druid/serde/DruidTopNQueryRecordReader.java |   46 +-
 .../hadoop/hive/druid/serde/DruidWritable.java  |   60 +-
 .../hive/druid/DerbyConnectorTestUtility.java   |   13 +-
 .../hadoop/hive/druid/QTestDruidSerDe.java  |   73 +-
 .../hive/druid/TestDruidStorageHandler.java |  107 +-
 .../TestHiveDruidQueryBasedInputFormat.java |4 +-
 .../hadoop/hive/druid/serde/TestDruidSerDe.java | 1234 +-
 .../hive/ql/io/TestDruidRecordWriter.java   |  238 ++--
 .../clientpositive/kafka_storage_handler.q  |4 +-
 .../druid/kafka_storage_handler.q.out   |8 +-
 37 files changed, 2669 insertions(+), 3081 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/dca389b0/common/src/java/org/apache/hadoop/hive/conf/Constants.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/Constants.java 
b/common/src/java/org/apache/hadoop/hive/conf/Constants.java
index 61bc9df..44d0717 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/Constants.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/Constants.java
@@ -32,8 +32,6 @@ public class Constants {
   "org.apache.hadoop.hive.druid.io.DruidOutputFormat";
   public static final String DRUID_DATA_SOURCE = "druid.datasource";
   public static final String DRUID_SEGMENT_GRANULARITY = 
"druid.segment.granularity";
-  public static final String DRUID_ROLLUP = "druid.rollup";
-  public static final String DRUID_QUERY_GRANULARITY = 
"druid.query.granularity";
   public static final String DRUID_TARGET_SHARDS_PER_GRANULARITY =
   "druid.segment.targetShardsPerGranularity";
   public static final String DRUID_TIMESTAMP_GRANULARITY_COL_NAME = 
"__time_granularity";
@@ -42,21 +40,6 @@ public class Constants {
   public static final String DRUID_QUERY_FIELD_NAMES = "druid.fieldNames";
   public static final String DRUID_QUERY_FIELD_TYPES = "druid.fieldTypes";
   public static final String DRUID_QUERY_TYPE = "druid.query.type";
-  public static final String DRUID_QUERY_FETCH = "druid.query.fetch";
-  public static final String DRUID_SEGMENT_DIRECTORY = 
"druid.storage.storageDirectory";
-  public static final String DRUID_SEGMENT_INTERMEDIATE_DIRECTORY = 
"druid.storage.storageDirectory.intermediate";
-
-  public static final String DRUID_SEGMENT_VERSION = "druid.segment.version";
-  public static final String DRUID_JOB_WORKING_DIRECTORY = 
"druid.job.workingDirectory";
-
-
-  public static final String KAFKA_TOPIC = "kafka.topic";
-  public static final String KAFKA_BOOTSTRAP_SERVERS =

[1/6] hive git commit: HIVE-20903: Some minor refactor to the Druid Storage Handler without any change in logic (Slim B reviewed by Ashutosh Chauhan)

2018-11-12 Thread bslim

Repository: hive
Updated Branches:
  refs/heads/master d22fc5b24 -> dca389b06


http://git-wip-us.apache.org/repos/asf/hive/blob/dca389b0/druid-handler/src/test/org/apache/hadoop/hive/ql/io/TestDruidRecordWriter.java
--
diff --git 
a/druid-handler/src/test/org/apache/hadoop/hive/ql/io/TestDruidRecordWriter.java
 
b/druid-handler/src/test/org/apache/hadoop/hive/ql/io/TestDruidRecordWriter.java
index cb8fa39..111f047 100644
--- 
a/druid-handler/src/test/org/apache/hadoop/hive/ql/io/TestDruidRecordWriter.java
+++ 
b/druid-handler/src/test/org/apache/hadoop/hive/ql/io/TestDruidRecordWriter.java
@@ -7,7 +7,7 @@
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
- *  http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -18,14 +18,13 @@
 
 package org.apache.hadoop.hive.ql.io;
 
+import com.fasterxml.jackson.core.type.TypeReference;
 import com.fasterxml.jackson.databind.ObjectMapper;
-import com.google.common.base.Function;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Lists;
 import io.druid.data.input.Firehose;
 import io.druid.data.input.InputRow;
-import io.druid.data.input.impl.DimensionSchema;
 import io.druid.data.input.impl.DimensionsSpec;
 import io.druid.data.input.impl.InputRowParser;
 import io.druid.data.input.impl.MapInputRowParser;
@@ -69,141 +68,144 @@ import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.TemporaryFolder;
 
-import javax.annotation.Nullable;
 import java.io.File;
 import java.io.IOException;
 import java.util.List;
 import java.util.Map;
+import java.util.stream.Collectors;
 
-public class TestDruidRecordWriter {
-  private ObjectMapper objectMapper = DruidStorageHandlerUtils.JSON_MAPPER;
+/**
+ * Test Class for Druid Record Writer.
+ */
+@SuppressWarnings("ConstantConditions") public class TestDruidRecordWriter {
+  private final ObjectMapper objectMapper = 
DruidStorageHandlerUtils.JSON_MAPPER;
 
   private static final Interval INTERVAL_FULL = new 
Interval("2014-10-22T00:00:00Z/P1D");
 
-  @Rule
-  public TemporaryFolder temporaryFolder = new TemporaryFolder();
-
-  private DruidRecordWriter druidRecordWriter;
-
-  final List> expectedRows = ImmutableList.of(
-  ImmutableMap.of(
-  DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN,
-  DateTime.parse("2014-10-22T00:00:00.000Z").getMillis(),
-  "host", ImmutableList.of("a.example.com"),
-  "visited_sum", 190L,
-  "unique_hosts", 1.0d
-  ),
-  ImmutableMap.of(
-  DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN,
-  DateTime.parse("2014-10-22T01:00:00.000Z").getMillis(),
-  "host", ImmutableList.of("b.example.com"),
-  "visited_sum", 175L,
-  "unique_hosts", 1.0d
-  ),
-  ImmutableMap.of(
-  DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN,
-  DateTime.parse("2014-10-22T02:00:00.000Z").getMillis(),
-  "host", ImmutableList.of("c.example.com"),
-  "visited_sum", 270L,
-  "unique_hosts", 1.0d
-  )
-  );
-
-
-  @Test
-  public void testTimeStampColumnName() {
+  @Rule public TemporaryFolder temporaryFolder = new TemporaryFolder();
+
+  final List>
+  expectedRows =
+  
ImmutableList.of(ImmutableMap.of(DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN,
+  DateTime.parse("2014-10-22T00:00:00.000Z").getMillis(),
+  "host",
+  ImmutableList.of("a.example.com"),
+  "visited_sum",
+  190L,
+  "unique_hosts",
+  1.0d),
+  ImmutableMap.of(DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN,
+  DateTime.parse("2014-10-22T01:00:00.000Z").getMillis(),
+  "host",
+  ImmutableList.of("b.example.com"),
+  "visited_sum",
+  175L,
+  "unique_hosts",
+  1.0d),
+  ImmutableMap.of(DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN,
+  DateTime.parse("2014-10-22T02:00:00.000Z").getMillis(),
+  "host",
+  ImmutableList.of("c.example.com"),
+  "visited_sum",
+  270L,
+  "unique_hosts",
+  1.0d));
+
+  @Test public void testTimeStampColumnName() {
 Assert.assertEquals("Time column name need to match to ensure serdeser 
compatibility",
-DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN, 
DruidTable.DEFAULT_TIMESTAMP_COLUMN
-);
+

[3/6] hive git commit: HIVE-20903: Some minor refactor to the Druid Storage Handler without any change in logic (Slim B reviewed by Ashutosh Chauhan)

2018-11-12 Thread bslim

http://git-wip-us.apache.org/repos/asf/hive/blob/dca389b0/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidQueryRecordReader.java
--
diff --git 
a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidQueryRecordReader.java
 
b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidQueryRecordReader.java
index 8c10261..53d7441 100644
--- 
a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidQueryRecordReader.java
+++ 
b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidQueryRecordReader.java
@@ -30,7 +30,6 @@ import io.druid.java.util.common.guava.CloseQuietly;
 import io.druid.java.util.http.client.HttpClient;
 import io.druid.java.util.http.client.Request;
 import io.druid.java.util.http.client.response.InputStreamResponseHandler;
-import io.druid.query.BaseQuery;
 import io.druid.query.Query;
 import io.druid.query.QueryInterruptedException;
 import org.apache.hadoop.conf.Configuration;
@@ -49,6 +48,7 @@ import java.io.Closeable;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.Iterator;
+import java.util.Objects;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.Future;
 
@@ -61,17 +61,11 @@ import java.util.concurrent.Future;
  * DruidWritable containing the timestamp as well as all values resulting from
  * the query.
  */
-public abstract class DruidQueryRecordReader, R extends 
Comparable>
-extends RecordReader
-implements org.apache.hadoop.mapred.RecordReader {
+public abstract class DruidQueryRecordReader> extends 
RecordReader
+implements org.apache.hadoop.mapred.RecordReader {
 
   private static final Logger LOG = 
LoggerFactory.getLogger(DruidQueryRecordReader.class);
 
-  private HttpClient httpClient;
-  private ObjectMapper mapper;
-  // Smile mapper is used to read query results that are serialized as binary 
instead of json
-  private ObjectMapper smileMapper;
-
   /**
* Query that Druid executes.
*/
@@ -80,62 +74,60 @@ public abstract class DruidQueryRecordReader, R extends C
   /**
* Query results as a streaming iterator.
*/
-  protected JsonParserIterator queryResultsIterator =  null;
-
-  /**
-   * Result type definition used to read the rows, this is query dependent.
-   */
-  protected JavaType resultsType = null;
+  JsonParserIterator queryResultsIterator = null;
 
-  @Override
-  public void initialize(InputSplit split, TaskAttemptContext context) throws 
IOException {
+  @Override public void initialize(InputSplit split, TaskAttemptContext 
context) throws IOException {
 initialize(split, context.getConfiguration());
   }
 
-  public void initialize(InputSplit split, Configuration conf, ObjectMapper 
mapper,
-  ObjectMapper smileMapper, HttpClient httpClient
-  ) throws IOException {
+  public void initialize(InputSplit split,
+  Configuration conf,
+  ObjectMapper mapper,
+  ObjectMapper smileMapper,
+  HttpClient httpClient) throws IOException {
 HiveDruidSplit hiveDruidSplit = (HiveDruidSplit) split;
 Preconditions.checkNotNull(hiveDruidSplit, "input split is null ???");
-this.mapper = Preconditions.checkNotNull(mapper, "object Mapper can not be 
null");
-// Smile mapper is used to read query results that are serilized as binary 
instead of json
-this.smileMapper = Preconditions.checkNotNull(smileMapper, "Smile Mapper 
can not be null");
+ObjectMapper mapper1 = Preconditions.checkNotNull(mapper, "object Mapper 
can not be null");
+// Smile mapper is used to read query results that are serialized as 
binary instead of json
+// Smile mapper is used to read query results that are serialized as 
binary instead of json
+ObjectMapper smileMapper1 = Preconditions.checkNotNull(smileMapper, "Smile 
Mapper can not be null");
 // Create query
-this.query = 
this.mapper.readValue(Preconditions.checkNotNull(hiveDruidSplit.getDruidQuery()),
 Query.class);
+this.query = 
mapper1.readValue(Preconditions.checkNotNull(hiveDruidSplit.getDruidQuery()), 
Query.class);
 Preconditions.checkNotNull(query);
-this.resultsType = getResultTypeDef();
-this.httpClient = Preconditions.checkNotNull(httpClient, "need Http 
Client");
+/*
+  Result type definition used to read the rows, this is query dependent.
+ */
+JavaType resultsType = getResultTypeDef();
+HttpClient httpClient1 = Preconditions.checkNotNull(httpClient, "need Http 
Client");
 final String[] locations = hiveDruidSplit.getLocations();
-boolean initlialized = false;
+boolean initialized = false;
 int currentLocationIndex = 0;
 Exception ex = null;
-while (!initlialized && currentLocationIndex < locations.length) {
+while (!initialized && currentLocationIndex < locations.length) {
   String address = locations[currentLocationIndex++];
-  if(Strings.isNullOrEmpty(address)) {
+  if

hive git commit: HIVE-20899: Keytab URI for LLAP YARN Service is restrictive to support HDFS only (Gour Saha reviewed by Prasanth Jayachandran)

2018-11-12 Thread prasanthj

Repository: hive
Updated Branches:
  refs/heads/master 558876462 -> bc39c4998


HIVE-20899: Keytab URI for LLAP YARN Service is restrictive to support HDFS 
only (Gour Saha reviewed by Prasanth Jayachandran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bc39c499
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bc39c499
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bc39c499

Branch: refs/heads/master
Commit: bc39c49988c8a5d881a23ed7dd5d4adba0509ee9
Parents: 5588764
Author: Gour Saha 
Authored: Mon Nov 12 13:04:21 2018 -0800
Committer: Prasanth Jayachandran 
Committed: Mon Nov 12 13:04:24 2018 -0800

--
 llap-server/src/main/resources/package.py | 2 --
 1 file changed, 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/bc39c499/llap-server/src/main/resources/package.py
--
diff --git a/llap-server/src/main/resources/package.py 
b/llap-server/src/main/resources/package.py
index 9eb3fd7..c48ff79 100644
--- a/llap-server/src/main/resources/package.py
+++ b/llap-server/src/main/resources/package.py
@@ -130,8 +130,6 @@ def main(args):
service_keytab_path += "/" + service_keytab
else:
service_keytab_path = service_keytab
-   if service_keytab_path:
-   service_keytab_path = "hdfs:///user/hive/" + service_keytab_path
 
if not input:
print "Cannot find input files"

hive git commit: HIVE-20899: Keytab URI for LLAP YARN Service is restrictive to support HDFS only (Gour Saha reviewed by Prasanth Jayachandran)

2018-11-12 Thread prasanthj

Repository: hive
Updated Branches:
  refs/heads/branch-3 cd4491900 -> 4663e50e7


HIVE-20899: Keytab URI for LLAP YARN Service is restrictive to support HDFS 
only (Gour Saha reviewed by Prasanth Jayachandran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4663e50e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4663e50e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4663e50e

Branch: refs/heads/branch-3
Commit: 4663e50e709c1f836acb34841a72f1dfc9f31da9
Parents: cd44919
Author: Gour Saha 
Authored: Mon Nov 12 13:04:21 2018 -0800
Committer: Prasanth Jayachandran 
Committed: Mon Nov 12 13:04:45 2018 -0800

--
 llap-server/src/main/resources/package.py | 2 --
 1 file changed, 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/4663e50e/llap-server/src/main/resources/package.py
--
diff --git a/llap-server/src/main/resources/package.py 
b/llap-server/src/main/resources/package.py
index 9eb3fd7..c48ff79 100644
--- a/llap-server/src/main/resources/package.py
+++ b/llap-server/src/main/resources/package.py
@@ -130,8 +130,6 @@ def main(args):
service_keytab_path += "/" + service_keytab
else:
service_keytab_path = service_keytab
-   if service_keytab_path:
-   service_keytab_path = "hdfs:///user/hive/" + service_keytab_path
 
if not input:
print "Cannot find input files"

[32/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/mapjoin47.q.out
--
diff --git a/ql/src/test/results/clientpositive/mapjoin47.q.out 
b/ql/src/test/results/clientpositive/mapjoin47.q.out
index d3e61f8..dadac0d 100644
--- a/ql/src/test/results/clientpositive/mapjoin47.q.out
+++ b/ql/src/test/results/clientpositive/mapjoin47.q.out
@@ -385,8 +385,8 @@ STAGE PLANS:
 alias: src1
 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE 
Column stats: NONE
 Select Operator
-  expressions: key (type: string), value (type: string)
-  outputColumnNames: _col0, _col1
+  expressions: key (type: string), value (type: string), 
UDFToDouble(value) BETWEEN 100.0D AND 102.0D (type: boolean)
+  outputColumnNames: _col0, _col1, _col2
   Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE 
Column stats: NONE
   HashTable Sink Operator
 keys:
@@ -400,8 +400,8 @@ STAGE PLANS:
 alias: src
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
 Select Operator
-  expressions: key (type: string), value (type: string)
-  outputColumnNames: _col0, _col1
+  expressions: key (type: string), value (type: string), 
UDFToDouble(value) BETWEEN 100.0D AND 102.0D (type: boolean)
+  outputColumnNames: _col0, _col1, _col2
   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
   Map Join Operator
 condition map:
@@ -409,19 +409,23 @@ STAGE PLANS:
 keys:
   0 
   1 
-outputColumnNames: _col0, _col1, _col2, _col3
-residual filter predicates: {((_col0 = _col2) or 
UDFToDouble(_col1) BETWEEN 100.0D AND 102.0D or UDFToDouble(_col3) BETWEEN 
100.0D AND 102.0D)}
-Statistics: Num rows: 9026 Data size: 173876 Basic stats: 
COMPLETE Column stats: NONE
-Limit
-  Number of rows: 10
-  Statistics: Num rows: 10 Data size: 190 Basic stats: 
COMPLETE Column stats: NONE
-  File Output Operator
-compressed: false
+outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+residual filter predicates: {((_col0 = _col3) or _col2 or 
_col5)}
+Statistics: Num rows: 12500 Data size: 240800 Basic stats: 
COMPLETE Column stats: NONE
+Select Operator
+  expressions: _col0 (type: string), _col1 (type: string), 
_col3 (type: string), _col4 (type: string)
+  outputColumnNames: _col0, _col1, _col2, _col3
+  Statistics: Num rows: 12500 Data size: 240800 Basic stats: 
COMPLETE Column stats: NONE
+  Limit
+Number of rows: 10
 Statistics: Num rows: 10 Data size: 190 Basic stats: 
COMPLETE Column stats: NONE
-table:
-input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+File Output Operator
+  compressed: false
+  Statistics: Num rows: 10 Data size: 190 Basic stats: 
COMPLETE Column stats: NONE
+  table:
+  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+  serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Local Work:
 Map Reduce Local Work
 
@@ -501,8 +505,8 @@ STAGE PLANS:
 alias: src1
 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE 
Column stats: NONE
 Select Operator
-  expressions: key (type: string), value (type: string)
-  outputColumnNames: _col0, _col1
+  expressions: key (type: string), value (type: string), 
UDFToDouble(key) (type: double)
+  outputColumnNames: _col0, _col1, _col2
   Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE 
Column stats: NONE
   HashTable Sink Operator
 keys:
@@ -516,8 +520,8 @@ STAGE PLANS:
 alias: src
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
 Select Operator
-  expressions: key (type: string), value (type: string)
-  outputColumnNames: _col0, _col1
+  expressions: key (type: string), value (type: string),

[17/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out 
b/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out
index 50474bc..f5a71b4 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out
@@ -184,36 +184,36 @@ CBO PLAN:
 HiveProject(_o__c0=[$0], _o__c1=[$1], _o__c2=[$2], _o__c3=[$3])
   HiveSortLimit(sort0=[$7], sort1=[$4], sort2=[$5], sort3=[$6], dir0=[ASC], 
dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100])
 HiveProject(_o__c0=[substr($0, 1, 20)], _o__c1=[/(CAST($1):DOUBLE, $2)], 
_o__c2=[/($3, $4)], _o__c3=[/($5, $6)], (tok_function avg (tok_table_or_col 
ws_quantity))=[/(CAST($1):DOUBLE, $2)], (tok_function avg (tok_table_or_col 
wr_refunded_cash))=[/($3, $4)], (tok_function avg (tok_table_or_col 
wr_fee))=[/($5, $6)], (tok_function substr (tok_table_or_col r_reason_desc) 1 
20)=[substr($0, 1, 20)])
-  HiveAggregate(group=[{7}], agg#0=[sum($26)], agg#1=[count($26)], 
agg#2=[sum($21)], agg#3=[count($21)], agg#4=[sum($20)], agg#5=[count($20)])
-HiveJoin(condition=[AND(AND(=($0, $17), =($4, $1)), =($5, $2))], 
joinType=[inner], algorithm=[none], cost=[not available])
-  HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], 
cd_education_status=[$3])
+  HiveAggregate(group=[{14}], agg#0=[sum($32)], agg#1=[count($32)], 
agg#2=[sum($27)], agg#3=[count($27)], agg#4=[sum($26)], agg#5=[count($26)])
+HiveJoin(condition=[AND(AND(AND(=($1, $18), =($2, $19)), =($0, $21)), 
OR(AND($3, $4, $36), AND($5, $6, $37), AND($7, $8, $38)))], joinType=[inner], 
algorithm=[none], cost=[not available])
+  HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], 
cd_education_status=[$3], ==[=($2, _UTF-16LE'M')], =4=[=($3, _UTF-16LE'4 yr 
Degree')], =5=[=($2, _UTF-16LE'D')], =6=[=($3, _UTF-16LE'Primary')], =7=[=($2, 
_UTF-16LE'U')], =8=[=($3, _UTF-16LE'Advanced Degree')])
 HiveFilter(condition=[AND(IN($3, _UTF-16LE'4 yr Degree', 
_UTF-16LE'Primary', _UTF-16LE'Advanced Degree'), IN($2, _UTF-16LE'M', 
_UTF-16LE'D', _UTF-16LE'U'), IS NOT NULL($0))])
-  HiveTableScan(table=[[default, customer_demographics]], 
table:alias=[cd2])
-  HiveJoin(condition=[AND(=($0, $12), OR(AND(=($1, _UTF-16LE'M'), 
=($2, _UTF-16LE'4 yr Degree'), BETWEEN(false, $24, 100, 150)), AND(=($1, 
_UTF-16LE'D'), =($2, _UTF-16LE'Primary'), BETWEEN(false, $24, 50, 100)), 
AND(=($1, _UTF-16LE'U'), =($2, _UTF-16LE'Advanced Degree'), BETWEEN(false, $24, 
150, 200], joinType=[inner], algorithm=[none], cost=[not available])
-HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], 
cd_education_status=[$3])
-  HiveFilter(condition=[AND(IN($3, _UTF-16LE'4 yr Degree', 
_UTF-16LE'Primary', _UTF-16LE'Advanced Degree'), IN($2, _UTF-16LE'M', 
_UTF-16LE'D', _UTF-16LE'U'), IS NOT NULL($0))])
-HiveTableScan(table=[[default, customer_demographics]], 
table:alias=[cd1])
-HiveJoin(condition=[=($0, $12)], joinType=[inner], 
algorithm=[none], cost=[not available])
+  HiveTableScan(table=[[default, customer_demographics]], 
table:alias=[cd1])
+  HiveJoin(condition=[AND(=($0, $13), OR(AND($1, $24), AND($2, $25), 
AND($3, $26)))], joinType=[inner], algorithm=[none], cost=[not available])
+HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', 
_UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', 
_UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')])
+  HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', 
_UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', 
_UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'), IS NOT 
NULL($0))])
+HiveTableScan(table=[[default, customer_address]], 
table:alias=[customer_address])
+HiveJoin(condition=[=($0, $11)], joinType=[inner], 
algorithm=[none], cost=[not available])
   HiveProject(r_reason_sk=[$0], r_reason_desc=[$2])
 HiveFilter(condition=[IS NOT NULL($0)])
   HiveTableScan(table=[[default, reason]], 
table:alias=[reason])
-  HiveJoin(condition=[=($14, $0)], joinType=[inner], 
algorithm=[none], cost=[not available])
-HiveProject(d_date_sk=[$0], d_year=[CAST(1998):INTEGER])
-  HiveFilter(condition=[AND(=($6, 1998), IS NOT NULL($0))])
-HiveTableScan(table=[[default, date_dim]], 
table:alias=[date_dim])
-HiveJoin(condition=[=($14, $0)], joinType=[inner], 
algorithm=[none], cost=[not available])
-  HiveProject(wp_web_page_sk=[$0])
-HiveFilter(condition=[IS NOT NULL($0)])
-

[28/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query26.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/spark/query26.q.out 
b/ql/src/test/results/clientpositive/perf/spark/query26.q.out
index b6ee41e..48c0e11 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query26.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query26.q.out
@@ -221,11 +221,11 @@ STAGE PLANS:
 keys:
   0 _col2 (type: int)
   1 _col0 (type: int)
-outputColumnNames: _col4, _col5, _col6, _col7, _col18
+outputColumnNames: _col4, _col5, _col6, _col7, _col12
 Statistics: Num rows: 421645953 Data size: 57099332415 Basic 
stats: COMPLETE Column stats: NONE
 Group By Operator
   aggregations: sum(_col4), count(_col4), sum(_col5), 
count(_col5), sum(_col7), count(_col7), sum(_col6), count(_col6)
-  keys: _col18 (type: string)
+  keys: _col12 (type: string)
   mode: hash
   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8
   Statistics: Num rows: 421645953 Data size: 57099332415 Basic 
stats: COMPLETE Column stats: NONE

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query27.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/spark/query27.q.out 
b/ql/src/test/results/clientpositive/perf/spark/query27.q.out
index 4063c4f..6c64664 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query27.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query27.q.out
@@ -207,7 +207,7 @@ STAGE PLANS:
   keys:
 0 _col3 (type: int)
 1 _col0 (type: int)
-  outputColumnNames: _col1, _col4, _col5, _col6, _col7, _col15
+  outputColumnNames: _col1, _col4, _col5, _col6, _col7, _col11
   input vertices:
 1 Map 9
   Statistics: Num rows: 766650239 Data size: 67634106676 Basic 
stats: COMPLETE Column stats: NONE
@@ -216,7 +216,7 @@ STAGE PLANS:
 sort order: +
 Map-reduce partition columns: _col1 (type: int)
 Statistics: Num rows: 766650239 Data size: 67634106676 
Basic stats: COMPLETE Column stats: NONE
-value expressions: _col4 (type: int), _col5 (type: 
decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col15 
(type: string)
+value expressions: _col4 (type: int), _col5 (type: 
decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col11 
(type: string)
 Reducer 4 
 Reduce Operator Tree:
   Join Operator
@@ -225,10 +225,10 @@ STAGE PLANS:
 keys:
   0 _col1 (type: int)
   1 _col0 (type: int)
-outputColumnNames: _col4, _col5, _col6, _col7, _col15, _col17
+outputColumnNames: _col4, _col5, _col6, _col7, _col11, _col13
 Statistics: Num rows: 843315281 Data size: 74397518956 Basic 
stats: COMPLETE Column stats: NONE
 Select Operator
-  expressions: _col17 (type: string), _col15 (type: string), 
_col4 (type: int), _col5 (type: decimal(7,2)), _col7 (type: decimal(7,2)), 
_col6 (type: decimal(7,2))
+  expressions: _col13 (type: string), _col11 (type: string), 
_col4 (type: int), _col5 (type: decimal(7,2)), _col7 (type: decimal(7,2)), 
_col6 (type: decimal(7,2))
   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
   Statistics: Num rows: 843315281 Data size: 74397518956 Basic 
stats: COMPLETE Column stats: NONE
   Group By Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query29.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/spark/query29.q.out 
b/ql/src/test/results/clientpositive/perf/spark/query29.q.out
index 2e5c0f3..f4a4524 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query29.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query29.q.out
@@ -295,7 +295,7 @@ STAGE PLANS:
 keys:
   0 _col1 (type: int)
   1 _col0 (type: int)
-outputColumnNames: _col3, _col5, _col10, _col11, _col13, 
_col18, _col19
+outputColumnNames: _col3, _col5, _col8, _col9, _col11, _col14, 
_col15
 Statistics: Num rows: 766650239 Data size: 67634106676 Basic 
stats: COMPLETE Column stats: NONE

[37/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/subquery_select.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/subquery_select.q.out 
b/ql/src/test/results/clientpositive/llap/subquery_select.q.out
index 6870ad1..0435530 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_select.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_select.q.out
@@ -32,14 +32,15 @@ STAGE PLANS:
   alias: part
   Statistics: Num rows: 26 Data size: 104 Basic stats: 
COMPLETE Column stats: COMPLETE
   Select Operator
-expressions: p_size (type: int)
-outputColumnNames: _col0
-Statistics: Num rows: 26 Data size: 104 Basic stats: 
COMPLETE Column stats: COMPLETE
+expressions: p_size (type: int), p_size is null (type: 
boolean)
+outputColumnNames: _col0, _col1
+Statistics: Num rows: 26 Data size: 208 Basic stats: 
COMPLETE Column stats: COMPLETE
 Reduce Output Operator
   key expressions: _col0 (type: int)
   sort order: +
   Map-reduce partition columns: _col0 (type: int)
-  Statistics: Num rows: 26 Data size: 104 Basic stats: 
COMPLETE Column stats: COMPLETE
+  Statistics: Num rows: 26 Data size: 208 Basic stats: 
COMPLETE Column stats: COMPLETE
+  value expressions: _col1 (type: boolean)
   Select Operator
 expressions: p_size (type: int)
 outputColumnNames: p_size
@@ -77,12 +78,12 @@ STAGE PLANS:
 keys:
   0 _col0 (type: int)
   1 _col0 (type: int)
-outputColumnNames: _col0, _col2
-Statistics: Num rows: 27 Data size: 116 Basic stats: COMPLETE 
Column stats: COMPLETE
+outputColumnNames: _col0, _col1, _col3
+Statistics: Num rows: 27 Data size: 224 Basic stats: COMPLETE 
Column stats: COMPLETE
 Reduce Output Operator
   sort order: 
-  Statistics: Num rows: 27 Data size: 116 Basic stats: 
COMPLETE Column stats: COMPLETE
-  value expressions: _col0 (type: int), _col2 (type: boolean)
+  Statistics: Num rows: 27 Data size: 224 Basic stats: 
COMPLETE Column stats: COMPLETE
+  value expressions: _col0 (type: int), _col1 (type: boolean), 
_col3 (type: boolean)
 Reducer 3 
 Execution mode: llap
 Reduce Operator Tree:
@@ -92,10 +93,10 @@ STAGE PLANS:
 keys:
   0 
   1 
-outputColumnNames: _col0, _col2, _col3, _col4
-Statistics: Num rows: 27 Data size: 548 Basic stats: COMPLETE 
Column stats: COMPLETE
+outputColumnNames: _col0, _col1, _col3, _col4, _col5
+Statistics: Num rows: 27 Data size: 440 Basic stats: COMPLETE 
Column stats: COMPLETE
 Select Operator
-  expressions: _col0 (type: int), CASE WHEN ((_col3 = 0L)) 
THEN (false) WHEN (_col2 is not null) THEN (true) WHEN (_col0 is null) THEN 
(null) WHEN ((_col4 < _col3)) THEN (null) ELSE (false) END (type: boolean)
+  expressions: _col0 (type: int), CASE WHEN (_col4) THEN 
(false) WHEN (_col3 is not null) THEN (true) WHEN (_col1) THEN (null) WHEN 
(_col5) THEN (null) ELSE (false) END (type: boolean)
   outputColumnNames: _col0, _col1
   Statistics: Num rows: 27 Data size: 216 Basic stats: 
COMPLETE Column stats: COMPLETE
   File Output Operator
@@ -136,10 +137,14 @@ STAGE PLANS:
   mode: complete
   outputColumnNames: _col0, _col1
   Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE 
Column stats: COMPLETE
-  Reduce Output Operator
-sort order: 
-Statistics: Num rows: 1 Data size: 16 Basic stats: 
COMPLETE Column stats: COMPLETE
-value expressions: _col0 (type: bigint), _col1 (type: 
bigint)
+  Select Operator
+expressions: (_col0 = 0L) (type: boolean), (_col1 < _col0) 
(type: boolean)
+outputColumnNames: _col0, _col1
+Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+Reduce Output Operator
+  sort order: 
+  Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
+  value expressions: _col0 (type: boolean), _col1 (type: 
boolean)
 
   Stage: Stage-0
 Fetch Operator
@@ -219,15 +224,15 @@ STAGE

[16/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query11.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/tez/query11.q.out 
b/ql/src/test/results/clientpositive/perf/tez/query11.q.out
index 2f453f3..da1c349 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query11.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query11.q.out
@@ -189,249 +189,241 @@ Stage-0
 limit:100
 Stage-1
   Reducer 8 vectorized
-  File Output Operator [FS_358]
-Limit [LIM_357] (rows=100 width=85)
+  File Output Operator [FS_354]
+Limit [LIM_353] (rows=100 width=85)
   Number of rows:100
-  Select Operator [SEL_356] (rows=12248093 width=85)
+  Select Operator [SEL_352] (rows=12248093 width=85)
 Output:["_col0"]
   <-Reducer 7 [SIMPLE_EDGE]
-SHUFFLE [RS_97]
-  Select Operator [SEL_96] (rows=12248093 width=85)
+SHUFFLE [RS_93]
+  Select Operator [SEL_92] (rows=12248093 width=85)
 Output:["_col0"]
-Filter Operator [FIL_95] (rows=12248093 width=533)
-  predicate:CASE WHEN (_col3 is not null) THEN (CASE WHEN 
(_col5 is not null) THEN (((_col1 / _col5) > (_col8 / _col3))) ELSE ((null > 
(_col8 / _col3))) END) ELSE (CASE WHEN (_col5 is not null) THEN (((_col1 / 
_col5) > null)) ELSE (null) END) END
-  Merge Join Operator [MERGEJOIN_291] (rows=24496186 width=533)
-
Conds:RS_92._col2=RS_355._col0(Inner),Output:["_col1","_col3","_col5","_col7","_col8"]
+Filter Operator [FIL_91] (rows=12248093 width=537)
+  predicate:CASE WHEN (_col3 is not null) THEN (CASE WHEN 
(_col6) THEN (((_col1 / _col5) > (_col9 / _col3))) ELSE ((null > (_col9 / 
_col3))) END) ELSE (CASE WHEN (_col6) THEN (((_col1 / _col5) > null)) ELSE 
(null) END) END
+  Merge Join Operator [MERGEJOIN_287] (rows=24496186 width=537)
+
Conds:RS_88._col2=RS_351._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col8","_col9"]
   <-Reducer 20 [SIMPLE_EDGE] vectorized
-SHUFFLE [RS_355]
+SHUFFLE [RS_351]
   PartitionCols:_col0
-  Select Operator [SEL_354] (rows=8000 width=297)
+  Select Operator [SEL_350] (rows=8000 width=297)
 Output:["_col0","_col1","_col2"]
-Group By Operator [GBY_353] (rows=8000 width=764)
+Group By Operator [GBY_349] (rows=8000 width=764)
   
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0,
 KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6
 <-Reducer 19 [SIMPLE_EDGE]
-  SHUFFLE [RS_83]
+  SHUFFLE [RS_79]
 PartitionCols:_col0, _col1, _col2, _col3, _col4, 
_col5, _col6
-Group By Operator [GBY_82] (rows=8000 
width=764)
-  
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0,
 _col1, _col2, _col3, _col4, _col5, _col6
-  Select Operator [SEL_80] (rows=187573258 
width=847)
-
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"]
-Merge Join Operator [MERGEJOIN_288] 
(rows=187573258 width=847)
-  
Conds:RS_77._col1=RS_321._col0(Inner),Output:["_col2","_col3","_col7","_col8","_col9","_col10","_col11","_col12","_col13"]
-<-Map 26 [SIMPLE_EDGE] vectorized
-  SHUFFLE [RS_321]
-PartitionCols:_col0
-Select Operator [SEL_320] (rows=8000 
width=656)
-  
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"]
-  Filter Operator [FIL_319] (rows=8000 
width=656)
-predicate:(c_customer_id is not null 
and c_customer_sk is not null)
-TableScan [TS_71] (rows=8000 
width=656)
-  
default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_first_name","c_last_name","c_preferred_cust_flag","c_birth_country","c_login","c_email_address"]
-<-Reducer 18 [SIMPLE_EDGE]
-  SHUFFLE [RS_77]
-PartitionCols:_col1
-

[11/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query48.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/tez/query48.q.out 
b/ql/src/test/results/clientpositive/perf/tez/query48.q.out
index 76b4ce1..1f63e95 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query48.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query48.q.out
@@ -143,15 +143,15 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 Plan optimized by CBO.
 
 Vertex dependency in root stage
-Map 7 <- Reducer 11 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 9 
(BROADCAST_EDGE)
-Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE)
+Map 8 <- Reducer 10 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 7 
(BROADCAST_EDGE)
+Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE)
 Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE)
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
-Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
+Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
 Reducer 5 <- Map 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
 Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE)
-Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE)
+Reducer 7 <- Map 1 (CUSTOM_SIMPLE_EDGE)
 
 Stage-0
   Fetch Operator
@@ -165,103 +165,103 @@ Stage-0
   PARTITION_ONLY_SHUFFLE [RS_30]
 Group By Operator [GBY_29] (rows=1 width=8)
   Output:["_col0"],aggregations:["sum(_col5)"]
-  Select Operator [SEL_28] (rows=25203 width=86)
+  Select Operator [SEL_28] (rows=20247 width=24)
 Output:["_col5"]
-Filter Operator [FIL_27] (rows=25203 width=86)
-  predicate:(((_col14) IN ('KY', 'GA', 'NM') and _col7 BETWEEN 
0 AND 2000) or ((_col14) IN ('MT', 'OR', 'IN') and _col7 BETWEEN 150 AND 3000) 
or ((_col14) IN ('WI', 'MO', 'WV') and _col7 BETWEEN 50 AND 25000))
-  Merge Join Operator [MERGEJOIN_96] (rows=75613 width=86)
-
Conds:RS_24._col3=RS_118._col0(Inner),Output:["_col5","_col7","_col14"]
+Filter Operator [FIL_27] (rows=20247 width=24)
+  predicate:((_col12 and _col6) or (_col13 and _col7) or 
(_col14 and _col8))
+  Merge Join Operator [MERGEJOIN_96] (rows=26999 width=24)
+
Conds:RS_24._col3=RS_115._col0(Inner),Output:["_col5","_col6","_col7","_col8","_col12","_col13","_col14"]
   <-Map 12 [SIMPLE_EDGE] vectorized
-SHUFFLE [RS_118]
+SHUFFLE [RS_115]
   PartitionCols:_col0
-  Select Operator [SEL_117] (rows=3529412 width=187)
-Output:["_col0","_col1"]
-Filter Operator [FIL_116] (rows=3529412 width=187)
+  Select Operator [SEL_114] (rows=3529412 width=16)
+Output:["_col0","_col1","_col2","_col3"]
+Filter Operator [FIL_113] (rows=3529412 width=187)
   predicate:((ca_country = 'United States') and 
(ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and 
ca_address_sk is not null)
   TableScan [TS_12] (rows=4000 width=187)
 
default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_country"]
   <-Reducer 4 [SIMPLE_EDGE]
 SHUFFLE [RS_24]
   PartitionCols:_col3
-  Merge Join Operator [MERGEJOIN_95] (rows=856941 width=0)
-
Conds:RS_21._col2=RS_110._col0(Inner),Output:["_col3","_col5","_col7"]
-  <-Map 10 [SIMPLE_EDGE] vectorized
-SHUFFLE [RS_110]
+  Merge Join Operator [MERGEJOIN_95] (rows=305980 width=12)
+
Conds:RS_21._col4=RS_126._col0(Inner),Output:["_col3","_col5","_col6","_col7","_col8"]
+  <-Map 11 [SIMPLE_EDGE] vectorized
+SHUFFLE [RS_126]
   PartitionCols:_col0
-  Select Operator [SEL_109] (rows=29552 width=184)
+  Select Operator [SEL_125] (rows=1704 width=4)
 Output:["_col0"]
-Filter Operator [FIL_108] (rows=29552 width=183)
-  predicate:((cd_education_status = '4 yr Degree') 
and (cd_marital_status = 'M') and cd_demo_sk is not null)
-  TableScan [TS_9] (rows=1861800 width=183)
-

[34/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out 
b/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out
index a8f06eb..def06a5 100644
--- a/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out
@@ -122,18 +122,19 @@ STAGE PLANS:
   TableScan Vectorization:
   native: true
   Select Operator
-expressions: key (type: int), value (type: int)
-outputColumnNames: _col0, _col1
+expressions: key (type: int), value (type: int), ((key > 
40) and (value > 50) and (key = value)) (type: boolean)
+outputColumnNames: _col0, _col1, _col2
 Select Vectorization:
 className: VectorSelectOperator
 native: true
+selectExpressions: VectorUDFAdaptor(((key > 40) and 
(value > 50) and (key = value)))(children: LongColGreaterLongScalar(col 0:int, 
val 40) -> 3:boolean, LongColGreaterLongScalar(col 1:int, val 50) -> 4:boolean, 
LongColEqualLongColumn(col 0:int, col 1:int) -> 5:boolean) -> 6:boolean
 Statistics: Num rows: 4 Data size: 32 Basic stats: 
COMPLETE Column stats: NONE
 Map Join Operator
   condition map:
Right Outer Join 0 to 1
   filter predicates:
 0 
-1 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)}
+1 {_col2}
   keys:
 0 
 1 
@@ -368,18 +369,19 @@ STAGE PLANS:
   TableScan Vectorization:
   native: true
   Select Operator
-expressions: key (type: int), value (type: int)
-outputColumnNames: _col0, _col1
+expressions: key (type: int), value (type: int), (key > 
40) (type: boolean), (value > 50) (type: boolean), (key = value) (type: boolean)
+outputColumnNames: _col0, _col1, _col2, _col3, _col4
 Select Vectorization:
 className: VectorSelectOperator
 native: true
+selectExpressions: LongColGreaterLongScalar(col 0:int, 
val 40) -> 3:boolean, LongColGreaterLongScalar(col 1:int, val 50) -> 4:boolean, 
LongColEqualLongColumn(col 0:int, col 1:int) -> 5:boolean
 Statistics: Num rows: 4 Data size: 32 Basic stats: 
COMPLETE Column stats: NONE
 Map Join Operator
   condition map:
Right Outer Join 0 to 1
   filter predicates:
 0 
-1 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)}
+1 {_col2} {_col3} {_col4}
   keys:
 0 _col0 (type: int)
 1 _col1 (type: int)
@@ -397,7 +399,7 @@ STAGE PLANS:
 Select Vectorization:
 className: VectorSelectOperator
 native: true
-selectExpressions: 
VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int
+selectExpressions: 
VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 8:int
 Statistics: Num rows: 4 Data size: 35 Basic stats: 
COMPLETE Column stats: NONE
 Group By Operator
   aggregations: sum(_col0)
@@ -541,18 +543,19 @@ STAGE PLANS:
   TableScan Vectorization:
   native: true
   Select Operator
-expressions: key (type: int), value (type: int)
-outputColumnNames: _col0, _col1
+expressions: key (type: int), value (type: int), (key > 
40) (type: boolean), (value > 50) (type: boolean), (key = value) (type: boolean)
+outputColumnNames: _col0, _col1, _col2, _col3, _col4
 Select Vectorization:
 className: VectorSelectOperator
 native: true
+selectExpressions: LongColGreaterLongScalar(col 0:int, 
val 40) -> 3:boolean, LongColGreaterLongScalar(col 1:int, val 50) -> 4:boolean, 
LongColEqualLongColumn(col 0:int, col 1:int) -> 5:boolean
 Statistics: Num rows: 4 Data size: 32 Basic stats: 
COMPLETE Column stats: NONE
 Map Join Operator
   condition map:

[26/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query49.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/spark/query49.q.out 
b/ql/src/test/results/clientpositive/perf/spark/query49.q.out
index 07d14b5..354c178 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query49.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query49.q.out
@@ -304,7 +304,7 @@ STAGE PLANS:
 predicate: ((ws_net_paid > 0) and (ws_net_profit > 1) and 
(ws_quantity > 0) and ws_item_sk is not null and ws_order_number is not null 
and ws_sold_date_sk is not null) (type: boolean)
 Statistics: Num rows: 5333432 Data size: 725192506 Basic 
stats: COMPLETE Column stats: NONE
 Select Operator
-  expressions: ws_sold_date_sk (type: int), ws_item_sk 
(type: int), ws_order_number (type: int), ws_quantity (type: int), ws_net_paid 
(type: decimal(7,2))
+  expressions: ws_sold_date_sk (type: int), ws_item_sk 
(type: int), ws_order_number (type: int), CASE WHEN (ws_quantity is not null) 
THEN (ws_quantity) ELSE (0) END (type: int), CASE WHEN (ws_net_paid is not 
null) THEN (ws_net_paid) ELSE (0) END (type: decimal(12,2))
   outputColumnNames: _col0, _col1, _col2, _col3, _col4
   Statistics: Num rows: 5333432 Data size: 725192506 Basic 
stats: COMPLETE Column stats: NONE
   Reduce Output Operator
@@ -312,7 +312,7 @@ STAGE PLANS:
 sort order: +
 Map-reduce partition columns: _col0 (type: int)
 Statistics: Num rows: 5333432 Data size: 725192506 
Basic stats: COMPLETE Column stats: NONE
-value expressions: _col1 (type: int), _col2 (type: 
int), _col3 (type: int), _col4 (type: decimal(7,2))
+value expressions: _col1 (type: int), _col2 (type: 
int), _col3 (type: int), _col4 (type: decimal(12,2))
 Execution mode: vectorized
 Map 10 
 Map Operator Tree:
@@ -343,7 +343,7 @@ STAGE PLANS:
 predicate: ((wr_return_amt > 1) and wr_item_sk is not 
null and wr_order_number is not null) (type: boolean)
 Statistics: Num rows: 4799489 Data size: 441731394 Basic 
stats: COMPLETE Column stats: NONE
 Select Operator
-  expressions: wr_item_sk (type: int), wr_order_number 
(type: int), wr_return_quantity (type: int), wr_return_amt (type: decimal(7,2))
+  expressions: wr_item_sk (type: int), wr_order_number 
(type: int), CASE WHEN (wr_return_quantity is not null) THEN 
(wr_return_quantity) ELSE (0) END (type: int), CASE WHEN (wr_return_amt is not 
null) THEN (wr_return_amt) ELSE (0) END (type: decimal(12,2))
   outputColumnNames: _col0, _col1, _col2, _col3
   Statistics: Num rows: 4799489 Data size: 441731394 Basic 
stats: COMPLETE Column stats: NONE
   Reduce Output Operator
@@ -351,7 +351,7 @@ STAGE PLANS:
 sort order: ++
 Map-reduce partition columns: _col0 (type: int), _col1 
(type: int)
 Statistics: Num rows: 4799489 Data size: 441731394 
Basic stats: COMPLETE Column stats: NONE
-value expressions: _col2 (type: int), _col3 (type: 
decimal(7,2))
+value expressions: _col2 (type: int), _col3 (type: 
decimal(12,2))
 Execution mode: vectorized
 Map 12 
 Map Operator Tree:
@@ -363,7 +363,7 @@ STAGE PLANS:
 predicate: ((cs_net_paid > 0) and (cs_net_profit > 1) and 
(cs_quantity > 0) and cs_item_sk is not null and cs_order_number is not null 
and cs_sold_date_sk is not null) (type: boolean)
 Statistics: Num rows: 10666290 Data size: 129931 Basic 
stats: COMPLETE Column stats: NONE
 Select Operator
-  expressions: cs_sold_date_sk (type: int), cs_item_sk 
(type: int), cs_order_number (type: int), cs_quantity (type: int), cs_net_paid 
(type: decimal(7,2))
+  expressions: cs_sold_date_sk (type: int), cs_item_sk 
(type: int), cs_order_number (type: int), CASE WHEN (cs_quantity is not null) 
THEN (cs_quantity) ELSE (0) END (type: int), CASE WHEN (cs_net_paid is not 
null) THEN (cs_net_paid) ELSE (0) END (type: decimal(12,2))
   outputColumnNames: _col0, _col1, _col2, _col3, _col4
   Statistics: Num rows: 10666290 Data size: 129931 
Basic stats: COMPLETE Column stats: NONE
   Reduce Output Operator
@@ -371,7 +371,7 @@ STAGE PLANS:
 sort order: +
 Map-reduce partition columns: _col0 (type: int)

[45/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out 
b/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out
index 68fc903..4ed9b60 100644
--- a/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out
@@ -135,10 +135,10 @@ POSTHOOK: Input: default@srcbucket_mapjoin_part_n13
 POSTHOOK: Input: default@srcbucket_mapjoin_part_n13@ds=2008-04-08
 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n6
 OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1`
-FROM (SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds`
+FROM (SELECT `key`, `value`
 FROM `default`.`srcbucket_mapjoin_part_2_n11`
 WHERE `ds` = '2008-04-08' AND `key` IS NOT NULL) AS `t0`
-INNER JOIN (SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds`
+INNER JOIN (SELECT `key`, `value`
 FROM `default`.`srcbucket_mapjoin_part_n13`
 WHERE `ds` = '2008-04-08' AND `key` IS NOT NULL) AS `t2` ON `t0`.`key` = 
`t2`.`key`
 STAGE DEPENDENCIES:
@@ -326,11 +326,11 @@ STAGE PLANS:
 keys:
   0 _col0 (type: int)
   1 _col0 (type: int)
-outputColumnNames: _col0, _col1, _col4
+outputColumnNames: _col0, _col1, _col3
 Position of Big Table: 1
 Statistics: Num rows: 156 Data size: 89111 Basic stats: 
PARTIAL Column stats: NONE
 Select Operator
-  expressions: CAST( _col0 AS STRING) (type: string), _col1 
(type: string), _col4 (type: string)
+  expressions: CAST( _col0 AS STRING) (type: string), _col1 
(type: string), _col3 (type: string)
   outputColumnNames: _col0, _col1, _col2
   Statistics: Num rows: 156 Data size: 89111 Basic stats: 
PARTIAL Column stats: NONE
   File Output Operator
@@ -584,10 +584,10 @@ POSTHOOK: Input: default@srcbucket_mapjoin_part_n13
 POSTHOOK: Input: default@srcbucket_mapjoin_part_n13@ds=2008-04-08
 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n6
 OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1`
-FROM (SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds`
+FROM (SELECT `key`, `value`
 FROM `default`.`srcbucket_mapjoin_part_2_n11`
 WHERE `ds` = '2008-04-08' AND `key` IS NOT NULL) AS `t0`
-INNER JOIN (SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds`
+INNER JOIN (SELECT `key`, `value`
 FROM `default`.`srcbucket_mapjoin_part_n13`
 WHERE `ds` = '2008-04-08' AND `key` IS NOT NULL) AS `t2` ON `t0`.`key` = 
`t2`.`key`
 STAGE DEPENDENCIES:
@@ -775,11 +775,11 @@ STAGE PLANS:
 keys:
   0 _col0 (type: int)
   1 _col0 (type: int)
-outputColumnNames: _col0, _col1, _col4
+outputColumnNames: _col0, _col1, _col3
 Position of Big Table: 1
 Statistics: Num rows: 156 Data size: 89111 Basic stats: 
PARTIAL Column stats: NONE
 Select Operator
-  expressions: CAST( _col0 AS STRING) (type: string), _col1 
(type: string), _col4 (type: string)
+  expressions: CAST( _col0 AS STRING) (type: string), _col1 
(type: string), _col3 (type: string)
   outputColumnNames: _col0, _col1, _col2
   Statistics: Num rows: 156 Data size: 89111 Basic stats: 
PARTIAL Column stats: NONE
   File Output Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out 
b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out
index acb1e87..8039d0f 100644
--- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out
@@ -125,7 +125,7 @@ STAGE PLANS:
 Select Operator
   expressions: key (type: int), value (type: string)
   outputColumnNames: _col0, _col1
-  Statistics: Num rows: 84 Data size: 15036 Basic stats: 
COMPLETE Column stats: COMPLETE
+  Statistics: Num rows: 84 Data size: 7896 Basic stats: 
COMPLETE Column stats: COMPLETE
 Map Operator Tree:
 TableScan
   alias: a
@@ -137,17 +137,17 @@ STAGE PLANS:
 Select Operator
   expressions: key (type: int), value (type: string)
   outputColumnNames: _col0, _col1
-  Statistics: Num rows: 10 Data size: 1780

[33/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out 
b/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out
index bd40d8f..1b4d343 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out
@@ -234,17 +234,18 @@ STAGE PLANS:
   TableScan Vectorization:
   native: true
   Select Operator
-expressions: key (type: int), value (type: int), col_1 
(type: string)
-outputColumnNames: _col0, _col1, _col2
+expressions: key (type: int), value (type: int), col_1 
(type: string), key BETWEEN 100 AND 102 (type: boolean)
+outputColumnNames: _col0, _col1, _col2, _col3
 Select Vectorization:
 className: VectorSelectOperator
 native: true
-Statistics: Num rows: 6 Data size: 572 Basic stats: 
COMPLETE Column stats: COMPLETE
+selectExpressions: LongColumnBetween(col 0:int, left 
100, right 102) -> 4:boolean
+Statistics: Num rows: 6 Data size: 596 Basic stats: 
COMPLETE Column stats: COMPLETE
 Map Join Operator
   condition map:
Left Outer Join 0 to 1
   filter predicates:
-0 {_col0 BETWEEN 100 AND 102}
+0 {_col3}
 1 
   keys:
 0 _col1 (type: int)
@@ -253,20 +254,27 @@ STAGE PLANS:
   className: VectorMapJoinOuterLongOperator
   native: true
   nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized 
Table and Supports Key Types IS true
-  outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5
+  outputColumnNames: _col0, _col1, _col2, _col4, _col5, 
_col6
   input vertices:
 1 Map 2
   Statistics: Num rows: 8 Data size: 1049 Basic stats: 
COMPLETE Column stats: COMPLETE
-  File Output Operator
-compressed: false
-File Sink Vectorization:
-className: VectorFileSinkOperator
-native: false
+  Select Operator
+expressions: _col0 (type: int), _col1 (type: int), 
_col2 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: string)
+outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5
+Select Vectorization:
+className: VectorSelectOperator
+native: true
 Statistics: Num rows: 8 Data size: 1049 Basic stats: 
COMPLETE Column stats: COMPLETE
-table:
-input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+File Output Operator
+  compressed: false
+  File Sink Vectorization:
+  className: VectorFileSinkOperator
+  native: false
+  Statistics: Num rows: 8 Data size: 1049 Basic stats: 
COMPLETE Column stats: COMPLETE
+  table:
+  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+  serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Execution mode: vectorized, llap
 LLAP IO: no inputs
 Map Vectorization:
@@ -396,17 +404,18 @@ STAGE PLANS:
   TableScan Vectorization:
   native: true
   Select Operator
-expressions: key (type: int), value (type: int), col_1 
(type: string)
-outputColumnNames: _col0, _col1, _col2
+expressions: key (type: int), value (type: int), col_1 
(type: string), key BETWEEN

[50/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/auto_join_stats.q.out
--
diff --git a/ql/src/test/results/clientpositive/auto_join_stats.q.out 
b/ql/src/test/results/clientpositive/auto_join_stats.q.out
index 43a248b..42e165d 100644
--- a/ql/src/test/results/clientpositive/auto_join_stats.q.out
+++ b/ql/src/test/results/clientpositive/auto_join_stats.q.out
@@ -63,8 +63,8 @@ STAGE PLANS:
   predicate: key is not null (type: boolean)
   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
   Select Operator
-expressions: key (type: string)
-outputColumnNames: _col0
+expressions: key (type: string), UDFToDouble(key) (type: 
double)
+outputColumnNames: _col0, _col1
 Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
 HashTable Sink Operator
   keys:
@@ -82,8 +82,8 @@ STAGE PLANS:
   predicate: key is not null (type: boolean)
   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
   Select Operator
-expressions: key (type: string)
-outputColumnNames: _col0
+expressions: key (type: string), UDFToDouble(key) (type: 
double)
+outputColumnNames: _col0, _col1
 Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
 Map Join Operator
   condition map:
@@ -91,7 +91,7 @@ STAGE PLANS:
   keys:
 0 _col0 (type: string)
 1 _col0 (type: string)
-  outputColumnNames: _col0, _col1
+  outputColumnNames: _col0, _col1, _col2, _col3
   Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE
   File Output Operator
 compressed: false
@@ -119,13 +119,13 @@ STAGE PLANS:
   predicate: key is not null (type: boolean)
   Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
   Select Operator
-expressions: key (type: string)
-outputColumnNames: _col0
+expressions: key (type: string), UDFToDouble(key) (type: 
double)
+outputColumnNames: _col0, _col1
 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
 HashTable Sink Operator
   keys:
-0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double)
-1 UDFToDouble(_col0) (type: double)
+0 (_col1 + _col3) (type: double)
+1 _col1 (type: double)
 
   Stage: Stage-5
 Map Reduce
@@ -135,17 +135,21 @@ STAGE PLANS:
   condition map:
Inner Join 0 to 1
   keys:
-0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double)
-1 UDFToDouble(_col0) (type: double)
-  outputColumnNames: _col0, _col1, _col2
+0 (_col1 + _col3) (type: double)
+1 _col1 (type: double)
+  outputColumnNames: _col0, _col2, _col4
   Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE 
Column stats: NONE
-  File Output Operator
-compressed: false
+  Select Operator
+expressions: _col0 (type: string), _col2 (type: string), _col4 
(type: string)
+outputColumnNames: _col0, _col1, _col2
 Statistics: Num rows: 605 Data size: 6427 Basic stats: 
COMPLETE Column stats: NONE
-table:
-input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+File Output Operator
+  compressed: false
+  Statistics: Num rows: 605 Data size: 6427 Basic stats: 
COMPLETE Column stats: NONE
+  table:
+  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Execution mode: vectorized
   Local Work:
 Map Reduce Local Work
@@ -166,8 +170,8 @@ STAGE PLANS:
   predicate: key is not null (type: boolean)
   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
   Select Operator
-

[29/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query18.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/spark/query18.q.out 
b/ql/src/test/results/clientpositive/perf/spark/query18.q.out
index e77a918..1d73576 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query18.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query18.q.out
@@ -106,15 +106,15 @@ STAGE PLANS:
 predicate: ((c_birth_month) IN (9, 5, 12, 4, 1, 10) and 
c_current_addr_sk is not null and c_current_cdemo_sk is not null and 
c_customer_sk is not null) (type: boolean)
 Statistics: Num rows: 8000 Data size: 68801615852 
Basic stats: COMPLETE Column stats: NONE
 Select Operator
-  expressions: c_customer_sk (type: int), 
c_current_cdemo_sk (type: int), c_current_addr_sk (type: int), c_birth_year 
(type: int)
-  outputColumnNames: _col0, _col1, _col2, _col4
+  expressions: c_customer_sk (type: int), 
c_current_cdemo_sk (type: int), c_current_addr_sk (type: int), CAST( 
c_birth_year AS decimal(12,2)) (type: decimal(12,2))
+  outputColumnNames: _col0, _col1, _col2, _col3
   Statistics: Num rows: 8000 Data size: 68801615852 
Basic stats: COMPLETE Column stats: NONE
   Reduce Output Operator
 key expressions: _col2 (type: int)
 sort order: +
 Map-reduce partition columns: _col2 (type: int)
 Statistics: Num rows: 8000 Data size: 68801615852 
Basic stats: COMPLETE Column stats: NONE
-value expressions: _col0 (type: int), _col1 (type: 
int), _col4 (type: int)
+value expressions: _col0 (type: int), _col1 (type: 
int), _col3 (type: decimal(12,2))
 Execution mode: vectorized
 Map 13 
 Map Operator Tree:
@@ -145,15 +145,15 @@ STAGE PLANS:
 predicate: ((cd_education_status = 'College') and 
(cd_gender = 'M') and cd_demo_sk is not null) (type: boolean)
 Statistics: Num rows: 465450 Data size: 179296539 Basic 
stats: COMPLETE Column stats: NONE
 Select Operator
-  expressions: cd_demo_sk (type: int), cd_dep_count (type: 
int)
-  outputColumnNames: _col0, _col3
+  expressions: cd_demo_sk (type: int), CAST( cd_dep_count 
AS decimal(12,2)) (type: decimal(12,2))
+  outputColumnNames: _col0, _col1
   Statistics: Num rows: 465450 Data size: 179296539 Basic 
stats: COMPLETE Column stats: NONE
   Reduce Output Operator
 key expressions: _col0 (type: int)
 sort order: +
 Map-reduce partition columns: _col0 (type: int)
 Statistics: Num rows: 465450 Data size: 179296539 
Basic stats: COMPLETE Column stats: NONE
-value expressions: _col3 (type: int)
+value expressions: _col1 (type: decimal(12,2))
 Execution mode: vectorized
 Map 15 
 Map Operator Tree:
@@ -224,7 +224,7 @@ STAGE PLANS:
 predicate: (cs_bill_cdemo_sk is not null and 
cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk 
is not null) (type: boolean)
 Statistics: Num rows: 287989836 Data size: 38999608952 
Basic stats: COMPLETE Column stats: NONE
 Select Operator
-  expressions: cs_sold_date_sk (type: int), 
cs_bill_customer_sk (type: int), cs_bill_cdemo_sk (type: int), cs_item_sk 
(type: int), cs_quantity (type: int), cs_list_price (type: decimal(7,2)), 
cs_sales_price (type: decimal(7,2)), cs_coupon_amt (type: decimal(7,2)), 
cs_net_profit (type: decimal(7,2))
+  expressions: cs_sold_date_sk (type: int), 
cs_bill_customer_sk (type: int), cs_bill_cdemo_sk (type: int), cs_item_sk 
(type: int), CAST( cs_quantity AS decimal(12,2)) (type: decimal(12,2)), CAST( 
cs_list_price AS decimal(12,2)) (type: decimal(12,2)), CAST( cs_coupon_amt AS 
decimal(12,2)) (type: decimal(12,2)), CAST( cs_sales_price AS decimal(12,2)) 
(type: decimal(12,2)), CAST( cs_net_profit AS decimal(12,2)) (type: 
decimal(12,2))
   outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8
   Statistics: Num rows: 287989836 Data size: 38999608952 
Basic stats: COMPLETE Column stats: NONE
   Reduce Output Operator
@@ -232,7 +232,7 @@ STAGE PLANS:
 sort order: +
 Map-reduce partition columns: _col0 (type: int)

[02/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out 
b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
index 3165970..2fc9a3d 100644
--- a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
+++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
@@ -2662,7 +2662,7 @@ STAGE PLANS:
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 242 Data size: 2566 Basic stats: 
COMPLETE Column stats: NONE
 Select Operator
-  expressions: key (type: int), value (type: string)
+  expressions: key (type: int), substr(value, 5) (type: 
string)
   outputColumnNames: _col0, _col1
   Statistics: Num rows: 242 Data size: 2566 Basic stats: 
COMPLETE Column stats: NONE
   Reduce Output Operator
@@ -2703,22 +2703,18 @@ STAGE PLANS:
   1 _col0 (type: int)
 outputColumnNames: _col1, _col2
 Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE
-Select Operator
-  expressions: _col1 (type: int), substr(_col2, 5) (type: 
string)
+Group By Operator
+  aggregations: sum(_col2)
+  keys: _col1 (type: int)
+  mode: hash
   outputColumnNames: _col0, _col1
   Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE
-  Group By Operator
-aggregations: sum(_col1)
-keys: _col0 (type: int)
-mode: hash
-outputColumnNames: _col0, _col1
+  Reduce Output Operator
+key expressions: _col0 (type: int)
+sort order: +
+Map-reduce partition columns: _col0 (type: int)
 Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE
-Reduce Output Operator
-  key expressions: _col0 (type: int)
-  sort order: +
-  Map-reduce partition columns: _col0 (type: int)
-  Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE
-  value expressions: _col1 (type: double)
+value expressions: _col1 (type: double)
 Reducer 5 
 Execution mode: vectorized
 Reduce Operator Tree:
@@ -2827,7 +2823,7 @@ STAGE PLANS:
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 242 Data size: 2566 Basic stats: 
COMPLETE Column stats: NONE
 Select Operator
-  expressions: key (type: int), value (type: string)
+  expressions: key (type: int), substr(value, 5) (type: 
string)
   outputColumnNames: _col0, _col1
   Statistics: Num rows: 242 Data size: 2566 Basic stats: 
COMPLETE Column stats: NONE
   Reduce Output Operator
@@ -2868,22 +2864,18 @@ STAGE PLANS:
   1 _col0 (type: int)
 outputColumnNames: _col1, _col2
 Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE
-Select Operator
-  expressions: _col1 (type: int), substr(_col2, 5) (type: 
string)
+Group By Operator
+  aggregations: sum(_col2)
+  keys: _col1 (type: int)
+  mode: hash
   outputColumnNames: _col0, _col1
   Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE
-  Group By Operator
-aggregations: sum(_col1)
-keys: _col0 (type: int)
-mode: hash
-outputColumnNames: _col0, _col1
+  Reduce Output Operator
+key expressions: _col0 (type: int)
+sort order: +
+Map-reduce partition columns: _col0 (type: int)
 Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE
-Reduce Output Operator
-  key expressions: _col0 (type: int)
-  sort order: +
-  Map-reduce partition columns: _col0 (type: int)
-  Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE
-  value expressions: _col1 (type:

[08/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query66.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/tez/query66.q.out 
b/ql/src/test/results/clientpositive/perf/tez/query66.q.out
index 225b62f..767d47b 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query66.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query66.q.out
@@ -511,10 +511,10 @@ Stage-0
 PartitionCols:_col0, _col1, _col2, _col3, 
_col4, _col5
 Group By Operator [GBY_62] (rows=5559759 
width=3166)
   
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)"],keys:_col0,
 _col1, _col2, _col3, _col4, _col5
-  Select Operator [SEL_60] (rows=5559759 
width=680)
+  Select Operator [SEL_60] (rows=5559759 
width=750)
 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"]
-Merge Join Operator [MERGEJOIN_204] 
(rows=5559759 width=680)
-  
Conds:RS_57._col3=RS_259._col0(Inner),Output:["_col4","_col5","_col6","_col11","_col15","_col16","_col17","_col18","_col19","_col20"]
+Merge Join Operator [MERGEJOIN_204] 
(rows=5559759 width=750)
+  
Conds:RS_57._col3=RS_259._col0(Inner),Output:["_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col22","_col23","_col24","_col25","_col26","_col27"]
 <-Map 24 [SIMPLE_EDGE] vectorized
   SHUFFLE [RS_259]
 PartitionCols:_col0
@@ -527,12 +527,12 @@ Stage-0
 <-Reducer 14 [SIMPLE_EDGE]
   SHUFFLE [RS_57]
 PartitionCols:_col3
-Merge Join Operator [MERGEJOIN_203] 
(rows=5559759 width=205)
-  
Conds:RS_54._col2=RS_245._col0(Inner),Output:["_col3","_col4","_col5","_col6","_col11"]
+Merge Join Operator [MERGEJOIN_203] 
(rows=5559759 width=274)
+  
Conds:RS_54._col2=RS_245._col0(Inner),Output:["_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"]
 <-Map 21 [SIMPLE_EDGE] vectorized
   SHUFFLE [RS_245]
 PartitionCols:_col0
-Select Operator [SEL_242] (rows=1 
width=88)
+Select Operator [SEL_242] (rows=1 
width=4)
   Output:["_col0"]
   Filter Operator [FIL_241] 
(rows=1 width=88)
 predicate:((sm_carrier) IN 
('DIAMOND', 'AIRBORNE') and sm_ship_mode_sk is not null)
@@ -541,13 +541,13 @@ Stage-0
 <-Reducer 13 [SIMPLE_EDGE]
   SHUFFLE [RS_54]
 PartitionCols:_col2
-Merge Join Operator 
[MERGEJOIN_202] (rows=9518 width=224)
-  
Conds:RS_51._col0=RS_233._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col11"]
+Merge Join Operator 
[MERGEJOIN_202] (rows=9518 width=278)
+  
Conds:RS_51._col0=RS_233._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"]
 <-Map 18

[21/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out 
b/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out
index ace7cf5..b55f2c1 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out
@@ -1,7 +1,7 @@
-Warning: Shuffle Join MERGEJOIN[593][tables = [$hdt$_1, $hdt$_2]] in Stage 
'Reducer 29' is a cross product
-Warning: Shuffle Join MERGEJOIN[594][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in 
Stage 'Reducer 30' is a cross product
-Warning: Shuffle Join MERGEJOIN[596][tables = [$hdt$_1, $hdt$_2]] in Stage 
'Reducer 33' is a cross product
-Warning: Shuffle Join MERGEJOIN[597][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in 
Stage 'Reducer 34' is a cross product
+Warning: Shuffle Join MERGEJOIN[583][tables = [$hdt$_1, $hdt$_2]] in Stage 
'Reducer 29' is a cross product
+Warning: Shuffle Join MERGEJOIN[584][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in 
Stage 'Reducer 30' is a cross product
+Warning: Shuffle Join MERGEJOIN[586][tables = [$hdt$_1, $hdt$_2]] in Stage 
'Reducer 33' is a cross product
+Warning: Shuffle Join MERGEJOIN[587][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in 
Stage 'Reducer 34' is a cross product
 PREHOOK: query: explain cbo
 with frequent_ss_items as 
  (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date 
solddate,count(*) cnt
@@ -128,16 +128,15 @@ HiveSortLimit(fetch=[100])
 HiveJoin(condition=[=($3, $0)], joinType=[inner], 
algorithm=[none], cost=[not available])
   HiveProject($f0=[$0])
 HiveJoin(condition=[>($1, *(0.95, $3))], joinType=[inner], 
algorithm=[none], cost=[not available])
-  HiveProject($f0=[$0], $f1=[$1])
-HiveAggregate(group=[{0}], agg#0=[sum($1)])
-  HiveProject($f0=[$3], $f1=[*(CAST($1):DECIMAL(10, 0), 
$2)])
-HiveJoin(condition=[=($0, $3)], joinType=[inner], 
algorithm=[none], cost=[not available])
-  HiveProject(ss_customer_sk=[$3], ss_quantity=[$10], 
ss_sales_price=[$13])
-HiveFilter(condition=[IS NOT NULL($3)])
-  HiveTableScan(table=[[default, store_sales]], 
table:alias=[store_sales])
-  HiveProject(c_customer_sk=[$0])
-HiveFilter(condition=[IS NOT NULL($0)])
-  HiveTableScan(table=[[default, customer]], 
table:alias=[customer])
+  HiveProject(c_customer_sk=[$0], $f1=[$1])
+HiveAggregate(group=[{2}], agg#0=[sum($1)])
+  HiveJoin(condition=[=($0, $2)], joinType=[inner], 
algorithm=[none], cost=[not available])
+HiveProject(ss_customer_sk=[$3], 
*=[*(CAST($10):DECIMAL(10, 0), $13)])
+  HiveFilter(condition=[IS NOT NULL($3)])
+HiveTableScan(table=[[default, store_sales]], 
table:alias=[store_sales])
+HiveProject(c_customer_sk=[$0])
+  HiveFilter(condition=[IS NOT NULL($0)])
+HiveTableScan(table=[[default, customer]], 
table:alias=[customer])
   HiveJoin(condition=[true], joinType=[inner], 
algorithm=[none], cost=[not available])
 HiveProject(cnt=[$0])
   HiveFilter(condition=[<=(sq_count_check($0), 1)])
@@ -146,75 +145,71 @@ HiveSortLimit(fetch=[100])
 HiveProject
   HiveProject($f0=[$0])
 HiveAggregate(group=[{}], agg#0=[count($0)])
-  HiveProject($f0=[$0], $f1=[$1])
-HiveAggregate(group=[{0}], agg#0=[sum($1)])
-  HiveProject($f0=[$0], 
$f1=[*(CAST($3):DECIMAL(10, 0), $4)])
-HiveJoin(condition=[=($2, $0)], 
joinType=[inner], algorithm=[none], cost=[not available])
-  HiveProject(c_customer_sk=[$0])
-HiveFilter(condition=[IS NOT 
NULL($0)])
-  HiveTableScan(table=[[default, 
customer]], table:alias=[customer])
-  HiveJoin(condition=[=($0, $4)], 
joinType=[inner], algorithm=[none], cost=[not available])
-HiveProject(ss_sold_date_sk=[$0], 
ss_customer_sk=[$3], ss_quantity=[$10], ss_sales_price=[$13])
-  HiveFilter(condition=[AND(IS NOT 
NULL($3), IS NOT NULL($0))])
-HiveTableScan(table=[[default,

[27/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query4.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/spark/query4.q.out 
b/ql/src/test/results/clientpositive/perf/spark/query4.q.out
index 67e8b4f..93e83ef 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query4.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query4.q.out
@@ -237,13 +237,13 @@ STAGE PLANS:
 Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 398), Map 9 
(PARTITION-LEVEL SORT, 398)
 Reducer 11 <- Map 14 (PARTITION-LEVEL SORT, 975), Reducer 10 
(PARTITION-LEVEL SORT, 975)
 Reducer 12 <- Reducer 11 (GROUP, 481)
-Reducer 16 <- Map 15 (PARTITION-LEVEL SORT, 306), Map 19 
(PARTITION-LEVEL SORT, 306)
-Reducer 17 <- Map 20 (PARTITION-LEVEL SORT, 873), Reducer 16 
(PARTITION-LEVEL SORT, 873)
-Reducer 18 <- Reducer 17 (GROUP, 369)
+Reducer 16 <- Map 15 (PARTITION-LEVEL SORT, 154), Map 19 
(PARTITION-LEVEL SORT, 154)
+Reducer 17 <- Map 20 (PARTITION-LEVEL SORT, 706), Reducer 16 
(PARTITION-LEVEL SORT, 706)
+Reducer 18 <- Reducer 17 (GROUP, 186)
 Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 154), Map 7 (PARTITION-LEVEL 
SORT, 154)
-Reducer 22 <- Map 21 (PARTITION-LEVEL SORT, 154), Map 25 
(PARTITION-LEVEL SORT, 154)
-Reducer 23 <- Map 26 (PARTITION-LEVEL SORT, 706), Reducer 22 
(PARTITION-LEVEL SORT, 706)
-Reducer 24 <- Reducer 23 (GROUP, 186)
+Reducer 22 <- Map 21 (PARTITION-LEVEL SORT, 306), Map 25 
(PARTITION-LEVEL SORT, 306)
+Reducer 23 <- Map 26 (PARTITION-LEVEL SORT, 873), Reducer 22 
(PARTITION-LEVEL SORT, 873)
+Reducer 24 <- Reducer 23 (GROUP, 369)
 Reducer 28 <- Map 27 (PARTITION-LEVEL SORT, 306), Map 31 
(PARTITION-LEVEL SORT, 306)
 Reducer 29 <- Map 32 (PARTITION-LEVEL SORT, 873), Reducer 28 
(PARTITION-LEVEL SORT, 873)
 Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 706), Reducer 2 
(PARTITION-LEVEL SORT, 706)
@@ -266,16 +266,15 @@ STAGE PLANS:
 predicate: (ws_bill_customer_sk is not null and 
ws_sold_date_sk is not null) (type: boolean)
 Statistics: Num rows: 144002668 Data size: 19580198212 
Basic stats: COMPLETE Column stats: NONE
 Select Operator
-  expressions: ws_sold_date_sk (type: int), 
ws_bill_customer_sk (type: int), ws_ext_discount_amt (type: decimal(7,2)), 
ws_ext_sales_price (type: decimal(7,2)), ws_ext_wholesale_cost (type: 
decimal(7,2)), ws_ext_list_price (type: decimal(7,2))
-  outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5
+  expressions: ws_sold_date_sk (type: int), 
ws_bill_customer_sk (type: int), ws_ext_list_price - ws_ext_wholesale_cost) 
- ws_ext_discount_amt) + ws_ext_sales_price) / 2) (type: decimal(14,6))
+  outputColumnNames: _col0, _col1, _col2
   Statistics: Num rows: 144002668 Data size: 19580198212 
Basic stats: COMPLETE Column stats: NONE
   Reduce Output Operator
 key expressions: _col0 (type: int)
 sort order: +
 Map-reduce partition columns: _col0 (type: int)
 Statistics: Num rows: 144002668 Data size: 19580198212 
Basic stats: COMPLETE Column stats: NONE
-value expressions: _col1 (type: int), _col2 (type: 
decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 
(type: decimal(7,2))
-Execution mode: vectorized
+value expressions: _col1 (type: int), _col2 (type: 
decimal(14,6))
 Map 13 
 Map Operator Tree:
 TableScan
@@ -318,23 +317,22 @@ STAGE PLANS:
 Map 15 
 Map Operator Tree:
 TableScan
-  alias: catalog_sales
-  filterExpr: (cs_bill_customer_sk is not null and 
cs_sold_date_sk is not null) (type: boolean)
-  Statistics: Num rows: 287989836 Data size: 38999608952 Basic 
stats: COMPLETE Column stats: NONE
+  alias: web_sales
+  filterExpr: (ws_bill_customer_sk is not null and 
ws_sold_date_sk is not null) (type: boolean)
+  Statistics: Num rows: 144002668 Data size: 19580198212 Basic 
stats: COMPLETE Column stats: NONE
   Filter Operator
-predicate: (cs_bill_customer_sk is not null and 
cs_sold_date_sk is not null) (type: boolean)
-Statistics: Num rows: 287989836 Data size: 38999608952 
Basic stats: COMPLETE Column stats: NONE
+predicate: (ws_bill_customer_sk is not null and 
ws_sold_date_sk is not null) (type: boolean)
+Statistics: Num rows: 144002668 Data size: 19580198212 
Basic

[01/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

Repository: hive
Updated Branches:
  refs/heads/master dca389b06 -> 558876462


http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out 
b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out
index 44f9b5d..4190a21 100644
--- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out
+++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out
@@ -396,14 +396,14 @@ STAGE PLANS:
 Statistics: Num rows: 13 Data size: 1573 Basic stats: 
COMPLETE Column stats: NONE
 Select Operator
   expressions: p_name (type: string), p_mfgr (type: 
string), p_brand (type: string), p_type (type: string), p_size (type: int), 
p_container (type: string), p_retailprice (type: double), p_comment (type: 
string)
-  outputColumnNames: _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8
+  outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7
   Statistics: Num rows: 13 Data size: 1573 Basic stats: 
COMPLETE Column stats: NONE
   Reduce Output Operator
-key expressions: _col1 (type: string)
+key expressions: _col0 (type: string)
 sort order: +
-Map-reduce partition columns: _col1 (type: string)
+Map-reduce partition columns: _col0 (type: string)
 Statistics: Num rows: 13 Data size: 1573 Basic stats: 
COMPLETE Column stats: NONE
-value expressions: _col2 (type: string), _col3 (type: 
string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 
(type: double), _col8 (type: string)
+value expressions: _col1 (type: string), _col2 (type: 
string), _col3 (type: string), _col4 (type: int), _col5 (type: string), _col6 
(type: double), _col7 (type: string)
 Execution mode: vectorized
 Map 5 
 Map Operator Tree:
@@ -426,13 +426,13 @@ STAGE PLANS:
  Inner Join 0 to 1
 keys:
   0 _col1 (type: string)
-  1 _col1 (type: string)
-outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col10, _col11, _col12, _col13, _col14, _col15, _col16, 
_col17
+  1 _col0 (type: string)
+outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, 
_col16
 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE 
Column stats: NONE
 Reduce Output Operator
   sort order: 
   Statistics: Num rows: 28 Data size: 3461 Basic stats: 
COMPLETE Column stats: NONE
-  value expressions: _col0 (type: int), _col1 (type: string), 
_col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: 
int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 
(type: string), _col11 (type: string), _col12 (type: string), _col13 (type: 
string), _col14 (type: int), _col15 (type: string), _col16 (type: double), 
_col17 (type: string)
+  value expressions: _col0 (type: int), _col1 (type: string), 
_col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: 
int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 
(type: string), _col10 (type: string), _col11 (type: string), _col12 (type: 
string), _col13 (type: int), _col14 (type: string), _col15 (type: double), 
_col16 (type: string)
 Reducer 3 
 Reduce Operator Tree:
   Join Operator
@@ -441,10 +441,10 @@ STAGE PLANS:
 keys:
   0 
   1 
-outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col10, _col11, _col12, _col13, _col14, _col15, _col16, 
_col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26
+outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, 
_col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25
 Statistics: Num rows: 728 Data size: 178830 Basic stats: 
COMPLETE Column stats: NONE
 Select Operator
-  expressions: _col18 (type: int), _col19 (type: string), 
_col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 
(type: int), _col24 (type: string), _col25 (type: double), _col26 (type: 
string), 1 (type: int), _col10 (type: string),

[24/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query75.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/spark/query75.q.out 
b/ql/src/test/results/clientpositive/perf/spark/query75.q.out
index 85e6dca..553d11a 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query75.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query75.q.out
@@ -219,14 +219,14 @@ STAGE PLANS:
 Statistics: Num rows: 231000 Data size: 331780228 Basic 
stats: COMPLETE Column stats: NONE
 Select Operator
   expressions: i_item_sk (type: int), i_brand_id (type: 
int), i_class_id (type: int), i_category_id (type: int), i_manufact_id (type: 
int)
-  outputColumnNames: _col0, _col1, _col2, _col3, _col5
+  outputColumnNames: _col0, _col1, _col2, _col3, _col4
   Statistics: Num rows: 231000 Data size: 331780228 Basic 
stats: COMPLETE Column stats: NONE
   Reduce Output Operator
 key expressions: _col0 (type: int)
 sort order: +
 Map-reduce partition columns: _col0 (type: int)
 Statistics: Num rows: 231000 Data size: 331780228 
Basic stats: COMPLETE Column stats: NONE
-value expressions: _col1 (type: int), _col2 (type: 
int), _col3 (type: int), _col5 (type: int)
+value expressions: _col1 (type: int), _col2 (type: 
int), _col3 (type: int), _col4 (type: int)
 Execution mode: vectorized
 Map 11 
 Map Operator Tree:
@@ -397,14 +397,14 @@ STAGE PLANS:
 Statistics: Num rows: 231000 Data size: 331780228 Basic 
stats: COMPLETE Column stats: NONE
 Select Operator
   expressions: i_item_sk (type: int), i_brand_id (type: 
int), i_class_id (type: int), i_category_id (type: int), i_manufact_id (type: 
int)
-  outputColumnNames: _col0, _col1, _col2, _col3, _col5
+  outputColumnNames: _col0, _col1, _col2, _col3, _col4
   Statistics: Num rows: 231000 Data size: 331780228 Basic 
stats: COMPLETE Column stats: NONE
   Reduce Output Operator
 key expressions: _col0 (type: int)
 sort order: +
 Map-reduce partition columns: _col0 (type: int)
 Statistics: Num rows: 231000 Data size: 331780228 
Basic stats: COMPLETE Column stats: NONE
-value expressions: _col1 (type: int), _col2 (type: 
int), _col3 (type: int), _col5 (type: int)
+value expressions: _col1 (type: int), _col2 (type: 
int), _col3 (type: int), _col4 (type: int)
 Execution mode: vectorized
 Map 34 
 Map Operator Tree:
@@ -530,23 +530,23 @@ STAGE PLANS:
 keys:
   0 _col1 (type: int)
   1 _col0 (type: int)
-outputColumnNames: _col1, _col2, _col3, _col4, _col8, _col9, 
_col10, _col12
+outputColumnNames: _col1, _col2, _col3, _col4, _col7, _col8, 
_col9, _col10
 Statistics: Num rows: 696954748 Data size: 61485550191 Basic 
stats: COMPLETE Column stats: NONE
 Reduce Output Operator
   key expressions: _col1 (type: int), _col2 (type: int)
   sort order: ++
   Map-reduce partition columns: _col1 (type: int), _col2 
(type: int)
   Statistics: Num rows: 696954748 Data size: 61485550191 Basic 
stats: COMPLETE Column stats: NONE
-  value expressions: _col3 (type: int), _col4 (type: 
decimal(7,2)), _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 
(type: int)
+  value expressions: _col3 (type: int), _col4 (type: 
decimal(7,2)), _col7 (type: int), _col8 (type: int), _col9 (type: int), _col10 
(type: int)
 Reducer 15 
 Reduce Operator Tree:
   Join Operator
 condition map:
  Left Outer Join 0 to 1
-outputColumnNames: _col3, _col4, _col8, _col9, _col10, _col12, 
_col15, _col16
+outputColumnNames: _col3, _col4, _col7, _col8, _col9, _col10, 
_col13, _col14
 Statistics: Num rows: 766650239 Data size: 67634106676 Basic 
stats: COMPLETE Column stats: NONE
 Select Operator
-  expressions: _col8 (type: int), _col9 (type: int), _col10 
(type: int), _col12 (type: int), (_col3 - CASE WHEN (_col15 is not null) THEN 
(_col15) ELSE (0) END) (type: int), (_col4 - CASE WHEN (_col16 is not null) 
THEN (_col16) ELSE (0) END) (type: decimal(8,2))
+  expressions: _col7 (type: int), _col8

[07/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query75.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/tez/query75.q.out 
b/ql/src/test/results/clientpositive/perf/tez/query75.q.out
index 9968ade..f4bd046 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query75.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query75.q.out
@@ -244,7 +244,7 @@ Stage-0
   Select Operator [SEL_539] 
(rows=170474971 width=131)
 
Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
 Merge Join Operator 
[MERGEJOIN_538] (rows=170474971 width=234)
-  Conds:RS_103._col1, 
_col2=RS_625._col0, _col1(Left 
Outer),Output:["_col3","_col4","_col8","_col9","_col10","_col12","_col15","_col16"]
+  Conds:RS_103._col1, 
_col2=RS_625._col0, _col1(Left 
Outer),Output:["_col3","_col4","_col7","_col8","_col9","_col10","_col13","_col14"]
 <-Map 44 [SIMPLE_EDGE] vectorized
   SHUFFLE [RS_625]
 PartitionCols:_col0, _col1
@@ -258,12 +258,12 @@ Stage-0
   SHUFFLE [RS_103]
 PartitionCols:_col1, _col2
 Merge Join Operator 
[MERGEJOIN_510] (rows=96821196 width=138)
-  
Conds:RS_100._col1=RS_599._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col8","_col9","_col10","_col12"]
+  
Conds:RS_100._col1=RS_599._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col10"]
 <-Map 37 [SIMPLE_EDGE] 
vectorized
   PARTITION_ONLY_SHUFFLE 
[RS_599]
 PartitionCols:_col0
-Select Operator [SEL_592] 
(rows=45745 width=109)
-  
Output:["_col0","_col1","_col2","_col3","_col5"]
+Select Operator [SEL_592] 
(rows=45745 width=19)
+  
Output:["_col0","_col1","_col2","_col3","_col4"]
   Filter Operator 
[FIL_591] (rows=45745 width=109)
 predicate:((i_category 
= 'Sports') and i_brand_id is not null and i_category_id is not null and 
i_class_id is not null and i_item_sk is not null and i_manufact_id is not null)
 TableScan [TS_6] 
(rows=462000 width=109)
@@ -276,7 +276,7 @@ Stage-0
 <-Map 11 [SIMPLE_EDGE] 
vectorized
   PARTITION_ONLY_SHUFFLE 
[RS_571]
 PartitionCols:_col0
-Select Operator 
[SEL_562] (rows=652 width=8)
+Select Operator 
[SEL_562] (rows=652 width=4)
   Output:["_col0"]
   Filter Operator 
[FIL_558] (rows=652 width=8)
 predicate:((d_year 
= 2002) and d_date_sk is not null)
@@ -321,7 +321,7 @@ Stage-0
   Select Operator [SEL_548] 
(rows=450703984 width=131)
 
Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
 Merge Join Operator 
[MERGEJOIN_547] (rows=450703984 width=204)
-  Conds:RS_125._col1, 
_col2=RS_649._col0, _col1(Left 
Outer),Output:["_col3","_col4","_col8","_col9","_col10","_col12","_col15","_col16"]
+  Conds:RS_125._col1, 
_col2=RS_649._col0, _col1(Left 
Outer),Output:["_col3","_col4","_col7","_col8","_col9","_col10","_col13","_col14"]
 <-Map 46 [SIMPLE_EDGE] vectorized
   SHUFFLE [RS_649]
 PartitionCols:_col0, _col1
@@ -335,7 +335,7 @@ Stage-0
   SHUFFLE [RS_125]
 PartitionCols:_col1, _col2

[31/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/masking_3.q.out
--
diff --git a/ql/src/test/results/clientpositive/masking_3.q.out 
b/ql/src/test/results/clientpositive/masking_3.q.out
index 15a8963..725d905 100644
--- a/ql/src/test/results/clientpositive/masking_3.q.out
+++ b/ql/src/test/results/clientpositive/masking_3.q.out
@@ -54,12 +54,16 @@ STAGE PLANS:
   mode: mergepartial
   outputColumnNames: _col0
   Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
-  File Output Operator
-compressed: false
-table:
-input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+  Select Operator
+expressions: _col0 (type: int), UDFToDouble(_col0) (type: double)
+outputColumnNames: _col0, _col1
+Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+File Output Operator
+  compressed: false
+  table:
+  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+  serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-2
 Map Reduce
@@ -72,20 +76,20 @@ STAGE PLANS:
   predicate: key is not null (type: boolean)
   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
   Select Operator
-expressions: key (type: string)
-outputColumnNames: _col0
+expressions: key (type: string), UDFToDouble(key) (type: 
double)
+outputColumnNames: _col0, _col1
 Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
 Reduce Output Operator
-  key expressions: UDFToDouble(_col0) (type: double)
+  key expressions: _col1 (type: double)
   sort order: +
-  Map-reduce partition columns: UDFToDouble(_col0) (type: 
double)
+  Map-reduce partition columns: _col1 (type: double)
   Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
   value expressions: _col0 (type: string)
   TableScan
 Reduce Output Operator
-  key expressions: UDFToDouble(_col0) (type: double)
+  key expressions: _col1 (type: double)
   sort order: +
-  Map-reduce partition columns: UDFToDouble(_col0) (type: double)
+  Map-reduce partition columns: _col1 (type: double)
   Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
   value expressions: _col0 (type: int)
   Reduce Operator Tree:
@@ -93,21 +97,25 @@ STAGE PLANS:
   condition map:
Inner Join 0 to 1
   keys:
-0 UDFToDouble(_col0) (type: double)
-1 UDFToDouble(_col0) (type: double)
-  outputColumnNames: _col0, _col1
+0 _col1 (type: double)
+1 _col1 (type: double)
+  outputColumnNames: _col0, _col2
   Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE 
Column stats: NONE
-  Group By Operator
-keys: _col0 (type: string), _col1 (type: int)
-mode: hash
+  Select Operator
+expressions: _col0 (type: string), _col2 (type: int)
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE 
Column stats: NONE
-File Output Operator
-  compressed: false
-  table:
-  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-  serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+Group By Operator
+  keys: _col0 (type: string), _col1 (type: int)
+  mode: hash
+  outputColumnNames: _col0, _col1
+  Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE 
Column stats: NONE
+  File Output Operator
+compressed: false
+table:
+input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-1

[04/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/pointlookup3.q.out
--
diff --git a/ql/src/test/results/clientpositive/pointlookup3.q.out 
b/ql/src/test/results/clientpositive/pointlookup3.q.out
index a5fa5e8..a3056a5 100644
--- a/ql/src/test/results/clientpositive/pointlookup3.q.out
+++ b/ql/src/test/results/clientpositive/pointlookup3.q.out
@@ -391,15 +391,15 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@pcr_t1_n1
 POSTHOOK: Input: default@pcr_t1_n1@ds1=2000-04-08/ds2=2001-04-08
  A masked pattern was here 
-OPTIMIZED SQL: SELECT `t4`.`key`, `t4`.`value`, CAST('2000-04-08' AS STRING) 
AS `ds1`, `t4`.`ds2`, `t4`.`key1`, `t4`.`value1`, `t4`.`ds11`, 
CAST('2001-04-08' AS STRING) AS `ds21`
-FROM (SELECT `t0`.`key`, `t0`.`value`, `t0`.`ds2`, `t2`.`key` AS `key1`, 
`t2`.`value` AS `value1`, `t2`.`ds1` AS `ds11`
-FROM (SELECT `key`, `value`, CAST('2000-04-08' AS STRING) AS `ds1`, `ds2`
+OPTIMIZED SQL: SELECT `t3`.`key`, `t3`.`value`, CAST('2000-04-08' AS STRING) 
AS `ds1`, `t3`.`ds2`, `t3`.`key0` AS `key1`, `t3`.`value0` AS `value1`, 
`t3`.`ds1` AS `ds11`, CAST('2001-04-08' AS STRING) AS `ds21`
+FROM (SELECT *
+FROM (SELECT `key`, `value`, `ds2`
 FROM `default`.`pcr_t1_n1`
 WHERE `ds1` = '2000-04-08' AND `key` IS NOT NULL) AS `t0`
-INNER JOIN (SELECT `key`, `value`, `ds1`, CAST('2001-04-08' AS STRING) AS `ds2`
+INNER JOIN (SELECT `key`, `value`, `ds1`
 FROM `default`.`pcr_t1_n1`
 WHERE `ds2` = '2001-04-08' AND `key` IS NOT NULL) AS `t2` ON `t0`.`key` = 
`t2`.`key`
-ORDER BY `t2`.`key`, `t2`.`value`) AS `t4`
+ORDER BY `t2`.`key`, `t2`.`value`) AS `t3`
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -420,7 +420,7 @@ STAGE PLANS:
   Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE 
Column stats: NONE
   Select Operator
 expressions: key (type: int), value (type: string), ds2 (type: 
string)
-outputColumnNames: _col0, _col1, _col3
+outputColumnNames: _col0, _col1, _col2
 Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE 
Column stats: NONE
 Reduce Output Operator
   key expressions: _col0 (type: int)
@@ -429,7 +429,7 @@ STAGE PLANS:
   Map-reduce partition columns: _col0 (type: int)
   Statistics: Num rows: 20 Data size: 160 Basic stats: 
COMPLETE Column stats: NONE
   tag: 0
-  value expressions: _col1 (type: string), _col3 (type: string)
+  value expressions: _col1 (type: string), _col2 (type: string)
   auto parallelism: false
   TableScan
 alias: t2
@@ -515,30 +515,26 @@ STAGE PLANS:
   keys:
 0 _col0 (type: int)
 1 _col0 (type: int)
-  outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6
+  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
   Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column 
stats: NONE
-  Select Operator
-expressions: _col0 (type: int), _col1 (type: string), _col3 (type: 
string), _col4 (type: int), _col5 (type: string), _col6 (type: string)
-outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE 
Column stats: NONE
-File Output Operator
-  compressed: false
-  GlobalTableId: 0
+  File Output Operator
+compressed: false
+GlobalTableId: 0
  A masked pattern was here 
-  NumFilesPerFileSink: 1
-  table:
-  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-  properties:
-column.name.delimiter ,
-columns _col0,_col1,_col2,_col3,_col4,_col5
-columns.types int,string,string,int,string,string
-escape.delim \
-serialization.lib 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-  serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-  TotalFiles: 1
-  GatherStats: false
-  MultiFileSpray: false
+NumFilesPerFileSink: 1
+table:
+input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+properties:
+  column.name.delimiter ,
+  columns _col0,_col1,_col2,_col3,_col4,_col5
+  columns.types int,string,string,int,string,string
+  escape.delim \
+

[30/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/masking_4.q.out
--
diff --git a/ql/src/test/results/clientpositive/masking_4.q.out 
b/ql/src/test/results/clientpositive/masking_4.q.out
index 60cbd0f..54861b0 100644
--- a/ql/src/test/results/clientpositive/masking_4.q.out
+++ b/ql/src/test/results/clientpositive/masking_4.q.out
@@ -210,12 +210,16 @@ STAGE PLANS:
   mode: mergepartial
   outputColumnNames: _col0
   Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
-  File Output Operator
-compressed: false
-table:
-input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+  Select Operator
+expressions: _col0 (type: int), UDFToDouble(_col0) (type: double)
+outputColumnNames: _col0, _col1
+Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+File Output Operator
+  compressed: false
+  table:
+  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+  serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-2
 Map Reduce
@@ -228,20 +232,20 @@ STAGE PLANS:
   predicate: key is not null (type: boolean)
   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
   Select Operator
-expressions: key (type: string)
-outputColumnNames: _col0
+expressions: key (type: string), UDFToDouble(key) (type: 
double)
+outputColumnNames: _col0, _col1
 Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
 Reduce Output Operator
-  key expressions: UDFToDouble(_col0) (type: double)
+  key expressions: _col1 (type: double)
   sort order: +
-  Map-reduce partition columns: UDFToDouble(_col0) (type: 
double)
+  Map-reduce partition columns: _col1 (type: double)
   Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
   value expressions: _col0 (type: string)
   TableScan
 Reduce Output Operator
-  key expressions: UDFToDouble(_col0) (type: double)
+  key expressions: _col1 (type: double)
   sort order: +
-  Map-reduce partition columns: UDFToDouble(_col0) (type: double)
+  Map-reduce partition columns: _col1 (type: double)
   Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
   value expressions: _col0 (type: int)
   Reduce Operator Tree:
@@ -249,21 +253,25 @@ STAGE PLANS:
   condition map:
Inner Join 0 to 1
   keys:
-0 UDFToDouble(_col0) (type: double)
-1 UDFToDouble(_col0) (type: double)
-  outputColumnNames: _col0, _col1
+0 _col1 (type: double)
+1 _col1 (type: double)
+  outputColumnNames: _col0, _col2
   Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE 
Column stats: NONE
-  Group By Operator
-keys: _col0 (type: string), _col1 (type: int)
-mode: hash
+  Select Operator
+expressions: _col0 (type: string), _col2 (type: int)
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE 
Column stats: NONE
-File Output Operator
-  compressed: false
-  table:
-  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-  serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+Group By Operator
+  keys: _col0 (type: string), _col1 (type: int)
+  mode: hash
+  outputColumnNames: _col0, _col1
+  Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE 
Column stats: NONE
+  File Output Operator
+compressed: false
+table:
+input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage:

[43/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out 
b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
index accb3a7..6954647 100644
--- a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
@@ -535,14 +535,14 @@ STAGE PLANS:
   filterExpr: ds is not null (type: boolean)
   Statistics: Num rows: 2000 Data size: 389248 Basic stats: 
COMPLETE Column stats: COMPLETE
   Select Operator
-expressions: ds (type: string)
+expressions: day(CAST( ds AS DATE)) (type: int)
 outputColumnNames: _col0
-Statistics: Num rows: 2000 Data size: 368000 Basic stats: 
COMPLETE Column stats: COMPLETE
+Statistics: Num rows: 2000 Data size: 8000 Basic stats: 
COMPLETE Column stats: COMPLETE
 Reduce Output Operator
-  key expressions: day(CAST( _col0 AS DATE)) (type: int)
+  key expressions: _col0 (type: int)
   sort order: +
-  Map-reduce partition columns: day(CAST( _col0 AS DATE)) 
(type: int)
-  Statistics: Num rows: 2000 Data size: 368000 Basic 
stats: COMPLETE Column stats: COMPLETE
+  Map-reduce partition columns: _col0 (type: int)
+  Statistics: Num rows: 2000 Data size: 8000 Basic stats: 
COMPLETE Column stats: COMPLETE
 Execution mode: llap
 LLAP IO: no inputs
 Map 4 
@@ -555,16 +555,16 @@ STAGE PLANS:
 predicate: ((date = '2008-04-08') and ds is not null) 
(type: boolean)
 Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
 Select Operator
-  expressions: ds (type: string)
+  expressions: day(CAST( ds AS DATE)) (type: int)
   outputColumnNames: _col0
   Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
   Reduce Output Operator
-key expressions: day(CAST( _col0 AS DATE)) (type: int)
+key expressions: _col0 (type: int)
 sort order: +
-Map-reduce partition columns: day(CAST( _col0 AS 
DATE)) (type: int)
+Map-reduce partition columns: _col0 (type: int)
 Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
   Select Operator
-expressions: day(CAST( _col0 AS DATE)) (type: int)
+expressions: _col0 (type: int)
 outputColumnNames: _col0
 Statistics: Num rows: 2 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE
 Group By Operator
@@ -587,9 +587,9 @@ STAGE PLANS:
 condition map:
  Inner Join 0 to 1
 keys:
-  0 day(CAST( _col0 AS DATE)) (type: int)
-  1 day(CAST( _col0 AS DATE)) (type: int)
-Statistics: Num rows: 2200 Data size: 404800 Basic stats: 
COMPLETE Column stats: NONE
+  0 _col0 (type: int)
+  1 _col0 (type: int)
+Statistics: Num rows: 2200 Data size: 8800 Basic stats: 
COMPLETE Column stats: NONE
 Group By Operator
   aggregations: count()
   mode: hash
@@ -678,14 +678,14 @@ STAGE PLANS:
   filterExpr: ds is not null (type: boolean)
   Statistics: Num rows: 2000 Data size: 389248 Basic stats: 
COMPLETE Column stats: COMPLETE
   Select Operator
-expressions: ds (type: string)
+expressions: day(CAST( ds AS DATE)) (type: int)
 outputColumnNames: _col0
-Statistics: Num rows: 2000 Data size: 368000 Basic stats: 
COMPLETE Column stats: COMPLETE
+Statistics: Num rows: 2000 Data size: 8000 Basic stats: 
COMPLETE Column stats: COMPLETE
 Reduce Output Operator
-  key expressions: day(CAST( _col0 AS DATE)) (type: int)
+  key expressions: _col0 (type: int)
   sort order: +
-  Map-reduce partition columns: day(CAST( _col0 AS DATE)) 
(type: int)
-  Statistics: Num rows: 2000 Data size: 368000 Basic 
stats: COMPLETE Column stats: COMPLETE
+

[51/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

HIVE-20850: Push case conditional from projections to dimension tables if 
possible (Zoltan Haindrich via Jesus Camacho Rodriguez)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/55887646
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/55887646
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/55887646

Branch: refs/heads/master
Commit: 558876462d2589423d7131b51c24dbf61b8a22b9
Parents: dca389b
Author: Zoltan Haindrich 
Authored: Mon Nov 12 10:03:28 2018 -0800
Committer: Jesus Camacho Rodriguez 
Committed: Mon Nov 12 10:03:57 2018 -0800

--
 .../results/positive/accumulo_queries.q.out |   34 +-
 .../test/results/positive/hbase_queries.q.out   |   34 +-
 .../hive/jdbc/AbstractJdbcTriggersTest.java |5 +-
 .../jdbc/TestTriggersMoveWorkloadManager.java   |9 +-
 .../jdbc/TestTriggersTezSessionPoolManager.java |   21 +-
 .../hadoop/hive/ql/parse/CalcitePlanner.java|2 +-
 .../bucket_mapjoin_mismatch1.q.out  |4 +-
 .../clientpositive/allcolref_in_udf.q.out   |   28 +-
 .../annotate_stats_join_pkfk.q.out  |   40 +-
 .../results/clientpositive/auto_join13.q.out|6 +-
 .../results/clientpositive/auto_join19.q.out|4 +-
 .../clientpositive/auto_join19_inclause.q.out   |4 +-
 .../results/clientpositive/auto_join2.q.out |   24 +-
 .../results/clientpositive/auto_join32.q.out|6 +-
 .../results/clientpositive/auto_join9.q.out |4 +-
 .../clientpositive/auto_join_stats.q.out|  122 +-
 .../clientpositive/auto_join_stats2.q.out   |   90 +-
 .../materialized_view_create_rewrite.q.out  |8 +-
 .../clientpositive/bucket_map_join_spark1.q.out |4 +-
 .../clientpositive/bucket_map_join_spark2.q.out |4 +-
 .../clientpositive/bucket_map_join_spark3.q.out |4 +-
 .../clientpositive/bucket_map_join_spark4.q.out |4 +-
 .../bucketsortoptimize_insert_4.q.out   |4 +-
 .../bucketsortoptimize_insert_5.q.out   |4 +-
 .../bucketsortoptimize_insert_8.q.out   |8 +-
 .../test/results/clientpositive/cbo_const.q.out |   82 +-
 .../results/clientpositive/cbo_rp_join1.q.out   |   80 +-
 .../clientpositive/cbo_rp_outer_join_ppr.q.out  |8 +-
 .../constantPropagateForSubQuery.q.out  |   10 +-
 .../results/clientpositive/constprog2.q.out |   16 +-
 .../clientpositive/constprog_partitioner.q.out  |8 +-
 .../clientpositive/correlationoptimizer8.q.out  |   58 +-
 .../test/results/clientpositive/cte_mat_5.q.out |   18 +-
 .../results/clientpositive/deleteAnalyze.q.out  |   18 +-
 .../clientpositive/druid/druidmini_mv.q.out |   16 +-
 .../encryption_join_unencrypted_tbl.q.out   |   76 +-
 .../clientpositive/filter_cond_pushdown.q.out   |   90 +-
 .../clientpositive/filter_join_breaktask.q.out  |   24 +-
 .../infer_bucket_sort_map_operators.q.out   |   24 +-
 .../clientpositive/infer_join_preds.q.out   |   22 +-
 .../results/clientpositive/innerjoin1.q.out |   76 +-
 .../test/results/clientpositive/input23.q.out   |8 +-
 .../results/clientpositive/interval_3.q.out |   16 +-
 ql/src/test/results/clientpositive/join13.q.out |8 +-
 ql/src/test/results/clientpositive/join2.q.out  |   32 +-
 ql/src/test/results/clientpositive/join26.q.out |2 +-
 ql/src/test/results/clientpositive/join32.q.out |2 +-
 ql/src/test/results/clientpositive/join33.q.out |2 +-
 ql/src/test/results/clientpositive/join42.q.out |   28 +-
 ql/src/test/results/clientpositive/join45.q.out |  392 +-
 ql/src/test/results/clientpositive/join46.q.out |  290 +-
 ql/src/test/results/clientpositive/join47.q.out |  392 +-
 ql/src/test/results/clientpositive/join9.q.out  |6 +-
 .../clientpositive/join_cond_pushdown_1.q.out   |   18 +-
 .../clientpositive/join_cond_pushdown_3.q.out   |   18 +-
 .../join_cond_pushdown_unqual1.q.out|   18 +-
 .../join_cond_pushdown_unqual3.q.out|   18 +-
 .../clientpositive/join_emit_interval.q.out |   52 +-
 .../clientpositive/join_filters_overlap.q.out   |  338 +-
 .../results/clientpositive/join_merging.q.out   |  152 +-
 .../test/results/clientpositive/join_view.q.out |4 +-
 .../clientpositive/llap/auto_join_filters.q.out |   60 +-
 .../llap/auto_sortmerge_join_6.q.out|  174 +-
 .../llap/bucket_map_join_tez2.q.out |  116 +-
 .../clientpositive/llap/bucketmapjoin1.q.out|   20 +-
 .../clientpositive/llap/bucketmapjoin2.q.out|4 +-
 .../clientpositive/llap/bucketmapjoin3.q.out|   16 +-
 .../llap/bucketsortoptimize_insert_2.q.out  |   24 +-
 .../llap/bucketsortoptimize_insert_6.q.out  |  301 +-
 .../llap/bucketsortoptimize_insert_7.q.out  |8 +-
 .../clientpositive/llap/check_constraint.q.out  |   43 +-
 .../llap/constprog_semijoin.q.out   |   54 +-
 .../llap/constraints_optimization.q.out |

[14/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query23.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/tez/query23.q.out 
b/ql/src/test/results/clientpositive/perf/tez/query23.q.out
index 7784792..059195a 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query23.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query23.q.out
@@ -1,7 +1,7 @@
-Warning: Shuffle Join MERGEJOIN[593][tables = [$hdt$_1, $hdt$_2]] in Stage 
'Reducer 29' is a cross product
-Warning: Shuffle Join MERGEJOIN[594][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in 
Stage 'Reducer 30' is a cross product
-Warning: Shuffle Join MERGEJOIN[596][tables = [$hdt$_1, $hdt$_2]] in Stage 
'Reducer 33' is a cross product
-Warning: Shuffle Join MERGEJOIN[597][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in 
Stage 'Reducer 34' is a cross product
+Warning: Shuffle Join MERGEJOIN[583][tables = [$hdt$_1, $hdt$_2]] in Stage 
'Reducer 29' is a cross product
+Warning: Shuffle Join MERGEJOIN[584][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in 
Stage 'Reducer 30' is a cross product
+Warning: Shuffle Join MERGEJOIN[586][tables = [$hdt$_1, $hdt$_2]] in Stage 
'Reducer 33' is a cross product
+Warning: Shuffle Join MERGEJOIN[587][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in 
Stage 'Reducer 34' is a cross product
 PREHOOK: query: explain
 with frequent_ss_items as 
  (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date 
solddate,count(*) cnt
@@ -166,399 +166,391 @@ Stage-0
 limit:100
 Stage-1
   Reducer 6 vectorized
-  File Output Operator [FS_699]
-Limit [LIM_698] (rows=1 width=112)
+  File Output Operator [FS_689]
+Limit [LIM_688] (rows=1 width=112)
   Number of rows:100
-  Group By Operator [GBY_697] (rows=1 width=112)
+  Group By Operator [GBY_687] (rows=1 width=112)
 Output:["_col0"],aggregations:["sum(VALUE._col0)"]
   <-Union 5 [CUSTOM_SIMPLE_EDGE]
 <-Reducer 12 [CONTAINS]
-  Reduce Output Operator [RS_608]
-Group By Operator [GBY_607] (rows=1 width=112)
+  Reduce Output Operator [RS_598]
+Group By Operator [GBY_597] (rows=1 width=112)
   Output:["_col0"],aggregations:["sum(_col0)"]
-  Select Operator [SEL_605] (rows=1 width=112)
+  Select Operator [SEL_595] (rows=1 width=112)
 Output:["_col0"]
-Merge Join Operator [MERGEJOIN_604] (rows=1 width=116)
-  
Conds:RS_248._col2=RS_249._col0(Inner),Output:["_col3","_col4"]
+Merge Join Operator [MERGEJOIN_594] (rows=1 width=116)
+  
Conds:RS_240._col2=RS_241._col0(Inner),Output:["_col3","_col4"]
 <-Reducer 11 [SIMPLE_EDGE]
-  PARTITION_ONLY_SHUFFLE [RS_248]
+  PARTITION_ONLY_SHUFFLE [RS_240]
 PartitionCols:_col2
-Merge Join Operator [MERGEJOIN_592] (rows=155 width=0)
-  
Conds:RS_245._col1=RS_642._col0(Inner),Output:["_col2","_col3","_col4"]
+Merge Join Operator [MERGEJOIN_582] (rows=155 width=0)
+  
Conds:RS_237._col1=RS_632._col0(Inner),Output:["_col2","_col3","_col4"]
 <-Reducer 18 [SIMPLE_EDGE] vectorized
-  SHUFFLE [RS_642]
+  SHUFFLE [RS_632]
 PartitionCols:_col0
-Group By Operator [GBY_639] (rows=2235 width=4)
+Group By Operator [GBY_629] (rows=2235 width=4)
   Output:["_col0"],keys:_col1
-  Select Operator [SEL_638] (rows=6548799 width=12)
+  Select Operator [SEL_628] (rows=6548799 
width=290)
 Output:["_col1"]
-Filter Operator [FIL_637] (rows=6548799 
width=12)
+Filter Operator [FIL_627] (rows=6548799 
width=290)
   predicate:(_col3 > 4L)
-  Select Operator [SEL_636] (rows=19646398 
width=12)
-Output:["_col0","_col3"]
-Group By Operator [GBY_635] (rows=19646398 
width=290)
+  Select Operator [SEL_626] (rows=19646398 
width=290)
+Output:["_col1","_col3"]
+Group By Operator [GBY_625] (rows=19646398 
width=290)
   
Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0,
 KEY._col1, KEY._col2
 <-Reducer 17 [SIMPLE_EDGE]
-

[22/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/cbo_query13.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query13.q.out 
b/ql/src/test/results/clientpositive/perf/tez/cbo_query13.q.out
index ccad088..19f3039 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query13.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query13.q.out
@@ -114,28 +114,28 @@ POSTHOOK: Input: default@store_sales
 POSTHOOK: Output: hdfs://### HDFS PATH ###
 CBO PLAN:
 HiveProject($f0=[/(CAST($0):DOUBLE, $1)], $f1=[/($2, $3)], $f2=[/($4, $5)], 
$f3=[CAST($4):DECIMAL(17, 2)])
-  HiveAggregate(group=[{}], agg#0=[sum($16)], agg#1=[count($16)], 
agg#2=[sum($18)], agg#3=[count($18)], agg#4=[sum($19)], agg#5=[count($19)])
-HiveJoin(condition=[AND(=($0, $12), OR(AND(=($1, _UTF-16LE'M'), =($2, 
_UTF-16LE'4 yr Degree'), BETWEEN(false, $17, 100, 150), =($7, 3)), AND(=($1, 
_UTF-16LE'D'), =($2, _UTF-16LE'Primary'), BETWEEN(false, $17, 50, 100), =($7, 
1)), AND(=($1, _UTF-16LE'U'), =($2, _UTF-16LE'Advanced Degree'), BETWEEN(false, 
$17, 150, 200), =($7, 1], joinType=[inner], algorithm=[none], cost=[not 
available])
-  HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], 
cd_education_status=[$3])
+  HiveAggregate(group=[{}], agg#0=[sum($21)], agg#1=[count($21)], 
agg#2=[sum($22)], agg#3=[count($22)], agg#4=[sum($23)], agg#5=[count($23)])
+HiveJoin(condition=[AND(=($0, $17), OR(AND($1, $2, $27, $12), AND($3, $4, 
$28, $13), AND($5, $6, $29, $13)))], joinType=[inner], algorithm=[none], 
cost=[not available])
+  HiveProject(cd_demo_sk=[$0], ==[=($2, _UTF-16LE'M')], =2=[=($3, 
_UTF-16LE'4 yr Degree')], =3=[=($2, _UTF-16LE'D')], =4=[=($3, 
_UTF-16LE'Primary')], =5=[=($2, _UTF-16LE'U')], =6=[=($3, _UTF-16LE'Advanced 
Degree')])
 HiveFilter(condition=[AND(IN($2, _UTF-16LE'M', _UTF-16LE'D', 
_UTF-16LE'U'), IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', 
_UTF-16LE'Advanced Degree'), IS NOT NULL($0))])
   HiveTableScan(table=[[default, customer_demographics]], 
table:alias=[customer_demographics])
-  HiveJoin(condition=[AND(=($11, $0), OR(AND(IN($1, _UTF-16LE'KY', 
_UTF-16LE'GA', _UTF-16LE'NM'), BETWEEN(false, $17, 100, 200)), AND(IN($1, 
_UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN'), BETWEEN(false, $17, 150, 300)), 
AND(IN($1, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), BETWEEN(false, $17, 
50, 250], joinType=[inner], algorithm=[none], cost=[not available])
-HiveProject(ca_address_sk=[$0], ca_state=[$8], 
ca_country=[CAST(_UTF-16LE'United States'):VARCHAR(2147483647) CHARACTER SET 
"UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"])
+  HiveJoin(condition=[AND(=($12, $0), OR(AND($1, $17), AND($2, $18), 
AND($3, $19)))], joinType=[inner], algorithm=[none], cost=[not available])
+HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', 
_UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', 
_UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')])
   HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', 
_UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', 
_UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'), IS NOT 
NULL($0))])
 HiveTableScan(table=[[default, customer_address]], 
table:alias=[customer_address])
 HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], 
cost=[not available])
-  HiveProject(hd_demo_sk=[$0], hd_dep_count=[$3])
+  HiveProject(hd_demo_sk=[$0], ==[=($3, 3)], =2=[=($3, 1)])
 HiveFilter(condition=[AND(IN($3, 3, 1), IS NOT NULL($0))])
   HiveTableScan(table=[[default, household_demographics]], 
table:alias=[household_demographics])
-  HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], 
cost=[not available])
-HiveProject(d_date_sk=[$0], d_year=[CAST(2001):INTEGER])
-  HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))])
-HiveTableScan(table=[[default, date_dim]], 
table:alias=[date_dim])
-HiveJoin(condition=[=($0, $5)], joinType=[inner], 
algorithm=[none], cost=[not available])
-  HiveProject(s_store_sk=[$0])
-HiveFilter(condition=[IS NOT NULL($0)])
-  HiveTableScan(table=[[default, store]], table:alias=[store])
-  HiveProject(ss_sold_date_sk=[$0], ss_cdemo_sk=[$4], 
ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_quantity=[$10], 
ss_sales_price=[$13], ss_ext_sales_price=[$15], ss_ext_wholesale_cost=[$16], 
ss_net_profit=[$22])
+  HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], 
cost=[not available])
+HiveProject(s_store_sk=[$0])
+  HiveFilter(condition=[IS NOT NULL($0)])
+HiveTableScan(table=[[default, store]], table:alias=[store])
+

[06/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query85.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/tez/query85.q.out 
b/ql/src/test/results/clientpositive/perf/tez/query85.q.out
index f5800b9..1ada394 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query85.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query85.q.out
@@ -183,15 +183,15 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 Plan optimized by CBO.
 
 Vertex dependency in root stage
-Map 11 <- Reducer 15 (BROADCAST_EDGE)
+Map 11 <- Reducer 13 (BROADCAST_EDGE)
 Reducer 10 <- Reducer 9 (SIMPLE_EDGE)
-Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE)
+Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE)
 Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE)
-Reducer 3 <- Map 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-Reducer 4 <- Map 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+Reducer 3 <- Map 17 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
 Reducer 5 <- Map 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
-Reducer 6 <- Map 16 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
-Reducer 7 <- Map 17 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
+Reducer 6 <- Map 15 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+Reducer 7 <- Map 16 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
 Reducer 8 <- Map 17 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
 Reducer 9 <- Reducer 8 (SIMPLE_EDGE)
 
@@ -200,134 +200,138 @@ Stage-0
 limit:-1
 Stage-1
   Reducer 10 vectorized
-  File Output Operator [FS_239]
-Limit [LIM_238] (rows=72 width=832)
+  File Output Operator [FS_240]
+Limit [LIM_239] (rows=7 width=832)
   Number of rows:100
-  Select Operator [SEL_237] (rows=72 width=832)
+  Select Operator [SEL_238] (rows=7 width=832)
 Output:["_col0","_col1","_col2","_col3"]
   <-Reducer 9 [SIMPLE_EDGE] vectorized
-SHUFFLE [RS_236]
-  Select Operator [SEL_235] (rows=72 width=832)
+SHUFFLE [RS_237]
+  Select Operator [SEL_236] (rows=7 width=832)
 Output:["_col4","_col5","_col6","_col7"]
-Group By Operator [GBY_234] (rows=72 width=353)
+Group By Operator [GBY_235] (rows=7 width=353)
   
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)"],keys:KEY._col0
 <-Reducer 8 [SIMPLE_EDGE]
   SHUFFLE [RS_49]
 PartitionCols:_col0
-Group By Operator [GBY_48] (rows=72 width=353)
-  
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col12)","count(_col12)","sum(_col7)","count(_col7)","sum(_col6)","count(_col6)"],keys:_col22
-  Merge Join Operator [MERGEJOIN_206] (rows=8055 width=100)
-Conds:RS_44._col3, _col24, _col25=RS_232._col0, _col1, 
_col2(Inner),Output:["_col6","_col7","_col12","_col22"]
-  <-Map 17 [SIMPLE_EDGE] vectorized
-SHUFFLE [RS_232]
-  PartitionCols:_col0, _col1, _col2
-  Select Operator [SEL_231] (rows=265971 width=183)
-Output:["_col0","_col1","_col2"]
-Filter Operator [FIL_230] (rows=265971 width=183)
-  predicate:((cd_education_status) IN ('4 yr 
Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 
'U') and cd_demo_sk is not null)
-  TableScan [TS_21] (rows=1861800 width=183)
-
default@customer_demographics,cd2,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"]
-  <-Reducer 7 [SIMPLE_EDGE]
-SHUFFLE [RS_44]
-  PartitionCols:_col3, _col24, _col25
-  Filter Operator [FIL_43] (rows=8055 width=390)
-predicate:(((_col24 = 'D') and (_col25 = 
'Primary') and _col13 BETWEEN 50 AND 100) or ((_col24 = 'M') and (_col25 = '4 
yr Degree') and _col13 BETWEEN 100 AND 150) or ((_col24 = 'U') and (_col25 = 
'Advanced Degree') and _col13 BETWEEN 150 AND 200))
-Merge Join Operator [MERGEJOIN_205] (rows=24166 
width=390)
-  
Conds:RS_40._col1=RS_233._col0(Inner),Output:["_col3","_col6","_col7","_col12","_col13","_col22","_col24","_col25"]
-<-Map 17 [SIMPLE_EDGE] vectorized
-  SHUFFLE [RS_233]
-PartitionCols:_col0
- Please refer to the previous Select Operator

[23/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query85.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/spark/query85.q.out 
b/ql/src/test/results/clientpositive/perf/spark/query85.q.out
index 6bdbf7e..a7bf288 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query85.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query85.q.out
@@ -182,8 +182,7 @@ POSTHOOK: Input: default@web_sales
  A masked pattern was here 
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
-  Stage-3 depends on stages: Stage-2
-  Stage-1 depends on stages: Stage-3
+  Stage-1 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
@@ -191,47 +190,42 @@ STAGE PLANS:
 Spark
  A masked pattern was here 
   Vertices:
-Map 13 
+Map 12 
 Map Operator Tree:
 TableScan
-  alias: reason
-  filterExpr: r_reason_sk is not null (type: boolean)
-  Statistics: Num rows: 72 Data size: 14400 Basic stats: 
COMPLETE Column stats: NONE
+  alias: web_page
+  filterExpr: wp_web_page_sk is not null (type: boolean)
+  Statistics: Num rows: 4602 Data size: 2696178 Basic stats: 
COMPLETE Column stats: NONE
   Filter Operator
-predicate: r_reason_sk is not null (type: boolean)
-Statistics: Num rows: 72 Data size: 14400 Basic stats: 
COMPLETE Column stats: NONE
+predicate: wp_web_page_sk is not null (type: boolean)
+Statistics: Num rows: 4602 Data size: 2696178 Basic stats: 
COMPLETE Column stats: NONE
 Select Operator
-  expressions: r_reason_sk (type: int), r_reason_desc 
(type: string)
-  outputColumnNames: _col0, _col1
-  Statistics: Num rows: 72 Data size: 14400 Basic stats: 
COMPLETE Column stats: NONE
+  expressions: wp_web_page_sk (type: int)
+  outputColumnNames: _col0
+  Statistics: Num rows: 4602 Data size: 2696178 Basic 
stats: COMPLETE Column stats: NONE
   Spark HashTable Sink Operator
 keys:
-  0 _col4 (type: int)
+  0 _col10 (type: int)
   1 _col0 (type: int)
 Execution mode: vectorized
 Local Work:
   Map Reduce Local Work
-
-  Stage: Stage-3
-Spark
- A masked pattern was here 
-  Vertices:
-Map 11 
+Map 13 
 Map Operator Tree:
 TableScan
-  alias: web_page
-  filterExpr: wp_web_page_sk is not null (type: boolean)
-  Statistics: Num rows: 4602 Data size: 2696178 Basic stats: 
COMPLETE Column stats: NONE
+  alias: reason
+  filterExpr: r_reason_sk is not null (type: boolean)
+  Statistics: Num rows: 72 Data size: 14400 Basic stats: 
COMPLETE Column stats: NONE
   Filter Operator
-predicate: wp_web_page_sk is not null (type: boolean)
-Statistics: Num rows: 4602 Data size: 2696178 Basic stats: 
COMPLETE Column stats: NONE
+predicate: r_reason_sk is not null (type: boolean)
+Statistics: Num rows: 72 Data size: 14400 Basic stats: 
COMPLETE Column stats: NONE
 Select Operator
-  expressions: wp_web_page_sk (type: int)
-  outputColumnNames: _col0
-  Statistics: Num rows: 4602 Data size: 2696178 Basic 
stats: COMPLETE Column stats: NONE
+  expressions: r_reason_sk (type: int), r_reason_desc 
(type: string)
+  outputColumnNames: _col0, _col1
+  Statistics: Num rows: 72 Data size: 14400 Basic stats: 
COMPLETE Column stats: NONE
   Spark HashTable Sink Operator
 keys:
-  0 _col10 (type: int)
+  0 _col4 (type: int)
   1 _col0 (type: int)
 Execution mode: vectorized
 Local Work:
@@ -241,11 +235,11 @@ STAGE PLANS:
 Spark
   Edges:
 Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 28), Map 9 (PARTITION-LEVEL 
SORT, 28)
-Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 178), Reducer 2 
(PARTITION-LEVEL SORT, 178)
-Reducer 4 <- Map 12 (PARTITION-LEVEL SORT, 65), Reducer 3 
(PARTITION-LEVEL SORT, 65)
-Reducer 5 <- Map 14 (PARTITION-LEVEL SORT, 83), Reducer 4 
(PARTITION-LEVEL SORT, 83)
-Reducer 6 <- Map 15 (PARTITION-LEVEL SORT, 13), Reducer 5 
(PARTITION-LEVEL SORT, 13)
-Reducer 7

[36/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out 
b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out
index 37970ab..7e09d5e 100644
--- a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out
@@ -51,14 +51,14 @@ STAGE PLANS:
 predicate: (csmallint < 100S) (type: boolean)
 Statistics: Num rows: 4096 Data size: 1031250 Basic stats: 
COMPLETE Column stats: COMPLETE
 Select Operator
-  expressions: ctinyint (type: tinyint), csmallint (type: 
smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), 
cdouble (type: double), cstring1 (type: string), cstring2 (type: string), 
ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: 
boolean), cboolean2 (type: boolean)
-  outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10, _col11
-  Statistics: Num rows: 4096 Data size: 1031250 Basic 
stats: COMPLETE Column stats: COMPLETE
+  expressions: ctinyint (type: tinyint), csmallint (type: 
smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), 
cdouble (type: double), cstring1 (type: string), cstring2 (type: string), 
ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: 
boolean), cboolean2 (type: boolean), UDFToInteger(csmallint) (type: int)
+  outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+  Statistics: Num rows: 4096 Data size: 1043486 Basic 
stats: COMPLETE Column stats: COMPLETE
   Reduce Output Operator
-key expressions: UDFToInteger(_col1) (type: int)
+key expressions: _col12 (type: int)
 sort order: +
-Map-reduce partition columns: UDFToInteger(_col1) 
(type: int)
-Statistics: Num rows: 4096 Data size: 1031250 Basic 
stats: COMPLETE Column stats: COMPLETE
+Map-reduce partition columns: _col12 (type: int)
+Statistics: Num rows: 4096 Data size: 1043486 Basic 
stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: tinyint), _col1 (type: 
smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 
(type: double), _col6 (type: string), _col7 (type: string), _col8 (type: 
timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: 
boolean)
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -72,14 +72,14 @@ STAGE PLANS:
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 500 Data size: 43500 Basic stats: 
COMPLETE Column stats: COMPLETE
 Select Operator
-  expressions: key (type: string)
+  expressions: UDFToInteger(key) (type: int)
   outputColumnNames: _col0
-  Statistics: Num rows: 500 Data size: 43500 Basic stats: 
COMPLETE Column stats: COMPLETE
+  Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
   Reduce Output Operator
-key expressions: UDFToInteger(_col0) (type: int)
+key expressions: _col0 (type: int)
 sort order: +
-Map-reduce partition columns: UDFToInteger(_col0) 
(type: int)
-Statistics: Num rows: 500 Data size: 43500 Basic 
stats: COMPLETE Column stats: COMPLETE
+Map-reduce partition columns: _col0 (type: int)
+Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
 Map 5 
@@ -92,14 +92,14 @@ STAGE PLANS:
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 500 Data size: 43500 Basic stats: 
COMPLETE Column stats: COMPLETE
 Select Operator
-  expressions: key (type: string)
+  expressions: (UDFToInteger(key) + 0) (type: int)
   outputColumnNames: _col0
-  Statistics: Num rows: 500 Data size: 43500 Basic stats: 
COMPLETE Column stats: COMPLETE
+  Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats:

[38/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out 
b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
index c72e4b2..c43ad91 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
@@ -109,13 +109,13 @@ STAGE PLANS:
   alias: part
   Statistics: Num rows: 26 Data size: 16094 Basic stats: 
COMPLETE Column stats: COMPLETE
   Select Operator
-expressions: p_partkey (type: int), p_name (type: string), 
p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size 
(type: int), p_container (type: string), p_retailprice (type: double), 
p_comment (type: string)
-outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8
-Statistics: Num rows: 26 Data size: 16094 Basic stats: 
COMPLETE Column stats: COMPLETE
+expressions: p_partkey (type: int), p_name (type: string), 
p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size 
(type: int), p_container (type: string), p_retailprice (type: double), 
p_comment (type: string), UDFToDouble(p_size) (type: double)
+outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9
+Statistics: Num rows: 26 Data size: 16302 Basic stats: 
COMPLETE Column stats: COMPLETE
 Reduce Output Operator
   sort order: 
-  Statistics: Num rows: 26 Data size: 16094 Basic stats: 
COMPLETE Column stats: COMPLETE
-  value expressions: _col0 (type: int), _col1 (type: 
string), _col2 (type: string), _col3 (type: string), _col4 (type: string), 
_col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: 
string)
+  Statistics: Num rows: 26 Data size: 16302 Basic stats: 
COMPLETE Column stats: COMPLETE
+  value expressions: _col0 (type: int), _col1 (type: 
string), _col2 (type: string), _col3 (type: string), _col4 (type: string), 
_col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: 
string), _col9 (type: double)
 Execution mode: vectorized, llap
 LLAP IO: no inputs
 Map 3 
@@ -147,16 +147,16 @@ STAGE PLANS:
 keys:
   0 
   1 
-outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9
-residual filter predicates: {(UDFToDouble(_col5) > _col9)}
-Statistics: Num rows: 8 Data size: 5120 Basic stats: COMPLETE 
Column stats: NONE
+outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10
+residual filter predicates: {(_col9 > _col10)}
+Statistics: Num rows: 8 Data size: 5184 Basic stats: COMPLETE 
Column stats: NONE
 Select Operator
   expressions: _col0 (type: int), _col1 (type: string), _col2 
(type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), 
_col6 (type: string), _col7 (type: double), _col8 (type: string)
   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8
-  Statistics: Num rows: 8 Data size: 5120 Basic stats: 
COMPLETE Column stats: NONE
+  Statistics: Num rows: 8 Data size: 5184 Basic stats: 
COMPLETE Column stats: NONE
   File Output Operator
 compressed: false
-Statistics: Num rows: 8 Data size: 5120 Basic stats: 
COMPLETE Column stats: NONE
+Statistics: Num rows: 8 Data size: 5184 Basic stats: 
COMPLETE Column stats: NONE
 table:
 input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -397,12 +397,12 @@ STAGE PLANS:
 Statistics: Num rows: 1 Data size: 619 Basic stats: 
COMPLETE Column stats: COMPLETE
 Select Operator
   expressions: p_partkey (type: int), p_mfgr (type: 
string), p_brand (type: string), p_type (type: string), p_size (type: int), 
p_container (type: string), p_retailprice (type: double), p_comment (type: 
string)
-  outputColumnNames: _col0, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8
-  Statistics: Num rows: 1 Data size: 582 Basic stats: 
COMPLETE Column stats: COMPLETE
+  outputColumnNames: _col0, _col1, _col2, _col3, _col4,

[09/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query64.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/tez/query64.q.out 
b/ql/src/test/results/clientpositive/perf/tez/query64.q.out
index f670c4f..7c77e9f 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query64.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query64.q.out
@@ -265,9 +265,9 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 Plan optimized by CBO.
 
 Vertex dependency in root stage
-Map 37 <- Reducer 24 (BROADCAST_EDGE), Reducer 40 (BROADCAST_EDGE), Reducer 47 
(BROADCAST_EDGE)
-Map 44 <- Reducer 40 (BROADCAST_EDGE)
-Map 55 <- Reducer 12 (BROADCAST_EDGE), Reducer 32 (BROADCAST_EDGE), Reducer 42 
(BROADCAST_EDGE), Reducer 51 (BROADCAST_EDGE)
+Map 37 <- Reducer 24 (BROADCAST_EDGE), Reducer 40 (BROADCAST_EDGE), Reducer 46 
(BROADCAST_EDGE)
+Map 43 <- Reducer 40 (BROADCAST_EDGE)
+Map 55 <- Reducer 12 (BROADCAST_EDGE), Reducer 32 (BROADCAST_EDGE), Reducer 42 
(BROADCAST_EDGE), Reducer 50 (BROADCAST_EDGE)
 Map 56 <- Reducer 42 (BROADCAST_EDGE)
 Reducer 10 <- Reducer 15 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
 Reducer 11 <- Reducer 10 (SIMPLE_EDGE)
@@ -276,18 +276,18 @@ Reducer 13 <- Reducer 31 (SIMPLE_EDGE), Reducer 6 
(SIMPLE_EDGE)
 Reducer 14 <- Map 54 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE)
 Reducer 15 <- Reducer 14 (SIMPLE_EDGE)
 Reducer 17 <- Map 16 (SIMPLE_EDGE), Reducer 38 (SIMPLE_EDGE)
-Reducer 18 <- Map 43 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE)
-Reducer 19 <- Reducer 18 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE)
+Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Reducer 45 (ONE_TO_ONE_EDGE)
+Reducer 19 <- Map 51 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE)
 Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE)
-Reducer 20 <- Reducer 19 (SIMPLE_EDGE), Reducer 46 (ONE_TO_ONE_EDGE)
+Reducer 20 <- Reducer 19 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE)
 Reducer 21 <- Map 52 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE)
 Reducer 22 <- Map 36 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE)
 Reducer 23 <- Map 53 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE)
 Reducer 24 <- Map 16 (CUSTOM_SIMPLE_EDGE)
 Reducer 25 <- Map 16 (SIMPLE_EDGE), Reducer 41 (SIMPLE_EDGE)
-Reducer 26 <- Map 43 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE)
-Reducer 27 <- Reducer 26 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE)
-Reducer 28 <- Reducer 27 (SIMPLE_EDGE), Reducer 50 (ONE_TO_ONE_EDGE)
+Reducer 26 <- Reducer 25 (SIMPLE_EDGE), Reducer 49 (ONE_TO_ONE_EDGE)
+Reducer 27 <- Map 51 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE)
+Reducer 28 <- Reducer 27 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE)
 Reducer 29 <- Map 52 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE)
 Reducer 3 <- Map 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
 Reducer 30 <- Map 36 (SIMPLE_EDGE), Reducer 29 (SIMPLE_EDGE)
@@ -299,13 +299,13 @@ Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 34 
(SIMPLE_EDGE)
 Reducer 40 <- Map 39 (CUSTOM_SIMPLE_EDGE)
 Reducer 41 <- Map 39 (SIMPLE_EDGE), Map 55 (SIMPLE_EDGE)
 Reducer 42 <- Map 39 (CUSTOM_SIMPLE_EDGE)
-Reducer 45 <- Map 44 (SIMPLE_EDGE), Map 48 (SIMPLE_EDGE)
-Reducer 46 <- Reducer 45 (SIMPLE_EDGE)
-Reducer 47 <- Reducer 46 (CUSTOM_SIMPLE_EDGE)
-Reducer 49 <- Map 48 (SIMPLE_EDGE), Map 56 (SIMPLE_EDGE)
+Reducer 44 <- Map 43 (SIMPLE_EDGE), Map 47 (SIMPLE_EDGE)
+Reducer 45 <- Reducer 44 (SIMPLE_EDGE)
+Reducer 46 <- Reducer 45 (CUSTOM_SIMPLE_EDGE)
+Reducer 48 <- Map 47 (SIMPLE_EDGE), Map 56 (SIMPLE_EDGE)
+Reducer 49 <- Reducer 48 (SIMPLE_EDGE)
 Reducer 5 <- Map 36 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
-Reducer 50 <- Reducer 49 (SIMPLE_EDGE)
-Reducer 51 <- Reducer 50 (CUSTOM_SIMPLE_EDGE)
+Reducer 50 <- Reducer 49 (CUSTOM_SIMPLE_EDGE)
 Reducer 6 <- Map 54 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
 Reducer 7 <- Reducer 23 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
 Reducer 8 <- Map 54 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
@@ -320,10 +320,10 @@ Stage-0
 Select Operator [SEL_1200] (rows=2169965329 width=1702)
   
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20"]
 <-Reducer 10 [SIMPLE_EDGE]
-  SHUFFLE [RS_259]
-Select Operator [SEL_258] (rows=2169965329 width=1694)
+  SHUFFLE [RS_257]
+Select Operator [SEL_256] (rows=2169965329 width=1694)
   
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"]
-  Filter Operator [FIL_257] (rows=2169965329 width=1694)
+  Filter Operator [FIL_255] (rows=2169965329 width=1694)
 predicate:(_col19 <= _col12)
 Merge Join Operator [MERGEJOIN_1087] (rows=6509895988 
width=1694)
   Conds:RS_1171._col2, _col1, _col3=RS_1199._col1, _col0,

[19/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out 
b/ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out
index 2504d78..b4410ff 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out
@@ -141,10 +141,10 @@ POSTHOOK: Input: default@web_sales
 POSTHOOK: Output: hdfs://### HDFS PATH ###
 CBO PLAN:
 HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100])
-  HiveProject(item_id=[$0], ss_item_rev=[$3], ss_dev=[*(/(/($3, +(+($3, $1), 
$5)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], cs_item_rev=[$1], 
cs_dev=[*(/(/($1, +(+($3, $1), $5)), CAST(3):DECIMAL(10, 0)), 
CAST(100):DECIMAL(10, 0))], ws_item_rev=[$5], ws_dev=[*(/(/($5, +(+($3, $1), 
$5)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], average=[/(+(+($3, 
$1), $5), CAST(3):DECIMAL(10, 0))])
-HiveJoin(condition=[AND(AND(AND(AND(=($0, $4), BETWEEN(false, $3, *(0.9, 
$5), *(1.1, $5))), BETWEEN(false, $1, *(0.9, $5), *(1.1, $5))), BETWEEN(false, 
$5, *(0.9, $3), *(1.1, $3))), BETWEEN(false, $5, *(0.9, $1), *(1.1, $1)))], 
joinType=[inner], algorithm=[none], cost=[not available])
-  HiveJoin(condition=[AND(AND(=($2, $0), BETWEEN(false, $3, *(0.9, $1), 
*(1.1, $1))), BETWEEN(false, $1, *(0.9, $3), *(1.1, $3)))], joinType=[inner], 
algorithm=[none], cost=[not available])
-HiveProject(i_item_id=[$0], $f1=[$1])
+  HiveProject(item_id=[$0], ss_item_rev=[$5], ss_dev=[*(/(/($5, +(+($5, $1), 
$9)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], cs_item_rev=[$1], 
cs_dev=[*(/(/($1, +(+($5, $1), $9)), CAST(3):DECIMAL(10, 0)), 
CAST(100):DECIMAL(10, 0))], ws_item_rev=[$9], ws_dev=[*(/(/($9, +(+($5, $1), 
$9)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], average=[/(+(+($5, 
$1), $9), CAST(3):DECIMAL(10, 0))])
+HiveJoin(condition=[AND(AND(AND(AND(=($0, $8), BETWEEN(false, $5, $10, 
$11)), BETWEEN(false, $1, $10, $11)), BETWEEN(false, $9, $6, $7)), 
BETWEEN(false, $9, $2, $3))], joinType=[inner], algorithm=[none], cost=[not 
available])
+  HiveJoin(condition=[AND(AND(=($4, $0), BETWEEN(false, $5, $2, $3)), 
BETWEEN(false, $1, $6, $7))], joinType=[inner], algorithm=[none], cost=[not 
available])
+HiveProject($f0=[$0], $f1=[$1], *=[*(0.9, $1)], *3=[*(1.1, $1)])
   HiveAggregate(group=[{4}], agg#0=[sum($2)])
 HiveJoin(condition=[=($0, $5)], joinType=[inner], 
algorithm=[none], cost=[not available])
   HiveJoin(condition=[=($1, $3)], joinType=[inner], 
algorithm=[none], cost=[not available])
@@ -175,7 +175,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], 
dir1=[ASC], fetch=[100])
   HiveProject(d_week_seq=[$4])
 HiveFilter(condition=[AND(=($2, 
_UTF-16LE'1998-02-19'), IS NOT NULL($4))])
   HiveTableScan(table=[[default, date_dim]], 
table:alias=[date_dim])
-HiveProject(i_item_id=[$0], $f1=[$1])
+HiveProject($f0=[$0], $f1=[$1], *=[*(0.9, $1)], *3=[*(1.1, $1)])
   HiveAggregate(group=[{4}], agg#0=[sum($2)])
 HiveJoin(condition=[=($0, $5)], joinType=[inner], 
algorithm=[none], cost=[not available])
   HiveJoin(condition=[=($1, $3)], joinType=[inner], 
algorithm=[none], cost=[not available])
@@ -206,7 +206,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], 
dir1=[ASC], fetch=[100])
   HiveProject(d_week_seq=[$4])
 HiveFilter(condition=[AND(=($2, 
_UTF-16LE'1998-02-19'), IS NOT NULL($4))])
   HiveTableScan(table=[[default, date_dim]], 
table:alias=[date_dim])
-  HiveProject(i_item_id=[$0], $f1=[$1])
+  HiveProject($f0=[$0], $f1=[$1], *=[*(0.9, $1)], *3=[*(1.1, $1)])
 HiveAggregate(group=[{4}], agg#0=[sum($2)])
   HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], 
cost=[not available])
 HiveJoin(condition=[=($1, $3)], joinType=[inner], 
algorithm=[none], cost=[not available])

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/cbo_query59.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query59.q.out 
b/ql/src/test/results/clientpositive/perf/tez/cbo_query59.q.out
index bb92a1f..8674a8a 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query59.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query59.q.out
@@ -94,24 +94,24 @@ POSTHOOK: Input: default@store_sales
 POSTHOOK: Output: hdfs://### HDFS PATH ###
 CBO PLAN:
 HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], 
dir2=[ASC], fetch=[100])
-

[47/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/join47.q.out
--
diff --git a/ql/src/test/results/clientpositive/join47.q.out 
b/ql/src/test/results/clientpositive/join47.q.out
index 2892b8b..169244e 100644
--- a/ql/src/test/results/clientpositive/join47.q.out
+++ b/ql/src/test/results/clientpositive/join47.q.out
@@ -363,24 +363,24 @@ STAGE PLANS:
 alias: src1
 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE 
Column stats: NONE
 Select Operator
-  expressions: key (type: string), value (type: string)
-  outputColumnNames: _col0, _col1
+  expressions: key (type: string), value (type: string), 
UDFToDouble(value) BETWEEN 100.0D AND 102.0D (type: boolean)
+  outputColumnNames: _col0, _col1, _col2
   Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE 
Column stats: NONE
   Reduce Output Operator
 sort order: 
 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE 
Column stats: NONE
-value expressions: _col0 (type: string), _col1 (type: string)
+value expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: boolean)
   TableScan
 alias: src
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
 Select Operator
-  expressions: key (type: string), value (type: string)
-  outputColumnNames: _col0, _col1
+  expressions: key (type: string), value (type: string), 
UDFToDouble(value) BETWEEN 100.0D AND 102.0D (type: boolean)
+  outputColumnNames: _col0, _col1, _col2
   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
   Reduce Output Operator
 sort order: 
 Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-value expressions: _col0 (type: string), _col1 (type: string)
+value expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: boolean)
   Reduce Operator Tree:
 Join Operator
   condition map:
@@ -388,19 +388,23 @@ STAGE PLANS:
   keys:
 0 
 1 
-  outputColumnNames: _col0, _col1, _col2, _col3
-  residual filter predicates: {((_col0 = _col2) or UDFToDouble(_col1) 
BETWEEN 100.0D AND 102.0D or UDFToDouble(_col3) BETWEEN 100.0D AND 102.0D)}
-  Statistics: Num rows: 9026 Data size: 173876 Basic stats: COMPLETE 
Column stats: NONE
-  Limit
-Number of rows: 10
-Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE 
Column stats: NONE
-File Output Operator
-  compressed: false
+  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+  residual filter predicates: {((_col0 = _col3) or _col2 or _col5)}
+  Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE 
Column stats: NONE
+  Select Operator
+expressions: _col0 (type: string), _col1 (type: string), _col3 
(type: string), _col4 (type: string)
+outputColumnNames: _col0, _col1, _col2, _col3
+Statistics: Num rows: 12500 Data size: 240800 Basic stats: 
COMPLETE Column stats: NONE
+Limit
+  Number of rows: 10
   Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE 
Column stats: NONE
-  table:
-  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+  File Output Operator
+compressed: false
+Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE 
Column stats: NONE
+table:
+input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
 Fetch Operator
@@ -472,24 +476,24 @@ STAGE PLANS:
 alias: src1
 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE 
Column stats: NONE
 Select Operator
-  expressions: key (type: string), value (type: string)
-  outputColumnNames: _col0, _col1
+  expressions: key (type: string), value (type: string), 
UDFToDouble(key) (type: double)
+  outputColumnNames: _col0, _col1, _col2
   Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE 
Column stats: NONE

[03/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/smb_mapjoin_46.q.out
--
diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_46.q.out 
b/ql/src/test/results/clientpositive/smb_mapjoin_46.q.out
index 98789d7..ddb436b 100644
--- a/ql/src/test/results/clientpositive/smb_mapjoin_46.q.out
+++ b/ql/src/test/results/clientpositive/smb_mapjoin_46.q.out
@@ -187,25 +187,28 @@ STAGE PLANS:
 alias: test1_n5
 Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column 
stats: NONE
 Select Operator
-  expressions: key (type: int), value (type: int), col_1 (type: 
string)
-  outputColumnNames: _col0, _col1, _col2
+  expressions: key (type: int), value (type: int), col_1 (type: 
string), key BETWEEN 100 AND 102 (type: boolean)
+  outputColumnNames: _col0, _col1, _col2, _col3
   Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE 
Column stats: NONE
   Sorted Merge Bucket Map Join Operator
 condition map:
  Left Outer Join 0 to 1
 filter predicates:
-  0 {_col0 BETWEEN 100 AND 102}
+  0 {_col3}
   1 
 keys:
   0 _col1 (type: int)
   1 _col1 (type: int)
-outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-File Output Operator
-  compressed: false
-  table:
-  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6
+Select Operator
+  expressions: _col0 (type: int), _col1 (type: int), _col2 
(type: string), _col4 (type: int), _col5 (type: int), _col6 (type: string)
+  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+  File Output Operator
+compressed: false
+table:
+input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
 Fetch Operator
@@ -283,7 +286,7 @@ STAGE PLANS:
 Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE 
Column stats: NONE
 HashTable Sink Operator
   filter predicates:
-0 {_col0 BETWEEN 100 AND 102}
+0 {_col3}
 1 
   keys:
 0 
@@ -296,27 +299,31 @@ STAGE PLANS:
 alias: test1_n5
 Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column 
stats: NONE
 Select Operator
-  expressions: key (type: int), value (type: int), col_1 (type: 
string)
-  outputColumnNames: _col0, _col1, _col2
+  expressions: key (type: int), value (type: int), col_1 (type: 
string), key BETWEEN 100 AND 102 (type: boolean)
+  outputColumnNames: _col0, _col1, _col2, _col3
   Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE 
Column stats: NONE
   Map Join Operator
 condition map:
  Left Outer Join 0 to 1
 filter predicates:
-  0 {_col0 BETWEEN 100 AND 102}
+  0 {_col3}
   1 
 keys:
   0 
   1 
-outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6
 Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE 
Column stats: NONE
-File Output Operator
-  compressed: false
+Select Operator
+  expressions: _col0 (type: int), _col1 (type: int), _col2 
(type: string), _col4 (type: int), _col5 (type: int), _col6 (type: string)
+  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
   Statistics: Num rows: 24 Data size: 476 Basic stats: 
COMPLETE Column stats: NONE
-  table:
-  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+  File Output Operator
+compressed: false
+

[42/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out 
b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
index 067a43c..c86450a 100644
--- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
@@ -457,16 +457,16 @@ Stage-0
 SHUFFLE [RS_23]
   PartitionCols:_col0, _col1
   Group By Operator [GBY_22] (rows=1 width=20)
-
Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col4, _col1
-Select Operator [SEL_21] (rows=2 width=20)
-  Output:["_col1","_col4"]
-  Merge Join Operator [MERGEJOIN_57] (rows=2 width=20)
-
Conds:RS_17._col0=RS_18._col0(Inner),RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual
 filter predicates:{((_col3 > 0) or (_col1 >= 0))} {((_col3 + _col6) >= 0)}
+
Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col5, _col1
+Select Operator [SEL_21] (rows=1 width=24)
+  Output:["_col1","_col5"]
+  Merge Join Operator [MERGEJOIN_57] (rows=1 width=24)
+
Conds:RS_17._col0=RS_18._col0(Inner),RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col7"],residual
 filter predicates:{((_col4 > 0) or _col2)} {((_col4 + _col7) >= 0)}
   <-Map 1 [SIMPLE_EDGE] llap
 SHUFFLE [RS_17]
   PartitionCols:_col0
-  Select Operator [SEL_2] (rows=18 width=84)
-Output:["_col0","_col1"]
+  Select Operator [SEL_2] (rows=18 width=88)
+Output:["_col0","_col1","_col2"]
 Filter Operator [FIL_36] (rows=18 width=84)
   predicate:key is not null
   TableScan [TS_0] (rows=20 width=84)
@@ -547,16 +547,16 @@ Stage-0
 SHUFFLE [RS_23]
   PartitionCols:_col0, _col1
   Group By Operator [GBY_22] (rows=1 width=20)
-
Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col4
-Select Operator [SEL_21] (rows=1 width=20)
-  Output:["_col1","_col4"]
-  Merge Join Operator [MERGEJOIN_57] (rows=1 width=20)
-
Conds:RS_17._col0=RS_18._col0(Inner),RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual
 filter predicates:{((_col3 > 0) or _col1 is not null)} {((_col1 >= 1) or 
(_col4 >= 1L))} {((UDFToLong(_col1) + _col4) >= 0)} {((_col3 + _col6) >= 0)}
+
Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col7
+Select Operator [SEL_21] (rows=1 width=36)
+  Output:["_col1","_col7"]
+  Merge Join Operator [MERGEJOIN_57] (rows=1 width=36)
+
Conds:RS_17._col0=RS_18._col0(Inner),RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col9"],residual
 filter predicates:{((_col6 > 0) or _col2)} {(_col3 or (_col7 >= 1L))} {((_col4 
+ _col7) >= 0)} {((_col6 + _col9) >= 0)}
   <-Map 1 [SIMPLE_EDGE] llap
 SHUFFLE [RS_17]
   PartitionCols:_col0
-  Select Operator [SEL_2] (rows=18 width=84)
-Output:["_col0","_col1"]
+  Select Operator [SEL_2] (rows=18 width=99)
+Output:["_col0","_col1","_col2","_col3","_col4"]
 Filter Operator [FIL_36] (rows=18 width=84)
   predicate:((c_int > 0) and key is not null)
   TableScan [TS_0] (rows=20 width=84)
@@ -630,16 +630,16 @@ Stage-0
   SHUFFLE [RS_23]
 PartitionCols:_col0, _col1
 Group By Operator [GBY_22] (rows=1 width=20)
-  
Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col4
-  Select Operator [SEL_21] (rows=1 width=20)
-Output:["_col1","_col4"]
-Merge Join Operator [MERGEJOIN_54] (rows=1 width=20)
-  
Conds:RS_17._col0=RS_18._col0(Inner),RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual
 filter predicates:{((_col3 > 0) or (_col1 >= 0))} {((_col3 + _col6) >= 2)}
+  
Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col5
+  Select Operator [SEL_21] (rows=1

[48/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/join45.q.out
--
diff --git a/ql/src/test/results/clientpositive/join45.q.out 
b/ql/src/test/results/clientpositive/join45.q.out
index 6cf6c33..7865e0e 100644
--- a/ql/src/test/results/clientpositive/join45.q.out
+++ b/ql/src/test/results/clientpositive/join45.q.out
@@ -363,24 +363,24 @@ STAGE PLANS:
 alias: src1
 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE 
Column stats: NONE
 Select Operator
-  expressions: key (type: string), value (type: string)
-  outputColumnNames: _col0, _col1
+  expressions: key (type: string), value (type: string), 
UDFToDouble(value) BETWEEN 100.0D AND 102.0D (type: boolean)
+  outputColumnNames: _col0, _col1, _col2
   Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE 
Column stats: NONE
   Reduce Output Operator
 sort order: 
 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE 
Column stats: NONE
-value expressions: _col0 (type: string), _col1 (type: string)
+value expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: boolean)
   TableScan
 alias: src
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
 Select Operator
-  expressions: key (type: string), value (type: string)
-  outputColumnNames: _col0, _col1
+  expressions: key (type: string), value (type: string), 
UDFToDouble(value) BETWEEN 100.0D AND 102.0D (type: boolean)
+  outputColumnNames: _col0, _col1, _col2
   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
   Reduce Output Operator
 sort order: 
 Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-value expressions: _col0 (type: string), _col1 (type: string)
+value expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: boolean)
   Reduce Operator Tree:
 Join Operator
   condition map:
@@ -388,21 +388,25 @@ STAGE PLANS:
   keys:
 0 
 1 
-  outputColumnNames: _col0, _col1, _col2, _col3
+  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
   Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE 
Column stats: NONE
   Filter Operator
-predicate: ((_col0 = _col2) or UDFToDouble(_col1) BETWEEN 100.0D 
AND 102.0D or UDFToDouble(_col3) BETWEEN 100.0D AND 102.0D) (type: boolean)
-Statistics: Num rows: 9026 Data size: 173876 Basic stats: COMPLETE 
Column stats: NONE
-Limit
-  Number of rows: 10
-  Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE 
Column stats: NONE
-  File Output Operator
-compressed: false
+predicate: ((_col0 = _col3) or _col2 or _col5) (type: boolean)
+Statistics: Num rows: 12500 Data size: 240800 Basic stats: 
COMPLETE Column stats: NONE
+Select Operator
+  expressions: _col0 (type: string), _col1 (type: string), _col3 
(type: string), _col4 (type: string)
+  outputColumnNames: _col0, _col1, _col2, _col3
+  Statistics: Num rows: 12500 Data size: 240800 Basic stats: 
COMPLETE Column stats: NONE
+  Limit
+Number of rows: 10
 Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE 
Column stats: NONE
-table:
-input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+File Output Operator
+  compressed: false
+  Statistics: Num rows: 10 Data size: 190 Basic stats: 
COMPLETE Column stats: NONE
+  table:
+  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
 Fetch Operator
@@ -474,24 +478,24 @@ STAGE PLANS:
 alias: src1
 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE 
Column stats: NONE
 Select Operator
-  expressions: key (type: string), value (type: string)
-  outputColumnNames: _col0, _col1
+  expressions: key (type: string), value (type: string), 
UDFToDouble(key)

[41/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/join46.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/join46.q.out 
b/ql/src/test/results/clientpositive/llap/join46.q.out
index fcd0d83..ec58429 100644
--- a/ql/src/test/results/clientpositive/llap/join46.q.out
+++ b/ql/src/test/results/clientpositive/llap/join46.q.out
@@ -187,15 +187,15 @@ STAGE PLANS:
   alias: test1_n2
   Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE 
Column stats: COMPLETE
   Select Operator
-expressions: key (type: int), value (type: int), col_1 
(type: string)
-outputColumnNames: _col0, _col1, _col2
-Statistics: Num rows: 6 Data size: 572 Basic stats: 
COMPLETE Column stats: COMPLETE
+expressions: key (type: int), value (type: int), col_1 
(type: string), key BETWEEN 100 AND 102 (type: boolean)
+outputColumnNames: _col0, _col1, _col2, _col3
+Statistics: Num rows: 6 Data size: 596 Basic stats: 
COMPLETE Column stats: COMPLETE
 Reduce Output Operator
   key expressions: _col1 (type: int)
   sort order: +
   Map-reduce partition columns: _col1 (type: int)
-  Statistics: Num rows: 6 Data size: 572 Basic stats: 
COMPLETE Column stats: COMPLETE
-  value expressions: _col0 (type: int), _col2 (type: 
string)
+  Statistics: Num rows: 6 Data size: 596 Basic stats: 
COMPLETE Column stats: COMPLETE
+  value expressions: _col0 (type: int), _col2 (type: 
string), _col3 (type: boolean)
 Execution mode: vectorized, llap
 LLAP IO: no inputs
 Map 3 
@@ -226,20 +226,24 @@ STAGE PLANS:
 condition map:
  Left Outer Join 0 to 1
 filter predicates:
-  0 {VALUE._col0 BETWEEN 100 AND 102}
+  0 {VALUE._col2}
   1 
 keys:
   0 _col1 (type: int)
   1 _col1 (type: int)
-outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6
 Statistics: Num rows: 8 Data size: 1049 Basic stats: COMPLETE 
Column stats: COMPLETE
-File Output Operator
-  compressed: false
+Select Operator
+  expressions: _col0 (type: int), _col1 (type: int), _col2 
(type: string), _col4 (type: int), _col5 (type: int), _col6 (type: string)
+  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
   Statistics: Num rows: 8 Data size: 1049 Basic stats: 
COMPLETE Column stats: COMPLETE
-  table:
-  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+  File Output Operator
+compressed: false
+Statistics: Num rows: 8 Data size: 1049 Basic stats: 
COMPLETE Column stats: COMPLETE
+table:
+input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
 Fetch Operator
@@ -308,13 +312,13 @@ STAGE PLANS:
   alias: test1_n2
   Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE 
Column stats: COMPLETE
   Select Operator
-expressions: key (type: int), value (type: int), col_1 
(type: string)
-outputColumnNames: _col0, _col1, _col2
-Statistics: Num rows: 6 Data size: 572 Basic stats: 
COMPLETE Column stats: COMPLETE
+expressions: key (type: int), value (type: int), col_1 
(type: string), key BETWEEN 100 AND 102 (type: boolean)
+outputColumnNames: _col0, _col1, _col2, _col3
+Statistics: Num rows: 6 Data size: 596 Basic stats: 
COMPLETE Column stats: COMPLETE
 Reduce Output Operator
   sort order: 
-  Statistics: Num rows: 6 Data size: 572 Basic stats: 
COMPLETE Column stats: COMPLETE
-  value expressions: _col0 (type: int), _col1 (type: int), 
_col2 (type: string)
+  Statistics: Num rows: 6 Data size: 596 Basic stats: 
COMPLETE Column stats: COMPLETE
+

[05/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query91.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/tez/query91.q.out 
b/ql/src/test/results/clientpositive/perf/tez/query91.q.out
index 5b4952d..98e8adf 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query91.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query91.q.out
@@ -104,13 +104,13 @@ Stage-0
 SHUFFLE [RS_42]
   PartitionCols:_col0, _col1, _col2, _col3, _col4
   Group By Operator [GBY_41] (rows=1 width=585)
-
Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col12)"],keys:_col5,
 _col6, _col17, _col18, _col19
+
Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col11)"],keys:_col5,
 _col6, _col14, _col15, _col16
 Merge Join Operator [MERGEJOIN_144] (rows=10438 width=473)
-  
Conds:RS_37._col2=RS_165._col0(Inner),Output:["_col5","_col6","_col12","_col17","_col18","_col19"]
+  
Conds:RS_37._col2=RS_165._col0(Inner),Output:["_col5","_col6","_col11","_col14","_col15","_col16"]
 <-Map 15 [SIMPLE_EDGE] vectorized
   SHUFFLE [RS_165]
 PartitionCols:_col0
-Select Operator [SEL_164] (rows=3600 width=96)
+Select Operator [SEL_164] (rows=3600 width=4)
   Output:["_col0"]
   Filter Operator [FIL_163] (rows=3600 width=96)
 predicate:((hd_buy_potential like '0-500%') and 
hd_demo_sk is not null)
@@ -120,12 +120,12 @@ Stage-0
   SHUFFLE [RS_37]
 PartitionCols:_col2
 Merge Join Operator [MERGEJOIN_143] (rows=20876 
width=473)
-  
Conds:RS_34._col0=RS_35._col1(Inner),Output:["_col2","_col5","_col6","_col12","_col17","_col18","_col19"]
+  
Conds:RS_34._col0=RS_35._col1(Inner),Output:["_col2","_col5","_col6","_col11","_col14","_col15","_col16"]
 <-Reducer 12 [SIMPLE_EDGE]
   SHUFFLE [RS_35]
 PartitionCols:_col1
 Merge Join Operator [MERGEJOIN_142] (rows=657590 
width=312)
-  
Conds:RS_21._col2=RS_162._col0(Inner),Output:["_col1","_col3","_col8","_col9","_col10"]
+  
Conds:RS_21._col2=RS_162._col0(Inner),Output:["_col1","_col3","_col6","_col7","_col8"]
 <-Map 14 [SIMPLE_EDGE] vectorized
   SHUFFLE [RS_162]
 PartitionCols:_col0
@@ -152,7 +152,7 @@ Stage-0
 <-Map 13 [SIMPLE_EDGE] vectorized
   SHUFFLE [RS_159]
 PartitionCols:_col0
-Select Operator [SEL_158] (rows=50 
width=12)
+Select Operator [SEL_158] (rows=50 width=4)
   Output:["_col0"]
   Filter Operator [FIL_157] (rows=50 
width=12)
 predicate:((d_moy = 11) and (d_year = 
1999) and d_date_sk is not null)
@@ -166,7 +166,7 @@ Stage-0
 <-Map 9 [SIMPLE_EDGE] vectorized
   SHUFFLE [RS_153]
 PartitionCols:_col0
-Select Operator [SEL_152] (rows=800 
width=116)
+Select Operator [SEL_152] (rows=800 
width=4)
   Output:["_col0"]
   Filter Operator [FIL_151] (rows=800 
width=112)
 predicate:((ca_gmt_offset = -7) and 
ca_address_sk is not null)

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query92.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/tez/query92.q.out 
b/ql/src/test/results/clientpositive/perf/tez/query92.q.out
index 50918f0..1f837dd 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query92.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query92.q.out
@@ -104,9 +104,9 @@ Stage-0
   Select Operator [SEL_34] (rows=2478 width=112)
 Output:["_col2"]
 Filter Operator [FIL_33] (rows=2478 width=112)
-  predicate:(_col2 > CAST( (1.3 * _col6) AS 
decimal(14,7)))
+  predicate:(_col2 > _col5)
   Merge Join

[35/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out 
b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out
index b1eec43..93791ac 100644
--- a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out
@@ -667,238 +667,184 @@ POSTHOOK: query: select unionsrc.key, unionsrc.value 
FROM (select s1.key as key,
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Output: hdfs://### HDFS PATH ###
+100val_100
 104val_104
 105val_105
 113val_113
 118val_118
+119val_119
 12 val_12
 120val_120
+128val_128
+129val_129
 133val_133
 136val_136
+145val_145
 155val_155
 158val_158
 160val_160
 162val_162
+167val_167
 168val_168
 17 val_17
 170val_170
+174val_174
 175val_175
+178val_178
 18 val_18
 180val_180
+181val_181
 186val_186
 19 val_19
+193val_193
 197val_197
+199val_199
+20 val_20
 200val_200
+201val_201
+213val_213
+214val_214
 216val_216
 218val_218
+219val_219
+221val_221
 222val_222
+223val_223
 224val_224
+226val_226
 228val_228
+233val_233
 235val_235
 237val_237
 239val_239
+241val_241
 244val_244
 247val_247
+249val_249
 256val_256
+260val_260
+262val_262
 263val_263
 273val_273
+277val_277
 28 val_28
+281val_281
 283val_283
 286val_286
+287val_287
+288val_288
 292val_292
+298val_298
+302val_302
 306val_306
 308val_308
+310val_310
+323val_323
 327val_327
 33 val_33
+336val_336
+341val_341
+344val_344
 348val_348
+351val_351
 353val_353
 362val_362
 366val_366
+375val_375
+382val_382
+384val_384
+393val_393
+395val_395
 396val_396
 397val_397
+399val_399
 401val_401
+403val_403
+406val_406
 409val_409
 411val_411
+418val_418
 419val_419
 427val_427
 43 val_43
 432val_432
+435val_435
 436val_436
 439val_439
 443val_443
 453val_453
+455val_455
+459val_459
 460val_460
 462val_462
 47 val_47
 472val_472
-485val_485
-496val_496
-54 val_54
-64 val_64
-70 val_70
-8  val_8
-83 val_83
-84 val_84
-85 val_85
-90 val_90
-0  val_0
-103val_103
-114val_114
-125val_125
-138val_138
-146val_146
-150val_150
-152val_152
-153val_153
-156val_156
-157val_157
-165val_165
-172val_172
-177val_177
-179val_179
-187val_187
-195val_195
-196val_196
-217val_217
-242val_242
-248val_248
-252val_252
-265val_265
-27 val_27
-272val_272
-280val_280
-291val_291
-305val_305
-309val_309
-311val_311
-315val_315
-317val_317
-322val_322
-333val_333
-34 val_34
-345val_345
-35 val_35
-356val_356
-364val_364
-368val_368
-369val_369
-37 val_37
-373val_373
-377val_377
-4  val_4
-402val_402
-404val_404
-413val_413
-42 val_42
-430val_430
-431val_431
-444val_444
-449val_449
-452val_452
-454val_454
-457val_457
-463val_463
-466val_466
-470val_470
-475val_475
-481val_481
-489val_489
-491val_491
-57 val_57
-65 val_65
-66 val_66
-74 val_74
-76 val_76
-78 val_78
-9  val_9
-92 val_92
-95 val_95
-100val_100
-119val_119
-128val_128
-129val_129
-145val_145
-167val_167
-174val_174
-178val_178
-181val_181
-193val_193
-199val_199
-20 val_20
-201val_201
-213val_213
-214val_214
-219val_219
-221val_221
-223val_223
-226val_226
-233val_233
-241val_241
-249val_249
-260val_260
-262val_262
-277val_277
-281val_281
-287val_287
-288val_288
-298val_298
-302val_302
-310val_310
-323val_323
-336val_336
-341val_341
-344val_344
-351val_351
-375val_375
-382val_382
-384val_384
-393val_393
-395val_395
-399val_399
-403val_403
-406val_406
-418val_418
-435val_435
-455val_455
-459val_459
 477val_477
 478val_478
 479val_479
 482val_482
+485val_485
 493val_493
 494val_494
 495val_495
+496val_496
 497val_497
 5  val_5
+54 val_54
 58 val_58
+64 val_64
 67 val_67
+70 val_70
 77 val_77
+8  val_8
 80 val_80
+83 val_83
+84 val_84
+85 val_85
 86 val_86
+90 val_90
 97 val_97
 98 val_98
+0  val_0
 10 val_10
+103val_103
 11 val_11
 111

[15/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query17.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/tez/query17.q.out 
b/ql/src/test/results/clientpositive/perf/tez/query17.q.out
index bb18527..642a67f 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query17.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query17.q.out
@@ -147,7 +147,7 @@ Stage-0
 Select Operator [SEL_47] (rows=8581091759 width=381)
   
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"]
   Merge Join Operator [MERGEJOIN_213] (rows=8581091759 
width=381)
-
Conds:RS_44._col3=RS_257._col0(Inner),Output:["_col5","_col9","_col10","_col14","_col21","_col25"]
+
Conds:RS_44._col3=RS_257._col0(Inner),Output:["_col5","_col8","_col9","_col13","_col19","_col22"]
   <-Map 21 [SIMPLE_EDGE] vectorized
 SHUFFLE [RS_257]
   PartitionCols:_col0
@@ -161,12 +161,12 @@ Stage-0
 SHUFFLE [RS_44]
   PartitionCols:_col3
   Merge Join Operator [MERGEJOIN_212] 
(rows=8581091759 width=299)
-Conds:RS_41._col1, _col2, _col4=RS_42._col7, 
_col8, _col9(Inner),Output:["_col3","_col5","_col9","_col10","_col14","_col21"]
+Conds:RS_41._col1, _col2, _col4=RS_42._col6, 
_col7, _col8(Inner),Output:["_col3","_col5","_col8","_col9","_col13","_col19"]
   <-Reducer 11 [SIMPLE_EDGE]
 SHUFFLE [RS_42]
-  PartitionCols:_col7, _col8, _col9
+  PartitionCols:_col6, _col7, _col8
   Merge Join Operator [MERGEJOIN_211] 
(rows=1640229377 width=19)
-Conds:RS_28._col2, _col1=RS_29._col1, 
_col2(Inner),Output:["_col3","_col7","_col8","_col9","_col10"]
+Conds:RS_28._col2, _col1=RS_29._col1, 
_col2(Inner),Output:["_col3","_col6","_col7","_col8","_col9"]
   <-Reducer 10 [SIMPLE_EDGE]
 PARTITION_ONLY_SHUFFLE [RS_28]
   PartitionCols:_col2, _col1
@@ -175,7 +175,7 @@ Stage-0
   <-Map 8 [SIMPLE_EDGE] vectorized
 PARTITION_ONLY_SHUFFLE [RS_222]
   PartitionCols:_col0
-  Select Operator [SEL_218] (rows=304 
width=94)
+  Select Operator [SEL_218] (rows=304 
width=4)
 Output:["_col0"]
 Filter Operator [FIL_215] 
(rows=304 width=94)
   predicate:((d_quarter_name) IN 
('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null)
@@ -205,7 +205,7 @@ Stage-0
   <-Map 8 
[SIMPLE_EDGE] vectorized
 
PARTITION_ONLY_SHUFFLE [RS_224]
   
PartitionCols:_col0
-  Select Operator 
[SEL_219] (rows=304 width=94)
+  Select Operator 
[SEL_219] (rows=304 width=4)
 
Output:["_col0"]
 Filter 
Operator [FIL_216] (rows=304 width=94)
   
predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is 
not null)
@@ -249,7 +249,7 @@ Stage-0
 SHUFFLE [RS_41]
   PartitionCols:_col1, _col2, _col4
   Merge Join Operator [MERGEJOIN_208] 
(rows=27749405 width=294)
-
Conds:RS_38._col1=RS_254._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col9","_col10"]
+
Conds:RS_38._col1=RS_254._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col8","_col9"]
   <-Map 18 [SIMPLE_EDGE] vectorized
 SHUFFLE [RS_254]
   PartitionCols:_col0
@@ -267,7 +267,7 @@ Stage-0
   <-Map 8 [SIMPLE_EDGE] vectorized

[44/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/correlationoptimizer4.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/correlationoptimizer4.q.out 
b/ql/src/test/results/clientpositive/llap/correlationoptimizer4.q.out
index 87c5f48..4a77044 100644
--- a/ql/src/test/results/clientpositive/llap/correlationoptimizer4.q.out
+++ b/ql/src/test/results/clientpositive/llap/correlationoptimizer4.q.out
@@ -1357,15 +1357,16 @@ STAGE PLANS:
 Tez
  A masked pattern was here 
   Edges:
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Map 6 
(SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE)
 Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
 Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
  A masked pattern was here 
   Vertices:
 Map 1 
 Map Operator Tree:
 TableScan
-  alias: x
+  alias: z
   Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
   Select Operator
 expressions: key (type: int)
@@ -1412,10 +1413,10 @@ STAGE PLANS:
 allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
-Map 6 
+Map 7 
 Map Operator Tree:
 TableScan
-  alias: z
+  alias: x
   Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
   Select Operator
 expressions: key (type: int)
@@ -1442,25 +1443,23 @@ STAGE PLANS:
 Reduce Operator Tree:
   Merge Join Operator
 condition map:
- Right Outer Join 0 to 1
- Right Outer Join 1 to 2
+ Left Outer Join 0 to 1
 keys:
   0 _col0 (type: int)
   1 _col0 (type: int)
-  2 _col0 (type: int)
-outputColumnNames: _col2
-Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+outputColumnNames: _col0
+Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
 Group By Operator
   aggregations: count()
-  keys: _col2 (type: int)
+  keys: _col0 (type: int)
   mode: hash
   outputColumnNames: _col0, _col1
-  Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
   Reduce Output Operator
 key expressions: _col0 (type: int)
 sort order: +
 Map-reduce partition columns: _col0 (type: int)
-Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
 value expressions: _col1 (type: bigint)
 MergeJoin Vectorization:
 enabled: false
@@ -1514,6 +1513,25 @@ STAGE PLANS:
   input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
   output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+Reducer 6 
+Execution mode: llap
+Reduce Operator Tree:
+  Merge Join Operator
+condition map:
+ Left Outer Join 0 to 1
+keys:
+  0 _col0 (type: int)
+  1 _col0 (type: int)
+outputColumnNames: _col0
+Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+Reduce Output Operator
+  key expressions: _col0 (type: int)
+  sort order: +
+  Map-reduce partition columns: _col0 (type: int)
+  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
+MergeJoin Vectorization:
+enabled: false
+enableConditionsNotMet: Vectorizing MergeJoin Supported IS 
false
 
   Stage: Stage-0
 Fetch Operator
@@ -1573,15 +1591,16 @@ STAGE PLANS:
 Tez
  A masked pattern was here 
   Edges:
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Map 6 
(SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE)
 Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
 Reducer 4

[40/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/mapjoin46.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/mapjoin46.q.out 
b/ql/src/test/results/clientpositive/llap/mapjoin46.q.out
index a1fe936..d9d2396 100644
--- a/ql/src/test/results/clientpositive/llap/mapjoin46.q.out
+++ b/ql/src/test/results/clientpositive/llap/mapjoin46.q.out
@@ -180,29 +180,33 @@ STAGE PLANS:
   alias: test1_n4
   Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE 
Column stats: COMPLETE
   Select Operator
-expressions: key (type: int), value (type: int), col_1 
(type: string)
-outputColumnNames: _col0, _col1, _col2
-Statistics: Num rows: 6 Data size: 572 Basic stats: 
COMPLETE Column stats: COMPLETE
+expressions: key (type: int), value (type: int), col_1 
(type: string), key BETWEEN 100 AND 102 (type: boolean)
+outputColumnNames: _col0, _col1, _col2, _col3
+Statistics: Num rows: 6 Data size: 596 Basic stats: 
COMPLETE Column stats: COMPLETE
 Map Join Operator
   condition map:
Left Outer Join 0 to 1
   filter predicates:
-0 {_col0 BETWEEN 100 AND 102}
+0 {_col3}
 1 
   keys:
 0 _col1 (type: int)
 1 _col1 (type: int)
-  outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5
+  outputColumnNames: _col0, _col1, _col2, _col4, _col5, 
_col6
   input vertices:
 1 Map 2
   Statistics: Num rows: 8 Data size: 1049 Basic stats: 
COMPLETE Column stats: COMPLETE
-  File Output Operator
-compressed: false
+  Select Operator
+expressions: _col0 (type: int), _col1 (type: int), 
_col2 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: string)
+outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5
 Statistics: Num rows: 8 Data size: 1049 Basic stats: 
COMPLETE Column stats: COMPLETE
-table:
-input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+File Output Operator
+  compressed: false
+  Statistics: Num rows: 8 Data size: 1049 Basic stats: 
COMPLETE Column stats: COMPLETE
+  table:
+  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+  serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Execution mode: llap
 LLAP IO: no inputs
 Map 2 
@@ -294,29 +298,33 @@ STAGE PLANS:
   alias: test1_n4
   Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE 
Column stats: COMPLETE
   Select Operator
-expressions: key (type: int), value (type: int), col_1 
(type: string)
-outputColumnNames: _col0, _col1, _col2
-Statistics: Num rows: 6 Data size: 572 Basic stats: 
COMPLETE Column stats: COMPLETE
+expressions: key (type: int), value (type: int), col_1 
(type: string), key BETWEEN 100 AND 102 (type: boolean)
+outputColumnNames: _col0, _col1, _col2, _col3
+Statistics: Num rows: 6 Data size: 596 Basic stats: 
COMPLETE Column stats: COMPLETE
 Map Join Operator
   condition map:
Left Outer Join 0 to 1
   filter predicates:
-0 {_col0 BETWEEN 100 AND 102}
+0 {_col3}
 1 
   keys:
 0 
 1 
-  outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5
+  outputColumnNames: _col0, _col1, _col2, _col4, _col5, 
_col6
   input vertices:
 1 Map 2
   Statistics: Num rows: 6 Data size: 1142 Basic stats: 
COMPLETE Column stats: COMPLETE
-  File Output Operator
-compressed: false
+

[20/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/cbo_query38.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query38.q.out 
b/ql/src/test/results/clientpositive/perf/tez/cbo_query38.q.out
index 9633df1..cbf9bca 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query38.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query38.q.out
@@ -75,7 +75,7 @@ HiveSortLimit(fetch=[100])
 HiveProject(ss_sold_date_sk=[$0], 
ss_customer_sk=[$3])
   HiveFilter(condition=[AND(IS NOT NULL($0), IS 
NOT NULL($3))])
 HiveTableScan(table=[[default, store_sales]], 
table:alias=[store_sales])
-HiveProject(d_date_sk=[$0], d_date=[$2], 
d_month_seq=[$3])
+HiveProject(d_date_sk=[$0], d_date=[$2])
   HiveFilter(condition=[AND(BETWEEN(false, $3, 
1212, 1223), IS NOT NULL($0))])
 HiveTableScan(table=[[default, date_dim]], 
table:alias=[date_dim])
 HiveProject(c_last_name=[$1], c_first_name=[$0], d_date=[$2], 
$f3=[$3])
@@ -90,7 +90,7 @@ HiveSortLimit(fetch=[100])
 HiveProject(cs_sold_date_sk=[$0], 
cs_bill_customer_sk=[$3])
   HiveFilter(condition=[AND(IS NOT NULL($0), IS 
NOT NULL($3))])
 HiveTableScan(table=[[default, 
catalog_sales]], table:alias=[catalog_sales])
-HiveProject(d_date_sk=[$0], d_date=[$2], 
d_month_seq=[$3])
+HiveProject(d_date_sk=[$0], d_date=[$2])
   HiveFilter(condition=[AND(BETWEEN(false, $3, 
1212, 1223), IS NOT NULL($0))])
 HiveTableScan(table=[[default, date_dim]], 
table:alias=[date_dim])
 HiveProject(c_last_name=[$1], c_first_name=[$0], d_date=[$2], 
$f3=[$3])
@@ -105,7 +105,7 @@ HiveSortLimit(fetch=[100])
 HiveProject(ws_sold_date_sk=[$0], 
ws_bill_customer_sk=[$4])
   HiveFilter(condition=[AND(IS NOT NULL($0), IS 
NOT NULL($4))])
 HiveTableScan(table=[[default, web_sales]], 
table:alias=[web_sales])
-HiveProject(d_date_sk=[$0], d_date=[$2], 
d_month_seq=[$3])
+HiveProject(d_date_sk=[$0], d_date=[$2])
   HiveFilter(condition=[AND(BETWEEN(false, $3, 
1212, 1223), IS NOT NULL($0))])
 HiveTableScan(table=[[default, date_dim]], 
table:alias=[date_dim])
 

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/cbo_query39.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query39.q.out 
b/ql/src/test/results/clientpositive/perf/tez/cbo_query39.q.out
index fd3038e..51bb901 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query39.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query39.q.out
@@ -68,8 +68,8 @@ HiveProject(w_warehouse_sk=[$0], i_item_sk=[$1], 
d_moy=[CAST(4):INTEGER], mean=[
 HiveProject(w_warehouse_sk=[$1], i_item_sk=[$2], 
mean=[/(CAST($6):DOUBLE, $5)], cov=[CASE(=(/(CAST($6):DOUBLE, $5), 0), null, 
/(POWER(/(-($3, /(*($4, $4), $5)), CASE(=($5, 1), null, -($5, 1))), 0.5), 
/(CAST($6):DOUBLE, $5)))])
   HiveFilter(condition=[CASE(=(/(CAST($6):DOUBLE, $5), 0), false, 
>(/(POWER(/(-($3, /(*($4, $4), $5)), CASE(=($5, 1), null, -($5, 1))), 0.5), 
/(CAST($6):DOUBLE, $5)), 1))])
 HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($5)], agg#1=[sum($4)], 
agg#2=[count($3)], agg#3=[sum($3)])
-  HiveProject($f0=[$9], $f1=[$8], $f2=[$0], $f4=[$4], 
$f40=[CAST($4):DOUBLE], $f6=[*(CAST($4):DOUBLE, CAST($4):DOUBLE)])
-HiveJoin(condition=[=($3, $8)], joinType=[inner], 
algorithm=[none], cost=[not available])
+  HiveProject($f0=[$7], $f1=[$6], $f2=[$0], $f4=[$4], 
$f40=[CAST($4):DOUBLE], $f6=[*(CAST($4):DOUBLE, CAST($4):DOUBLE)])
+HiveJoin(condition=[=($3, $6)], joinType=[inner], 
algorithm=[none], cost=[not available])
   HiveJoin(condition=[=($2, $0)], joinType=[inner], 
algorithm=[none], cost=[not available])
 HiveProject(i_item_sk=[$0])
   HiveFilter(condition=[IS NOT NULL($0)])
@@ -78,7 +78,7 @@ HiveProject(w_warehouse_sk=[$0], i_item_sk=[$1], 
d_moy=[CAST(4):INTEGER], mean=[
   HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], 
inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3])
 HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT 
NULL($2), IS NOT NULL($0))])

[10/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query56.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/tez/query56.q.out 
b/ql/src/test/results/clientpositive/perf/tez/query56.q.out
index 18f64cc..17458f4 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query56.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query56.q.out
@@ -204,9 +204,9 @@ Stage-0
 SHUFFLE [RS_71]
   PartitionCols:_col0
   Group By Operator [GBY_70] (rows=355 width=212)
-
Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col1
+
Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1
 Merge Join Operator [MERGEJOIN_303] (rows=339151 
width=100)
-  
Conds:RS_66._col0=RS_67._col4(Inner),Output:["_col1","_col8"]
+  
Conds:RS_66._col0=RS_67._col3(Inner),Output:["_col1","_col7"]
 <-Reducer 2 [SIMPLE_EDGE]
   SHUFFLE [RS_66]
 PartitionCols:_col0
@@ -239,15 +239,15 @@ Stage-0
 
default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_id","i_color"]
 <-Reducer 23 [SIMPLE_EDGE]
   SHUFFLE [RS_67]
-PartitionCols:_col4
+PartitionCols:_col3
 Select Operator [SEL_62] (rows=1550375 
width=13)
-  Output:["_col4","_col5"]
+  Output:["_col3","_col4"]
   Merge Join Operator [MERGEJOIN_298] 
(rows=1550375 width=13)
 
Conds:RS_59._col1=RS_346._col0(Inner),Output:["_col2","_col3"]
   <-Map 28 [SIMPLE_EDGE] vectorized
 PARTITION_ONLY_SHUFFLE [RS_346]
   PartitionCols:_col0
-  Select Operator [SEL_343] (rows=800 
width=116)
+  Select Operator [SEL_343] (rows=800 
width=4)
 Output:["_col0"]
 Filter Operator [FIL_342] 
(rows=800 width=112)
   predicate:((ca_gmt_offset = -8) and 
ca_address_sk is not null)
@@ -261,7 +261,7 @@ Stage-0
   <-Map 20 [SIMPLE_EDGE] vectorized
 PARTITION_ONLY_SHUFFLE [RS_330]
   PartitionCols:_col0
-  Select Operator [SEL_327] (rows=50 
width=12)
+  Select Operator [SEL_327] (rows=50 
width=4)
 Output:["_col0"]
 Filter Operator [FIL_326] (rows=50 
width=12)
   predicate:((d_moy = 1) and 
(d_year = 2000) and d_date_sk is not null)
@@ -320,18 +320,18 @@ Stage-0
 SHUFFLE [RS_109]
   PartitionCols:_col0
   Group By Operator [GBY_108] (rows=355 width=212)
-
Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col1
+
Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1
 Merge Join Operator [MERGEJOIN_304] (rows=172427 
width=188)
-  
Conds:RS_104._col0=RS_105._col3(Inner),Output:["_col1","_col8"]
+  
Conds:RS_104._col0=RS_105._col2(Inner),Output:["_col1","_col7"]
 <-Reducer 2 [SIMPLE_EDGE]
   SHUFFLE [RS_104]
 PartitionCols:_col0
  Please refer to the previous Merge Join 
Operator [MERGEJOIN_293]
 <-Reducer 26 [SIMPLE_EDGE]
   SHUFFLE [RS_105]
-PartitionCols:_col3
+PartitionCols:_col2
 Select Operator [SEL_100] (rows=788222 
width=110)
-  Output:["_col3","_col5"]
+  Output:["_col2","_col4"]
   Merge Join Operator [MERGEJOIN_301] 
(rows=788222 width=110)
 
Conds:RS_97._col2=RS_348._col0(Inner),Output:["_col1","_col3"]
   <-Map 28 [SIMPLE_EDGE] vectorized
@@ -400,18 +400,18 @@

[18/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/cbo_query72.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query72.q.out 
b/ql/src/test/results/clientpositive/perf/tez/cbo_query72.q.out
index fca31ef..e49b44b 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query72.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query72.q.out
@@ -82,10 +82,10 @@ CBO PLAN:
 HiveSortLimit(sort0=[$5], sort1=[$0], sort2=[$1], sort3=[$2], 
dir0=[DESC-nulls-last], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100])
   HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5])
 HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)], agg#1=[count($4)], 
agg#2=[count()])
-  HiveProject($f0=[$15], $f1=[$13], $f2=[$22], $f3=[CASE(IS NULL($28), 1, 
0)], $f4=[CASE(IS NOT NULL($28), 1, 0)])
-HiveJoin(condition=[AND(=($29, $4), =($30, $6))], joinType=[left], 
algorithm=[none], cost=[not available])
-  HiveProject(cs_sold_date_sk=[$10], cs_ship_date_sk=[$11], 
cs_bill_cdemo_sk=[$12], cs_bill_hdemo_sk=[$13], cs_item_sk=[$14], 
cs_promo_sk=[$15], cs_order_number=[$16], cs_quantity=[$17], inv_date_sk=[$0], 
inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3], 
w_warehouse_sk=[$4], w_warehouse_name=[$5], i_item_sk=[$8], i_item_desc=[$9], 
cd_demo_sk=[$22], cd_marital_status=[$23], hd_demo_sk=[$24], 
hd_buy_potential=[$25], d_date_sk=[$18], d_date=[$19], d_week_seq=[$20], 
d_year=[$21], d_date_sk0=[$27], d_week_seq0=[$28], d_date_sk1=[$6], 
d_date0=[$7], p_promo_sk=[$26])
-HiveJoin(condition=[AND(=($0, $27), =($20, $28))], 
joinType=[inner], algorithm=[none], cost=[not available])
+  HiveProject($f0=[$15], $f1=[$13], $f2=[$19], $f3=[CASE(IS NULL($25), 1, 
0)], $f4=[CASE(IS NOT NULL($25), 1, 0)])
+HiveJoin(condition=[AND(=($26, $4), =($27, $6))], joinType=[left], 
algorithm=[none], cost=[not available])
+  HiveProject(cs_sold_date_sk=[$10], cs_ship_date_sk=[$11], 
cs_bill_cdemo_sk=[$12], cs_bill_hdemo_sk=[$13], cs_item_sk=[$14], 
cs_promo_sk=[$15], cs_order_number=[$16], cs_quantity=[$17], inv_date_sk=[$0], 
inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3], 
w_warehouse_sk=[$4], w_warehouse_name=[$5], i_item_sk=[$8], i_item_desc=[$9], 
cd_demo_sk=[$21], hd_demo_sk=[$22], d_date_sk=[$18], d_week_seq=[$19], +=[$20], 
d_date_sk0=[$24], d_week_seq0=[$25], d_date_sk1=[$6], CAST=[$7], 
p_promo_sk=[$23])
+HiveJoin(condition=[AND(=($0, $24), =($19, $25))], 
joinType=[inner], algorithm=[none], cost=[not available])
   HiveJoin(condition=[AND(=($14, $1), <($3, $17))], 
joinType=[inner], algorithm=[none], cost=[not available])
 HiveJoin(condition=[=($4, $2)], joinType=[inner], 
algorithm=[none], cost=[not available])
   HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], 
inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3])
@@ -94,29 +94,29 @@ HiveSortLimit(sort0=[$5], sort1=[$0], sort2=[$1], 
sort3=[$2], dir0=[DESC-nulls-l
   HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2])
 HiveFilter(condition=[IS NOT NULL($0)])
   HiveTableScan(table=[[default, warehouse]], 
table:alias=[warehouse])
-HiveProject(d_date_sk=[$0], d_date=[$1], i_item_sk=[$2], 
i_item_desc=[$3], cs_sold_date_sk=[$4], cs_ship_date_sk=[$5], 
cs_bill_cdemo_sk=[$6], cs_bill_hdemo_sk=[$7], cs_item_sk=[$8], 
cs_promo_sk=[$9], cs_order_number=[$10], cs_quantity=[$11], d_date_sk0=[$12], 
d_date0=[$13], d_week_seq=[$14], d_year=[$15], cd_demo_sk=[$16], 
cd_marital_status=[$17], hd_demo_sk=[$18], hd_buy_potential=[$19], 
p_promo_sk=[$20])
-  HiveJoin(condition=[AND(=($5, $0), >(CAST($1):DOUBLE, 
+(CAST($13):DOUBLE, 5)))], joinType=[inner], algorithm=[none], cost=[not 
available])
-HiveProject(d_date_sk=[$0], d_date=[$2])
+HiveProject(d_date_sk=[$0], CAST=[$1], i_item_sk=[$2], 
i_item_desc=[$3], cs_sold_date_sk=[$4], cs_ship_date_sk=[$5], 
cs_bill_cdemo_sk=[$6], cs_bill_hdemo_sk=[$7], cs_item_sk=[$8], 
cs_promo_sk=[$9], cs_order_number=[$10], cs_quantity=[$11], d_date_sk0=[$12], 
d_week_seq=[$13], +=[$14], cd_demo_sk=[$15], hd_demo_sk=[$16], p_promo_sk=[$17])
+  HiveJoin(condition=[AND(=($5, $0), >($1, $14))], 
joinType=[inner], algorithm=[none], cost=[not available])
+HiveProject(d_date_sk=[$0], CAST=[CAST($2):DOUBLE])
   HiveFilter(condition=[IS NOT NULL($0)])
 HiveTableScan(table=[[default, date_dim]], 
table:alias=[d3])
 HiveJoin(condition=[=($0, $6)], joinType=[inner], 
algorithm=[none], cost=[not available])
   HiveProject(i_item_sk=[$0], i_item_desc=[$4])
 HiveFilter(condition=[IS NOT NULL($0)])

[13/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query27.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/tez/query27.q.out 
b/ql/src/test/results/clientpositive/perf/tez/query27.q.out
index 59cca4f..d7fd2ed 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query27.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query27.q.out
@@ -94,7 +94,7 @@ Stage-0
 Select Operator [SEL_27] (rows=1427275 width=186)
   
Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
   Merge Join Operator [MERGEJOIN_100] (rows=1427275 
width=186)
-
Conds:RS_24._col1=RS_130._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col15","_col17"]
+
Conds:RS_24._col1=RS_130._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col11","_col13"]
   <-Map 14 [SIMPLE_EDGE] vectorized
 SHUFFLE [RS_130]
   PartitionCols:_col0
@@ -108,7 +108,7 @@ Stage-0
 SHUFFLE [RS_24]
   PartitionCols:_col1
   Merge Join Operator [MERGEJOIN_99] (rows=1427275 
width=90)
-
Conds:RS_21._col3=RS_119._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col15"]
+
Conds:RS_21._col3=RS_119._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col11"]
   <-Map 12 [SIMPLE_EDGE] vectorized
 SHUFFLE [RS_119]
   PartitionCols:_col0
@@ -126,7 +126,7 @@ Stage-0
   <-Map 10 [SIMPLE_EDGE] vectorized
 SHUFFLE [RS_111]
   PartitionCols:_col0
-  Select Operator [SEL_110] (rows=652 
width=8)
+  Select Operator [SEL_110] (rows=652 
width=4)
 Output:["_col0"]
 Filter Operator [FIL_109] (rows=652 
width=8)
   predicate:((d_year = 2001) and 
d_date_sk is not null)
@@ -140,7 +140,7 @@ Stage-0
   <-Map 8 [SIMPLE_EDGE] vectorized
 PARTITION_ONLY_SHUFFLE [RS_103]
   PartitionCols:_col0
-  Select Operator [SEL_102] 
(rows=14776 width=269)
+  Select Operator [SEL_102] 
(rows=14776 width=4)
 Output:["_col0"]
 Filter Operator [FIL_101] 
(rows=14776 width=268)
   predicate:((cd_education_status 
= '2 yr Degree') and (cd_gender = 'M') and (cd_marital_status = 'U') and 
cd_demo_sk is not null)

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query29.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/tez/query29.q.out 
b/ql/src/test/results/clientpositive/perf/tez/query29.q.out
index a21c3c7..19f121e 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query29.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query29.q.out
@@ -144,20 +144,20 @@ Stage-0
 SHUFFLE [RS_49]
   PartitionCols:_col0, _col1, _col2, _col3
   Group By Operator [GBY_48] (rows=21091879 width=496)
-
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col14)","sum(_col22)","sum(_col3)"],keys:_col7,
 _col8, _col27, _col28
+
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col13)","sum(_col19)","sum(_col3)"],keys:_col6,
 _col7, _col22, _col23
 Top N Key Operator [TNK_93] (rows=4156223234 width=483)
-  keys:_col7, _col8, _col27, _col28,sort order:,top 
n:100
+  keys:_col6, _col7, _col22, _col23,sort order:,top 
n:100
   Merge Join Operator [MERGEJOIN_205] (rows=4156223234 
width=483)
-Conds:RS_44._col1, _col2=RS_45._col14, 
_col13(Inner),Output:["_col3","_col7","_col8","_col14","_col22","_col27","_col28"]
+Conds:RS_44._col2, _col1=RS_45._col11, 
_col12(Inner),Output:["_col3","_col6","_col7","_col13","_col19","_col22","_col23"]
   <-Reducer 2 [SIMPLE_EDGE]
 PARTITION_ONLY_SHUFFLE [RS_44]
-  PartitionCols:_col1,

[39/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/partition_shared_scan.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/partition_shared_scan.q.out 
b/ql/src/test/results/clientpositive/llap/partition_shared_scan.q.out
index ba9e81d..f84d13f 100644
--- a/ql/src/test/results/clientpositive/llap/partition_shared_scan.q.out
+++ b/ql/src/test/results/clientpositive/llap/partition_shared_scan.q.out
@@ -98,12 +98,12 @@ STAGE PLANS:
 Select Operator
   expressions: i (type: int)
   outputColumnNames: _col0
-  Statistics: Num rows: 10 Data size: 940 Basic stats: 
COMPLETE Column stats: COMPLETE
+  Statistics: Num rows: 10 Data size: 40 Basic stats: 
COMPLETE Column stats: COMPLETE
   Reduce Output Operator
 key expressions: _col0 (type: int)
 sort order: +
 Map-reduce partition columns: _col0 (type: int)
-Statistics: Num rows: 10 Data size: 940 Basic stats: 
COMPLETE Column stats: COMPLETE
+Statistics: Num rows: 10 Data size: 40 Basic stats: 
COMPLETE Column stats: COMPLETE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
 Map 5 
@@ -118,12 +118,12 @@ STAGE PLANS:
 Select Operator
   expressions: i (type: int)
   outputColumnNames: _col0
-  Statistics: Num rows: 10 Data size: 910 Basic stats: 
COMPLETE Column stats: COMPLETE
+  Statistics: Num rows: 10 Data size: 40 Basic stats: 
COMPLETE Column stats: COMPLETE
   Reduce Output Operator
 key expressions: _col0 (type: int)
 sort order: +
 Map-reduce partition columns: _col0 (type: int)
-Statistics: Num rows: 10 Data size: 910 Basic stats: 
COMPLETE Column stats: COMPLETE
+Statistics: Num rows: 10 Data size: 40 Basic stats: 
COMPLETE Column stats: COMPLETE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
 Reducer 2 
@@ -152,10 +152,10 @@ STAGE PLANS:
 keys:
   0 _col9 (type: int)
   1 _col0 (type: int)
-outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col11
+outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10
 Statistics: Num rows: 100 Data size: 62700 Basic stats: 
COMPLETE Column stats: COMPLETE
 Select Operator
-  expressions: _col9 (type: int), 'foo_n1' (type: string), 
_col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: 
string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 
(type: double), _col8 (type: string), _col11 (type: int), 'bar' (type: string)
+  expressions: _col9 (type: int), 'foo_n1' (type: string), 
_col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: 
string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 
(type: double), _col8 (type: string), _col10 (type: int), 'bar' (type: string)
   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12
   Statistics: Num rows: 100 Data size: 80400 Basic stats: 
COMPLETE Column stats: COMPLETE
   File Output Operator
@@ -240,17 +240,17 @@ STAGE PLANS:
 Select Operator
   expressions: i (type: int)
   outputColumnNames: _col0
-  Statistics: Num rows: 10 Data size: 940 Basic stats: 
COMPLETE Column stats: COMPLETE
+  Statistics: Num rows: 10 Data size: 40 Basic stats: 
COMPLETE Column stats: COMPLETE
   Reduce Output Operator
 key expressions: _col0 (type: int)
 sort order: +
 Map-reduce partition columns: _col0 (type: int)
-Statistics: Num rows: 10 Data size: 940 Basic stats: 
COMPLETE Column stats: COMPLETE
+Statistics: Num rows: 10 Data size: 40 Basic stats: 
COMPLETE Column stats: COMPLETE
   Reduce Output Operator
 key expressions: _col0 (type: int)
 sort order: +
 Map-reduce partition columns: _col0 (type: int)
-Statistics: Num rows: 10 Data size: 940 Basic stats: 
COMPLETE Column stats: COMPLETE
+Statistics: Num rows: 10 Data size: 40

[25/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query66.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/spark/query66.q.out 
b/ql/src/test/results/clientpositive/perf/spark/query66.q.out
index e8ef1dc..80723d8 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query66.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query66.q.out
@@ -624,8 +624,8 @@ STAGE PLANS:
 predicate: (ws_ship_mode_sk is not null and 
ws_sold_date_sk is not null and ws_sold_time_sk is not null and ws_warehouse_sk 
is not null) (type: boolean)
 Statistics: Num rows: 144002668 Data size: 19580198212 
Basic stats: COMPLETE Column stats: NONE
 Select Operator
-  expressions: ws_sold_date_sk (type: int), 
ws_sold_time_sk (type: int), ws_ship_mode_sk (type: int), ws_warehouse_sk 
(type: int), ws_quantity (type: int), ws_sales_price (type: decimal(7,2)), 
ws_net_paid_inc_tax (type: decimal(7,2))
-  outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6
+  expressions: ws_sold_date_sk (type: int), 
ws_sold_time_sk (type: int), ws_ship_mode_sk (type: int), ws_warehouse_sk 
(type: int), (ws_sales_price * CAST( ws_quantity AS decimal(10,0))) (type: 
decimal(18,2)), (ws_net_paid_inc_tax * CAST( ws_quantity AS decimal(10,0))) 
(type: decimal(18,2))
+  outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5
   Statistics: Num rows: 144002668 Data size: 19580198212 
Basic stats: COMPLETE Column stats: NONE
   Map Join Operator
 condition map:
@@ -633,7 +633,7 @@ STAGE PLANS:
 keys:
   0 _col1 (type: int)
   1 _col0 (type: int)
-outputColumnNames: _col0, _col2, _col3, _col4, _col5, 
_col6
+outputColumnNames: _col0, _col2, _col3, _col4, _col5
 input vertices:
   1 Map 6
 Statistics: Num rows: 158402938 Data size: 21538218500 
Basic stats: COMPLETE Column stats: NONE
@@ -642,7 +642,7 @@ STAGE PLANS:
   sort order: +
   Map-reduce partition columns: _col0 (type: int)
   Statistics: Num rows: 158402938 Data size: 
21538218500 Basic stats: COMPLETE Column stats: NONE
-  value expressions: _col2 (type: int), _col3 (type: 
int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
+  value expressions: _col2 (type: int), _col3 (type: 
int), _col4 (type: decimal(18,2)), _col5 (type: decimal(18,2))
 Execution mode: vectorized
 Local Work:
   Map Reduce Local Work
@@ -656,8 +656,8 @@ STAGE PLANS:
 predicate: (cs_ship_mode_sk is not null and 
cs_sold_date_sk is not null and cs_sold_time_sk is not null and cs_warehouse_sk 
is not null) (type: boolean)
 Statistics: Num rows: 287989836 Data size: 38999608952 
Basic stats: COMPLETE Column stats: NONE
 Select Operator
-  expressions: cs_sold_date_sk (type: int), 
cs_sold_time_sk (type: int), cs_ship_mode_sk (type: int), cs_warehouse_sk 
(type: int), cs_quantity (type: int), cs_ext_sales_price (type: decimal(7,2)), 
cs_net_paid_inc_ship_tax (type: decimal(7,2))
-  outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6
+  expressions: cs_sold_date_sk (type: int), 
cs_sold_time_sk (type: int), cs_ship_mode_sk (type: int), cs_warehouse_sk 
(type: int), (cs_ext_sales_price * CAST( cs_quantity AS decimal(10,0))) (type: 
decimal(18,2)), (cs_net_paid_inc_ship_tax * CAST( cs_quantity AS 
decimal(10,0))) (type: decimal(18,2))
+  outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5
   Statistics: Num rows: 287989836 Data size: 38999608952 
Basic stats: COMPLETE Column stats: NONE
   Map Join Operator
 condition map:
@@ -665,7 +665,7 @@ STAGE PLANS:
 keys:
   0 _col1 (type: int)
   1 _col0 (type: int)
-outputColumnNames: _col0, _col2, _col3, _col4, _col5, 
_col6
+outputColumnNames: _col0, _col2, _col3, _col4, _col5
 input vertices:
   1 Map 13
 Statistics: Num rows: 316788826 Data size: 42899570777 
Basic stats: COMPLETE Column stats: NONE
@@ -674,7 +674,7 @@ STAGE PLANS:
   sort order: +
   Map-reduce partition columns: _col0 (type: int)

[49/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out
 
b/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out
index 1e195bc..b6d726e 100644
--- 
a/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out
+++ 
b/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out
@@ -542,13 +542,13 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@encrypted_table
 POSTHOOK: Input: default@src
 POSTHOOK: Output: hdfs://### HDFS PATH ###
-OPTIMIZED SQL: SELECT *
-FROM (SELECT `key`, `value`
+OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`key` AS `key1`, 
`t2`.`value` AS `value1`
+FROM (SELECT `key`, `value`, CAST(`key` AS DOUBLE) AS `CAST`
 FROM `default`.`src`
 WHERE `key` IS NOT NULL) AS `t0`
-INNER JOIN (SELECT `key`, `value`
+INNER JOIN (SELECT `key`, `value`, CAST(`key` AS DOUBLE) AS `CAST`
 FROM `default`.`encrypted_table`
-WHERE `key` IS NOT NULL) AS `t2` ON CAST(`t0`.`key` AS DOUBLE) = 
CAST(`t2`.`key` AS DOUBLE)
+WHERE `key` IS NOT NULL) AS `t2` ON `t0`.`CAST` = `t2`.`CAST`
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -567,14 +567,14 @@ STAGE PLANS:
   predicate: key is not null (type: boolean)
   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
   Select Operator
-expressions: key (type: string), value (type: string)
-outputColumnNames: _col0, _col1
+expressions: key (type: string), value (type: string), 
UDFToDouble(key) (type: double)
+outputColumnNames: _col0, _col1, _col2
 Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
 Reduce Output Operator
-  key expressions: UDFToDouble(_col0) (type: double)
+  key expressions: _col2 (type: double)
   null sort order: a
   sort order: +
-  Map-reduce partition columns: UDFToDouble(_col0) (type: 
double)
+  Map-reduce partition columns: _col2 (type: double)
   Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
   tag: 0
   value expressions: _col0 (type: string), _col1 (type: string)
@@ -589,14 +589,14 @@ STAGE PLANS:
   predicate: key is not null (type: boolean)
   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
   Select Operator
-expressions: key (type: int), value (type: string)
-outputColumnNames: _col0, _col1
+expressions: key (type: int), value (type: string), 
UDFToDouble(key) (type: double)
+outputColumnNames: _col0, _col1, _col2
 Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
 Reduce Output Operator
-  key expressions: UDFToDouble(_col0) (type: double)
+  key expressions: _col2 (type: double)
   null sort order: a
   sort order: +
-  Map-reduce partition columns: UDFToDouble(_col0) (type: 
double)
+  Map-reduce partition columns: _col2 (type: double)
   Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
   tag: 1
   value expressions: _col0 (type: int), _col1 (type: string)
@@ -714,32 +714,36 @@ STAGE PLANS:
   condition map:
Inner Join 0 to 1
   keys:
-0 UDFToDouble(_col0) (type: double)
-1 UDFToDouble(_col0) (type: double)
-  outputColumnNames: _col0, _col1, _col2, _col3
+0 _col2 (type: double)
+1 _col2 (type: double)
+  outputColumnNames: _col0, _col1, _col3, _col4
   Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE 
Column stats: NONE
-  File Output Operator
-compressed: false
-GlobalTableId: 0
-directory: hdfs://### HDFS PATH ###
-NumFilesPerFileSink: 1
+  Select Operator
+expressions: _col0 (type: string), _col1 (type: string), _col3 
(type: int), _col4 (type: string)
+outputColumnNames: _col0, _col1, _col2, _col3
 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE 
Column stats: NONE
-Stats Publishing Key Prefix: hdfs://### HDFS PATH ###
-table:
-input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-output format:

[46/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/join_filters_overlap.q.out
--
diff --git a/ql/src/test/results/clientpositive/join_filters_overlap.q.out 
b/ql/src/test/results/clientpositive/join_filters_overlap.q.out
index 7b5c9f2..847b45d 100644
--- a/ql/src/test/results/clientpositive/join_filters_overlap.q.out
+++ b/ql/src/test/results/clientpositive/join_filters_overlap.q.out
@@ -18,15 +18,6 @@ POSTHOOK: query: explain extended select * from a_n4 left 
outer join a_n4 b on (
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@a_n4
  A masked pattern was here 
-OPTIMIZED SQL: SELECT *
-FROM (SELECT `key`, `value`
-FROM `default`.`a_n4`) AS `t`
-LEFT JOIN (SELECT `key`, CAST(50 AS INTEGER) AS `value`
-FROM `default`.`a_n4`
-WHERE `value` = 50) AS `t1` ON `t`.`key` = `t1`.`key` AND `t`.`value` = 50
-LEFT JOIN (SELECT `key`, CAST(60 AS INTEGER) AS `value`
-FROM `default`.`a_n4`
-WHERE `value` = 60) AS `t3` ON `t`.`key` = `t3`.`key` AND `t`.`value` = 60
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -40,8 +31,8 @@ STAGE PLANS:
 Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column 
stats: NONE
 GatherStats: false
 Select Operator
-  expressions: key (type: int), value (type: int)
-  outputColumnNames: _col0, _col1
+  expressions: key (type: int), value (type: int), (value = 60) 
(type: boolean), (value = 50) (type: boolean)
+  outputColumnNames: _col0, _col1, _col2, _col3
   Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE 
Column stats: NONE
   Reduce Output Operator
 key expressions: _col0 (type: int)
@@ -50,7 +41,7 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: int)
 Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE 
Column stats: NONE
 tag: 0
-value expressions: _col1 (type: int)
+value expressions: _col1 (type: int), _col2 (type: boolean), 
_col3 (type: boolean)
 auto parallelism: false
   TableScan
 alias: b
@@ -158,37 +149,41 @@ STAGE PLANS:
   filter mappings:
 0 [1, 1, 2, 1]
   filter predicates:
-0 {(VALUE._col0 = 50)} {(VALUE._col0 = 60)}
+0 {VALUE._col2} {VALUE._col1}
 1 
 2 
   keys:
 0 _col0 (type: int)
 1 _col0 (type: int)
 2 _col0 (type: int)
-  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+  outputColumnNames: _col0, _col1, _col4, _col5, _col6, _col7
   Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column 
stats: NONE
-  File Output Operator
-compressed: false
-GlobalTableId: 0
- A masked pattern was here 
-NumFilesPerFileSink: 1
+  Select Operator
+expressions: _col0 (type: int), _col1 (type: int), _col4 (type: 
int), _col5 (type: int), _col6 (type: int), _col7 (type: int)
+outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
 Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column 
stats: NONE
- A masked pattern was here 
-table:
-input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-properties:
-  columns _col0,_col1,_col2,_col3,_col4,_col5
-  columns.types int:int:int:int:int:int
-  escape.delim \
-  hive.serialization.extend.additional.nesting.levels true
-  serialization.escape.crlf true
-  serialization.format 1
-  serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-TotalFiles: 1
-GatherStats: false
-MultiFileSpray: false
+File Output Operator
+  compressed: false
+  GlobalTableId: 0
+ A masked pattern was here 
+  NumFilesPerFileSink: 1
+  Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE 
Column stats: NONE
+ A masked pattern was here 
+  table:
+  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+  properties:
+columns _col0,_col1,_col2,_col3,_col4,_col5
+columns.types int:int:int:int:int:int
+escape.delim \
+hive.serialization.extend.additional.nesting.levels

[12/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)

2018-11-12 Thread jcamacho

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query4.q.out
--
diff --git a/ql/src/test/results/clientpositive/perf/tez/query4.q.out 
b/ql/src/test/results/clientpositive/perf/tez/query4.q.out
index 27ce7b5..bb0d7ba 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query4.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query4.q.out
@@ -271,367 +271,355 @@ Stage-0
 limit:100
 Stage-1
   Reducer 10 vectorized
-  File Output Operator [FS_575]
-Limit [LIM_574] (rows=100 width=85)
+  File Output Operator [FS_557]
+Limit [LIM_556] (rows=100 width=85)
   Number of rows:100
-  Select Operator [SEL_573] (rows=7323197 width=85)
+  Select Operator [SEL_555] (rows=7323197 width=85)
 Output:["_col0"]
   <-Reducer 9 [SIMPLE_EDGE]
-SHUFFLE [RS_147]
-  Select Operator [SEL_146] (rows=7323197 width=85)
+SHUFFLE [RS_141]
+  Select Operator [SEL_140] (rows=7323197 width=85)
 Output:["_col0"]
-Filter Operator [FIL_145] (rows=7323197 width=533)
-  predicate:CASE WHEN (_col3 is not null) THEN (CASE WHEN 
(_col5 is not null) THEN (((_col9 / _col5) > (_col12 / _col3))) ELSE ((null > 
(_col12 / _col3))) END) ELSE (CASE WHEN (_col5 is not null) THEN (((_col9 / 
_col5) > null)) ELSE (null) END) END
-  Merge Join Operator [MERGEJOIN_478] (rows=14646395 width=533)
-
Conds:RS_142._col2=RS_572._col0(Inner),Output:["_col3","_col5","_col9","_col11","_col12"]
+Filter Operator [FIL_139] (rows=7323197 width=537)
+  predicate:CASE WHEN (_col3 is not null) THEN (CASE WHEN 
(_col9) THEN (((_col11 / _col8) > (_col14 / _col3))) ELSE ((null > (_col14 / 
_col3))) END) ELSE (CASE WHEN (_col9) THEN (((_col11 / _col8) > null)) ELSE 
(null) END) END
+  Merge Join Operator [MERGEJOIN_472] (rows=14646395 width=537)
+
Conds:RS_136._col2=RS_554._col0(Inner),Output:["_col3","_col8","_col9","_col11","_col13","_col14"]
   <-Reducer 30 [SIMPLE_EDGE] vectorized
-SHUFFLE [RS_572]
+SHUFFLE [RS_554]
   PartitionCols:_col0
-  Select Operator [SEL_571] (rows=8000 width=297)
+  Select Operator [SEL_553] (rows=8000 width=297)
 Output:["_col0","_col1","_col2"]
-Group By Operator [GBY_570] (rows=8000 width=764)
+Group By Operator [GBY_552] (rows=8000 width=764)
   
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0,
 KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6
 <-Reducer 29 [SIMPLE_EDGE]
-  SHUFFLE [RS_126]
+  SHUFFLE [RS_120]
 PartitionCols:_col0, _col1, _col2, _col3, _col4, 
_col5, _col6
-Group By Operator [GBY_125] (rows=8000 
width=764)
-  
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0,
 _col1, _col2, _col3, _col4, _col5, _col6
-  Select Operator [SEL_123] (rows=187573258 
width=1043)
-
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"]
-Merge Join Operator [MERGEJOIN_473] 
(rows=187573258 width=1043)
-  
Conds:RS_120._col1=RS_518._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col9","_col10","_col11","_col12","_col13","_col14","_col15"]
-<-Map 38 [SIMPLE_EDGE] vectorized
-  SHUFFLE [RS_518]
-PartitionCols:_col0
-Select Operator [SEL_517] (rows=8000 
width=656)
-  
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"]
-  Filter Operator [FIL_516] (rows=8000 
width=656)
-predicate:(c_customer_id is not null 
and c_customer_sk is not null)
-TableScan [TS_114] (rows=8000 
width=656)
-  
default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_first_name","c_last_name","c_preferred_cust_flag","c_birth_country","c_login","c_email_address"]
-<-Reducer 28 [SIMPLE_EDGE]
-  SHUFFLE [RS_120]
-

61 matches

Mail list logo