[2/2] hive git commit: HIVE-20807 : Refactor LlapStatusServiceDriver (Miklos Gergely via Sergey Shelukhin)
HIVE-20807 : Refactor LlapStatusServiceDriver (Miklos Gergely via Sergey Shelukhin) Signed-off-by: Ashutosh Chauhan Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/af401702 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/af401702 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/af401702 Branch: refs/heads/master Commit: af401702847391ab41fcf2ef8216a94a1b7bfc76 Parents: bc39c49 Author: Miklos Gergely Authored: Thu Oct 25 13:03:00 2018 -0700 Committer: Ashutosh Chauhan Committed: Mon Nov 12 15:28:18 2018 -0800 -- bin/ext/llapstatus.sh | 4 +- .../hadoop/hive/llap/cli/LlapSliderUtils.java | 55 +- .../llap/cli/LlapStatusOptionsProcessor.java| 278 --- .../hive/llap/cli/LlapStatusServiceDriver.java | 811 --- .../hadoop/hive/llap/cli/status/AmInfo.java | 93 +++ .../hive/llap/cli/status/AppStatusBuilder.java | 231 ++ .../hadoop/hive/llap/cli/status/ExitCode.java | 44 + .../hive/llap/cli/status/LlapInstance.java | 134 +++ .../llap/cli/status/LlapStatusCliException.java | 40 + .../hive/llap/cli/status/LlapStatusHelpers.java | 449 -- .../status/LlapStatusServiceCommandLine.java| 302 +++ .../cli/status/LlapStatusServiceDriver.java | 775 ++ .../hadoop/hive/llap/cli/status/State.java | 31 + .../hive/llap/cli/status/package-info.java | 24 + .../llap/cli/TestLlapStatusServiceDriver.java | 98 --- .../TestLlapStatusServiceCommandLine.java | 91 +++ .../hive/llap/cli/status/package-info.java | 23 + .../java/org/apache/hive/http/LlapServlet.java | 9 +- 18 files changed, 1799 insertions(+), 1693 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/af401702/bin/ext/llapstatus.sh -- diff --git a/bin/ext/llapstatus.sh b/bin/ext/llapstatus.sh index 2d2c8f4..23e6be6 100644 --- a/bin/ext/llapstatus.sh +++ b/bin/ext/llapstatus.sh @@ -17,7 +17,7 @@ THISSERVICE=llapstatus export SERVICE_LIST="${SERVICE_LIST}${THISSERVICE} " llapstatus () { - CLASS=org.apache.hadoop.hive.llap.cli.LlapStatusServiceDriver; 
+ CLASS=org.apache.hadoop.hive.llap.cli.status.LlapStatusServiceDriver; if [ ! -f ${HIVE_LIB}/hive-cli-*.jar ]; then echo "Missing Hive CLI Jar" exit 3; @@ -36,7 +36,7 @@ llapstatus () { } llapstatus_help () { - CLASS=org.apache.hadoop.hive.llap.cli.LlapStatusServiceDriver; + CLASS=org.apache.hadoop.hive.llap.cli.status.LlapStatusServiceDriver; execHiveCmd $CLASS "--help" } http://git-wip-us.apache.org/repos/asf/hive/blob/af401702/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapSliderUtils.java -- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapSliderUtils.java b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapSliderUtils.java index af47b26..5ec9e1d 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapSliderUtils.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapSliderUtils.java @@ -24,69 +24,24 @@ import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.service.api.records.Service; import org.apache.hadoop.yarn.service.client.ServiceClient; import org.apache.hadoop.yarn.service.utils.CoreFileSystem; -import org.apache.hadoop.yarn.util.Clock; -import org.apache.hadoop.yarn.util.SystemClock; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class LlapSliderUtils { - private static final Logger LOG = LoggerFactory - .getLogger(LlapSliderUtils.class); + private static final Logger LOG = LoggerFactory.getLogger(LlapSliderUtils.class); private static final String LLAP_PACKAGE_DIR = ".yarn/package/LLAP/"; - public static ServiceClient createServiceClient( - Configuration conf) throws Exception { + public static ServiceClient createServiceClient(Configuration conf) throws Exception 
{ ServiceClient serviceClient = new ServiceClient(); serviceClient.init(conf); serviceClient.start(); return serviceClient; } - public static ApplicationReport getAppReport(String appName, ServiceClient serviceClient, - long timeoutMs) throws - LlapStatusServiceDriver.LlapStatusCliException { -Clock clock = SystemClock.getInstance(); -long startTime = clock.getTime(); -long timeoutTime =
[1/2] hive git commit: HIVE-20807 : Refactor LlapStatusServiceDriver (Miklos Gergely via Sergey Shelukhin)
Repository: hive Updated Branches: refs/heads/master bc39c4998 -> af4017028 http://git-wip-us.apache.org/repos/asf/hive/blob/af401702/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/LlapStatusServiceCommandLine.java -- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/LlapStatusServiceCommandLine.java b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/LlapStatusServiceCommandLine.java new file mode 100644 index 000..bee5079 --- /dev/null +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/LlapStatusServiceCommandLine.java @@ -0,0 +1,302 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.llap.cli.status; + +import java.util.Arrays; +import java.util.Properties; + +import jline.TerminalFactory; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.OptionBuilder; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.annotations.VisibleForTesting; + +/** + * Parses, verifies, prints and provides the command line arguments of the Llap Status program. + */ +public class LlapStatusServiceCommandLine { + private static final Logger LOGGER = LoggerFactory.getLogger("LlapStatusServiceDriverConsole"); + + @VisibleForTesting + static final long DEFAULT_FIND_YARN_APP_TIMEOUT_MS = 20 * 1000L; + @VisibleForTesting + static final long DEFAULT_STATUS_REFRESH_INTERVAL_MS = 1 * 1000L; + @VisibleForTesting + static final long DEFAULT_WATCH_MODE_TIMEOUT_MS = 5 * 60 * 1000L; + @VisibleForTesting + static final float DEFAULT_RUNNING_NODES_THRESHOLD = 1.0f; + + @SuppressWarnings("static-access") + private static final Option NAME = OptionBuilder + .withLongOpt("name") + .withDescription("LLAP cluster name") + .withArgName("name") + .hasArg() + .create('n'); + + @SuppressWarnings("static-access") + private static final Option FIND_APP_TIMEOUT = OptionBuilder + .withLongOpt("findAppTimeout") + .withDescription("Amount of time(s) that the tool will sleep to wait for the YARN application to start." + + "negative values=wait forever, 0=Do not wait. 
default=" + (DEFAULT_FIND_YARN_APP_TIMEOUT_MS / 1000) + "s") + .withArgName("findAppTimeout") + .hasArg() + .create('f'); + + @SuppressWarnings("static-access") + private static final Option OUTPUT_FILE = OptionBuilder + .withLongOpt("outputFile") + .withDescription("File to which output should be written (Default stdout)") + .withArgName("outputFile") + .hasArg() + .create('o'); + + @SuppressWarnings("static-access") + private static final Option WATCH_MODE = OptionBuilder + .withLongOpt("watch") + .withDescription("Watch mode waits until all LLAP daemons are running or subset of the nodes are running " + + "(threshold can be specified via -r option) (Default wait until all nodes are running)") + .withArgName("watch") + .create('w'); + + @SuppressWarnings("static-access") + private static final Option NOT_LAUNCHED = OptionBuilder + .withLongOpt("notLaunched") + .withDescription("In watch mode, do not assume that the application was already launched if there's doubt " + + "(e.g. if the last application instance has failed).") + .withArgName("notLaunched") + .create('l'); + + @SuppressWarnings("static-access") + private static final Option RUNNING_NODES_THRESHOLD = OptionBuilder + .withLongOpt("runningNodesThreshold") + .withDescription("When watch mode is enabled (-w), wait until the specified threshold of nodes are running " + + "(Default 1.0 which means 100% nodes are running)") + .withArgName("runningNodesThreshold") + .hasArg() + .create('r'); + + @SuppressWarnings("static-access") + private static final Option REFRESH_INTERVAL = OptionBuilder + .withLongOpt("refreshInterval") +
[4/6] hive git commit: HIVE-20903: Some minor refactor to the Druid Storage Handler without any change in logic (Slim B reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/dca389b0/druid-handler/src/java/org/apache/hadoop/hive/druid/io/HiveDruidSplit.java -- diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/HiveDruidSplit.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/HiveDruidSplit.java index 2783016..89d74e3 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/HiveDruidSplit.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/HiveDruidSplit.java @@ -36,17 +36,16 @@ public class HiveDruidSplit extends FileSplit implements org.apache.hadoop.mapre // required for deserialization public HiveDruidSplit() { -super((Path) null, 0, 0, (String[]) null); +super(null, 0, 0, (String[]) null); } - public HiveDruidSplit(String druidQuery, Path dummyPath, String hosts[]) { + public HiveDruidSplit(String druidQuery, Path dummyPath, String[] hosts) { super(dummyPath, 0, 0, hosts); this.druidQuery = druidQuery; this.hosts = hosts; } - @Override - public void write(DataOutput out) throws IOException { + @Override public void write(DataOutput out) throws IOException { super.write(out); out.writeUTF(druidQuery); out.writeInt(hosts.length); @@ -55,8 +54,7 @@ public class HiveDruidSplit extends FileSplit implements org.apache.hadoop.mapre } } - @Override - public void readFields(DataInput in) throws IOException { + @Override public void readFields(DataInput in) throws IOException { super.readFields(in); druidQuery = in.readUTF(); int length = in.readInt(); @@ -71,15 +69,12 @@ public class HiveDruidSplit extends FileSplit implements org.apache.hadoop.mapre return druidQuery; } - @Override - public String[] getLocations() throws IOException { + @Override public String[] getLocations() throws IOException { return hosts; } - @Override - public String toString() { -return "HiveDruidSplit{" + druidQuery + ", " -+ (hosts == null ? 
"empty hosts" : Arrays.toString(hosts)) + "}"; + @Override public String toString() { +return "HiveDruidSplit{" + druidQuery + ", " + (hosts == null ? "empty hosts" : Arrays.toString(hosts)) + "}"; } } http://git-wip-us.apache.org/repos/asf/hive/blob/dca389b0/druid-handler/src/java/org/apache/hadoop/hive/druid/json/KafkaSupervisorIOConfig.java -- diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/json/KafkaSupervisorIOConfig.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/json/KafkaSupervisorIOConfig.java index 425a5bb..c1b3bf8 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/json/KafkaSupervisorIOConfig.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/json/KafkaSupervisorIOConfig.java @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * + * + * http://www.apache.org/licenses/LICENSE-2.0 + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -17,13 +17,11 @@ */ package org.apache.hadoop.hive.druid.json; -import io.druid.java.util.common.StringUtils; - import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Optional; import com.google.common.base.Preconditions; - +import io.druid.java.util.common.StringUtils; import org.joda.time.Duration; import org.joda.time.Period; @@ -33,8 +31,7 @@ import java.util.Map; * This class is copied from druid source code * in order to avoid adding additional dependencies on druid-indexing-service. 
*/ -public class KafkaSupervisorIOConfig -{ +public class KafkaSupervisorIOConfig { public static final String BOOTSTRAP_SERVERS_KEY = "bootstrap.servers"; private final String topic; @@ -46,13 +43,11 @@ public class KafkaSupervisorIOConfig private final Duration period; private final boolean useEarliestOffset; private final Duration completionTimeout; - private final Optional lateMessageRejectionPeriod; - private final Optional earlyMessageRejectionPeriod; + @SuppressWarnings("OptionalUsedAsFieldOrParameterType") private final Optional lateMessageRejectionPeriod; + @SuppressWarnings("OptionalUsedAsFieldOrParameterType") private final Optional earlyMessageRejectionPeriod; private final boolean skipOffsetGaps; - @JsonCreator - public KafkaSupervisorIOConfig( - @JsonProperty("topic") String topic, + @JsonCreator public KafkaSupervisorIOConfig(@JsonProperty("topic") String topic,
[2/6] hive git commit: HIVE-20903: Some minor refactor to the Druid Storage Handler without any change in logic (Slim B reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/dca389b0/druid-handler/src/test/org/apache/hadoop/hive/druid/serde/TestDruidSerDe.java -- diff --git a/druid-handler/src/test/org/apache/hadoop/hive/druid/serde/TestDruidSerDe.java b/druid-handler/src/test/org/apache/hadoop/hive/druid/serde/TestDruidSerDe.java index acde239..e27f8cf 100644 --- a/druid-handler/src/test/org/apache/hadoop/hive/druid/serde/TestDruidSerDe.java +++ b/druid-handler/src/test/org/apache/hadoop/hive/druid/serde/TestDruidSerDe.java @@ -33,11 +33,11 @@ import java.util.ArrayList; import java.util.List; import java.util.Map.Entry; import java.util.Properties; +import java.util.stream.Collectors; import io.druid.java.util.http.client.HttpClient; import io.druid.java.util.http.client.response.HttpResponseHandler; import io.druid.query.scan.ScanResultValue; -import io.druid.query.select.EventHolder; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.type.HiveChar; @@ -77,12 +77,8 @@ import org.junit.Before; import org.junit.Rule; import org.junit.Test; -import com.fasterxml.jackson.core.JsonParseException; import com.fasterxml.jackson.core.type.TypeReference; -import com.fasterxml.jackson.databind.JsonMappingException; -import com.google.common.base.Function; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Lists; import com.google.common.util.concurrent.SettableFuture; import io.druid.data.input.Row; @@ -97,52 +93,54 @@ import org.junit.rules.ExpectedException; * Basic tests for Druid SerDe. The examples are taken from Druid 0.9.1.1 * documentation. 
*/ -public class TestDruidSerDe { +@SuppressWarnings({ "SameParameterValue", "SpellCheckingInspection" }) public class TestDruidSerDe { // Timeseries query - private static final String TIMESERIES_QUERY = - "{ \"queryType\": \"timeseries\", " - + " \"dataSource\": \"sample_datasource\", " - + " \"granularity\": \"day\", " - + " \"descending\": \"true\", " - + " \"filter\": { " - + " \"type\": \"and\", " - + " \"fields\": [ " - + " { \"type\": \"selector\", \"dimension\": \"sample_dimension1\", \"value\": \"sample_value1\" }, " - + " { \"type\": \"or\"," - + "\"fields\": [ " - + " { \"type\": \"selector\", \"dimension\": \"sample_dimension2\", \"value\": \"sample_value2\" }, " - + " { \"type\": \"selector\", \"dimension\": \"sample_dimension3\", \"value\": \"sample_value3\" }" - + "] " - + " } " - + " ] " - + " }, " - + " \"aggregations\": [ " - + " { \"type\": \"longSum\", \"name\": \"sample_name1\", \"fieldName\": \"sample_fieldName1\" }, " - + " { \"type\": \"doubleSum\", \"name\": \"sample_name2\", \"fieldName\": \"sample_fieldName2\" } " - + " ], " - + " \"postAggregations\": [ " - + " { \"type\": \"arithmetic\", " - + "\"name\": \"sample_divide\", " - + "\"fn\": \"/\", " - + "\"fields\": [ " - + " { \"type\": \"fieldAccess\", \"name\": \"postAgg__sample_name1\", \"fieldName\": \"sample_name1\" }, " - + " { \"type\": \"fieldAccess\", \"name\": \"postAgg__sample_name2\", \"fieldName\": \"sample_name2\" } " - + "] " - + " } " - + " ], " - + " \"intervals\": [ \"2012-01-01T00:00:00.000/2012-01-03T00:00:00.000\" ]}"; + private static final String + TIMESERIES_QUERY = + "{ \"queryType\": \"timeseries\", " + + " \"dataSource\": \"sample_datasource\", " + + " \"granularity\": \"day\", " + + " \"descending\": \"true\", " + + " \"filter\": { " + + " \"type\": \"and\", " + + " \"fields\": [ " + + " { \"type\": \"selector\", \"dimension\": \"sample_dimension1\", \"value\": \"sample_value1\" }, " + + " { \"type\": \"or\"," + + "\"fields\": [ " + + " { \"type\": 
\"selector\", \"dimension\": \"sample_dimension2\", \"value\": \"sample_value2\" }, " + + " { \"type\": \"selector\", \"dimension\": \"sample_dimension3\", \"value\": \"sample_value3\" }" + + "] " + + " } " + + " ] " + + " }, " + + " \"aggregations\": [ " + + " { \"type\": \"longSum\", \"name\": \"sample_name1\", \"fieldName\": \"sample_fieldName1\" }, " + + " { \"type\": \"doubleSum\", \"name\": \"sample_name2\",
[5/6] hive git commit: HIVE-20903: Some minor refactor to the Druid Storage Handler without any change in logic (Slim B reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/dca389b0/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java -- diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java index c3e7e5d..8fcadea 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * + * + * http://www.apache.org/licenses/LICENSE-2.0 + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -30,7 +30,6 @@ import com.google.common.collect.Interners; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Ordering; -import com.google.common.io.CharStreams; import io.druid.data.input.impl.DimensionSchema; import io.druid.data.input.impl.StringDimensionSchema; import io.druid.jackson.DefaultObjectMapper; @@ -69,6 +68,7 @@ import io.druid.query.spec.MultipleIntervalSegmentSpec; import io.druid.segment.IndexIO; import io.druid.segment.IndexMergerV9; import io.druid.segment.IndexSpec; +import io.druid.segment.data.BitmapSerdeFactory; import io.druid.segment.data.ConciseBitmapSerdeFactory; import io.druid.segment.data.RoaringBitmapSerdeFactory; import io.druid.segment.indexing.granularity.GranularitySpec; @@ -92,6 +92,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.Constants; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; @@ -105,6 +106,7 @@ import org.jboss.netty.handler.codec.http.HttpMethod; import org.jboss.netty.handler.codec.http.HttpResponseStatus; import org.joda.time.DateTime; import org.joda.time.Interval; +import org.joda.time.Period; import org.joda.time.chrono.ISOChronology; import org.skife.jdbi.v2.Folder3; import org.skife.jdbi.v2.Handle; @@ -117,37 +119,52 @@ import org.skife.jdbi.v2.util.ByteArrayMapper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import javax.annotation.Nullable; import java.io.IOException; import java.io.InputStream; -import java.io.InputStreamReader; import java.io.OutputStream; -import java.io.Reader; import java.net.InetAddress; import java.net.MalformedURLException; import java.net.URL; import 
java.net.UnknownHostException; import java.sql.SQLException; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Properties; import java.util.Set; import java.util.TimeZone; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; - +import java.util.stream.Collectors; /** * Utils class for Druid storage handler. */ public final class DruidStorageHandlerUtils { + private DruidStorageHandlerUtils () { + + } private static final Logger LOG = LoggerFactory.getLogger(DruidStorageHandlerUtils.class); + private static final String DRUID_ROLLUP = "druid.rollup"; + private static final String DRUID_QUERY_GRANULARITY = "druid.query.granularity"; + public static final String DRUID_QUERY_FETCH = "druid.query.fetch"; + static final String DRUID_SEGMENT_DIRECTORY = "druid.storage.storageDirectory"; + public static final String DRUID_SEGMENT_INTERMEDIATE_DIRECTORY = "druid.storage.storageDirectory.intermediate"; + public static final String DRUID_SEGMENT_VERSION = "druid.segment.version"; + public static final String DRUID_JOB_WORKING_DIRECTORY = "druid.job.workingDirectory"; + static final String KAFKA_TOPIC = "kafka.topic"; + static final String KAFKA_BOOTSTRAP_SERVERS = "kafka.bootstrap.servers"; + static final String DRUID_KAFKA_INGESTION_PROPERTY_PREFIX = "druid.kafka.ingestion."; + static final String DRUID_KAFKA_CONSUMER_PROPERTY_PREFIX = DRUID_KAFKA_INGESTION_PROPERTY_PREFIX + "consumer."; + /* Kafka Ingestion state - valid values - START/STOP/RESET */ + static final String DRUID_KAFKA_INGESTION = "druid.kafka.ingestion"; private static final int NUM_RETRIES = 8;
[6/6] hive git commit: HIVE-20903: Some minor refactor to the Druid Storage Handler without any change in logic (Slim B reviewed by Ashutosh Chauhan)
HIVE-20903: Some minor refactor to the Druid Storage Handler without any change in logic (Slim B reviewed by Ashutosh Chauhan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/dca389b0 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/dca389b0 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/dca389b0 Branch: refs/heads/master Commit: dca389b067a36f0a3bf28743fceacdc144c34bcb Parents: d22fc5b Author: Slim Bouguerra Authored: Mon Nov 12 07:54:06 2018 -0800 Committer: Slim Bouguerra Committed: Mon Nov 12 07:54:06 2018 -0800 -- .../org/apache/hadoop/hive/conf/Constants.java | 17 - .../hadoop/hive/druid/DruidKafkaUtils.java | 167 +++ .../hadoop/hive/druid/DruidStorageHandler.java | 879 + .../hive/druid/DruidStorageHandlerInfo.java | 53 +- .../hive/druid/DruidStorageHandlerUtils.java| 883 ++--- .../hadoop/hive/druid/io/DruidOutputFormat.java | 32 +- .../druid/io/DruidQueryBasedInputFormat.java| 63 +- .../hadoop/hive/druid/io/DruidRecordWriter.java | 212 ++- .../hadoop/hive/druid/io/HiveDruidSplit.java| 19 +- .../druid/json/KafkaSupervisorIOConfig.java | 199 ++- .../hive/druid/json/KafkaSupervisorReport.java | 157 +-- .../hive/druid/json/KafkaSupervisorSpec.java| 119 +- .../druid/json/KafkaSupervisorTuningConfig.java | 152 +-- .../hive/druid/json/KafkaTuningConfig.java | 175 +-- .../hadoop/hive/druid/json/TaskReportData.java | 68 +- .../hive/druid/security/DruidKerberosUtil.java | 58 +- .../hive/druid/security/KerberosHttpClient.java | 86 +- .../druid/security/ResponseCookieHandler.java | 44 +- .../RetryIfUnauthorizedResponseHandler.java | 62 +- .../druid/security/RetryResponseHolder.java | 23 +- .../serde/DruidGroupByQueryRecordReader.java| 19 +- .../druid/serde/DruidQueryRecordReader.java | 171 +-- .../druid/serde/DruidScanQueryRecordReader.java | 35 +- .../serde/DruidSelectQueryRecordReader.java | 34 +- .../hadoop/hive/druid/serde/DruidSerDe.java | 185 +-- 
.../hive/druid/serde/DruidSerDeUtils.java | 48 +- .../serde/DruidTimeseriesQueryRecordReader.java |3 +- .../druid/serde/DruidTopNQueryRecordReader.java | 46 +- .../hadoop/hive/druid/serde/DruidWritable.java | 60 +- .../hive/druid/DerbyConnectorTestUtility.java | 13 +- .../hadoop/hive/druid/QTestDruidSerDe.java | 73 +- .../hive/druid/TestDruidStorageHandler.java | 107 +- .../TestHiveDruidQueryBasedInputFormat.java |4 +- .../hadoop/hive/druid/serde/TestDruidSerDe.java | 1234 +- .../hive/ql/io/TestDruidRecordWriter.java | 238 ++-- .../clientpositive/kafka_storage_handler.q |4 +- .../druid/kafka_storage_handler.q.out |8 +- 37 files changed, 2669 insertions(+), 3081 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/dca389b0/common/src/java/org/apache/hadoop/hive/conf/Constants.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/Constants.java b/common/src/java/org/apache/hadoop/hive/conf/Constants.java index 61bc9df..44d0717 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/Constants.java +++ b/common/src/java/org/apache/hadoop/hive/conf/Constants.java @@ -32,8 +32,6 @@ public class Constants { "org.apache.hadoop.hive.druid.io.DruidOutputFormat"; public static final String DRUID_DATA_SOURCE = "druid.datasource"; public static final String DRUID_SEGMENT_GRANULARITY = "druid.segment.granularity"; - public static final String DRUID_ROLLUP = "druid.rollup"; - public static final String DRUID_QUERY_GRANULARITY = "druid.query.granularity"; public static final String DRUID_TARGET_SHARDS_PER_GRANULARITY = "druid.segment.targetShardsPerGranularity"; public static final String DRUID_TIMESTAMP_GRANULARITY_COL_NAME = "__time_granularity"; @@ -42,21 +40,6 @@ public class Constants { public static final String DRUID_QUERY_FIELD_NAMES = "druid.fieldNames"; public static final String DRUID_QUERY_FIELD_TYPES = "druid.fieldTypes"; public static final String DRUID_QUERY_TYPE = "druid.query.type"; - public static final String DRUID_QUERY_FETCH = 
"druid.query.fetch"; - public static final String DRUID_SEGMENT_DIRECTORY = "druid.storage.storageDirectory"; - public static final String DRUID_SEGMENT_INTERMEDIATE_DIRECTORY = "druid.storage.storageDirectory.intermediate"; - - public static final String DRUID_SEGMENT_VERSION = "druid.segment.version"; - public static final String DRUID_JOB_WORKING_DIRECTORY = "druid.job.workingDirectory"; - - - public static final String KAFKA_TOPIC = "kafka.topic"; - public static final String KAFKA_BOOTSTRAP_SERVERS =
[1/6] hive git commit: HIVE-20903: Some minor refactor to the Druid Storage Handler without any change in logic (Slim B reviewed by Ashutosh Chauhan)
Repository: hive Updated Branches: refs/heads/master d22fc5b24 -> dca389b06 http://git-wip-us.apache.org/repos/asf/hive/blob/dca389b0/druid-handler/src/test/org/apache/hadoop/hive/ql/io/TestDruidRecordWriter.java -- diff --git a/druid-handler/src/test/org/apache/hadoop/hive/ql/io/TestDruidRecordWriter.java b/druid-handler/src/test/org/apache/hadoop/hive/ql/io/TestDruidRecordWriter.java index cb8fa39..111f047 100644 --- a/druid-handler/src/test/org/apache/hadoop/hive/ql/io/TestDruidRecordWriter.java +++ b/druid-handler/src/test/org/apache/hadoop/hive/ql/io/TestDruidRecordWriter.java @@ -7,7 +7,7 @@ * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -18,14 +18,13 @@ package org.apache.hadoop.hive.ql.io; +import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Function; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import io.druid.data.input.Firehose; import io.druid.data.input.InputRow; -import io.druid.data.input.impl.DimensionSchema; import io.druid.data.input.impl.DimensionsSpec; import io.druid.data.input.impl.InputRowParser; import io.druid.data.input.impl.MapInputRowParser; @@ -69,141 +68,144 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; -import javax.annotation.Nullable; import java.io.File; import java.io.IOException; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; -public class TestDruidRecordWriter { - private ObjectMapper objectMapper = DruidStorageHandlerUtils.JSON_MAPPER; +/** + * Test Class for Druid Record 
Writer. + */ +@SuppressWarnings("ConstantConditions") public class TestDruidRecordWriter { + private final ObjectMapper objectMapper = DruidStorageHandlerUtils.JSON_MAPPER; private static final Interval INTERVAL_FULL = new Interval("2014-10-22T00:00:00Z/P1D"); - @Rule - public TemporaryFolder temporaryFolder = new TemporaryFolder(); - - private DruidRecordWriter druidRecordWriter; - - final List> expectedRows = ImmutableList.of( - ImmutableMap.of( - DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN, - DateTime.parse("2014-10-22T00:00:00.000Z").getMillis(), - "host", ImmutableList.of("a.example.com"), - "visited_sum", 190L, - "unique_hosts", 1.0d - ), - ImmutableMap.of( - DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN, - DateTime.parse("2014-10-22T01:00:00.000Z").getMillis(), - "host", ImmutableList.of("b.example.com"), - "visited_sum", 175L, - "unique_hosts", 1.0d - ), - ImmutableMap.of( - DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN, - DateTime.parse("2014-10-22T02:00:00.000Z").getMillis(), - "host", ImmutableList.of("c.example.com"), - "visited_sum", 270L, - "unique_hosts", 1.0d - ) - ); - - - @Test - public void testTimeStampColumnName() { + @Rule public TemporaryFolder temporaryFolder = new TemporaryFolder(); + + final List> + expectedRows = + ImmutableList.of(ImmutableMap.of(DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN, + DateTime.parse("2014-10-22T00:00:00.000Z").getMillis(), + "host", + ImmutableList.of("a.example.com"), + "visited_sum", + 190L, + "unique_hosts", + 1.0d), + ImmutableMap.of(DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN, + DateTime.parse("2014-10-22T01:00:00.000Z").getMillis(), + "host", + ImmutableList.of("b.example.com"), + "visited_sum", + 175L, + "unique_hosts", + 1.0d), + ImmutableMap.of(DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN, + DateTime.parse("2014-10-22T02:00:00.000Z").getMillis(), + "host", + ImmutableList.of("c.example.com"), + "visited_sum", + 270L, + "unique_hosts", + 1.0d)); + + @Test public void 
testTimeStampColumnName() { Assert.assertEquals("Time column name need to match to ensure serdeser compatibility", -DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN, DruidTable.DEFAULT_TIMESTAMP_COLUMN -); +
[3/6] hive git commit: HIVE-20903: Some minor refactor to the Druid Storage Handler without any change in logic (Slim B reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/dca389b0/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidQueryRecordReader.java -- diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidQueryRecordReader.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidQueryRecordReader.java index 8c10261..53d7441 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidQueryRecordReader.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidQueryRecordReader.java @@ -30,7 +30,6 @@ import io.druid.java.util.common.guava.CloseQuietly; import io.druid.java.util.http.client.HttpClient; import io.druid.java.util.http.client.Request; import io.druid.java.util.http.client.response.InputStreamResponseHandler; -import io.druid.query.BaseQuery; import io.druid.query.Query; import io.druid.query.QueryInterruptedException; import org.apache.hadoop.conf.Configuration; @@ -49,6 +48,7 @@ import java.io.Closeable; import java.io.IOException; import java.io.InputStream; import java.util.Iterator; +import java.util.Objects; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; @@ -61,17 +61,11 @@ import java.util.concurrent.Future; * DruidWritable containing the timestamp as well as all values resulting from * the query. */ -public abstract class DruidQueryRecordReader, R extends Comparable> -extends RecordReader -implements org.apache.hadoop.mapred.RecordReader { +public abstract class DruidQueryRecordReader> extends RecordReader +implements org.apache.hadoop.mapred.RecordReader { private static final Logger LOG = LoggerFactory.getLogger(DruidQueryRecordReader.class); - private HttpClient httpClient; - private ObjectMapper mapper; - // Smile mapper is used to read query results that are serialized as binary instead of json - private ObjectMapper smileMapper; - /** * Query that Druid executes. 
*/ @@ -80,62 +74,60 @@ public abstract class DruidQueryRecordReader, R extends C /** * Query results as a streaming iterator. */ - protected JsonParserIterator queryResultsIterator = null; - - /** - * Result type definition used to read the rows, this is query dependent. - */ - protected JavaType resultsType = null; + JsonParserIterator queryResultsIterator = null; - @Override - public void initialize(InputSplit split, TaskAttemptContext context) throws IOException { + @Override public void initialize(InputSplit split, TaskAttemptContext context) throws IOException { initialize(split, context.getConfiguration()); } - public void initialize(InputSplit split, Configuration conf, ObjectMapper mapper, - ObjectMapper smileMapper, HttpClient httpClient - ) throws IOException { + public void initialize(InputSplit split, + Configuration conf, + ObjectMapper mapper, + ObjectMapper smileMapper, + HttpClient httpClient) throws IOException { HiveDruidSplit hiveDruidSplit = (HiveDruidSplit) split; Preconditions.checkNotNull(hiveDruidSplit, "input split is null ???"); -this.mapper = Preconditions.checkNotNull(mapper, "object Mapper can not be null"); -// Smile mapper is used to read query results that are serilized as binary instead of json -this.smileMapper = Preconditions.checkNotNull(smileMapper, "Smile Mapper can not be null"); +ObjectMapper mapper1 = Preconditions.checkNotNull(mapper, "object Mapper can not be null"); +// Smile mapper is used to read query results that are serialized as binary instead of json +// Smile mapper is used to read query results that are serialized as binary instead of json +ObjectMapper smileMapper1 = Preconditions.checkNotNull(smileMapper, "Smile Mapper can not be null"); // Create query -this.query = this.mapper.readValue(Preconditions.checkNotNull(hiveDruidSplit.getDruidQuery()), Query.class); +this.query = mapper1.readValue(Preconditions.checkNotNull(hiveDruidSplit.getDruidQuery()), Query.class); Preconditions.checkNotNull(query); 
-this.resultsType = getResultTypeDef(); -this.httpClient = Preconditions.checkNotNull(httpClient, "need Http Client"); +/* + Result type definition used to read the rows, this is query dependent. + */ +JavaType resultsType = getResultTypeDef(); +HttpClient httpClient1 = Preconditions.checkNotNull(httpClient, "need Http Client"); final String[] locations = hiveDruidSplit.getLocations(); -boolean initlialized = false; +boolean initialized = false; int currentLocationIndex = 0; Exception ex = null; -while (!initlialized && currentLocationIndex < locations.length) { +while (!initialized && currentLocationIndex < locations.length) { String address = locations[currentLocationIndex++]; - if(Strings.isNullOrEmpty(address)) { + if
hive git commit: HIVE-20899: Keytab URI for LLAP YARN Service is restrictive to support HDFS only (Gour Saha reviewed by Prasanth Jayachandran)
Repository: hive Updated Branches: refs/heads/master 558876462 -> bc39c4998 HIVE-20899: Keytab URI for LLAP YARN Service is restrictive to support HDFS only (Gour Saha reviewed by Prasanth Jayachandran) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bc39c499 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bc39c499 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bc39c499 Branch: refs/heads/master Commit: bc39c49988c8a5d881a23ed7dd5d4adba0509ee9 Parents: 5588764 Author: Gour Saha Authored: Mon Nov 12 13:04:21 2018 -0800 Committer: Prasanth Jayachandran Committed: Mon Nov 12 13:04:24 2018 -0800 -- llap-server/src/main/resources/package.py | 2 -- 1 file changed, 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/bc39c499/llap-server/src/main/resources/package.py -- diff --git a/llap-server/src/main/resources/package.py b/llap-server/src/main/resources/package.py index 9eb3fd7..c48ff79 100644 --- a/llap-server/src/main/resources/package.py +++ b/llap-server/src/main/resources/package.py @@ -130,8 +130,6 @@ def main(args): service_keytab_path += "/" + service_keytab else: service_keytab_path = service_keytab - if service_keytab_path: - service_keytab_path = "hdfs:///user/hive/" + service_keytab_path if not input: print "Cannot find input files"
hive git commit: HIVE-20899: Keytab URI for LLAP YARN Service is restrictive to support HDFS only (Gour Saha reviewed by Prasanth Jayachandran)
Repository: hive Updated Branches: refs/heads/branch-3 cd4491900 -> 4663e50e7 HIVE-20899: Keytab URI for LLAP YARN Service is restrictive to support HDFS only (Gour Saha reviewed by Prasanth Jayachandran) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4663e50e Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4663e50e Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4663e50e Branch: refs/heads/branch-3 Commit: 4663e50e709c1f836acb34841a72f1dfc9f31da9 Parents: cd44919 Author: Gour Saha Authored: Mon Nov 12 13:04:21 2018 -0800 Committer: Prasanth Jayachandran Committed: Mon Nov 12 13:04:45 2018 -0800 -- llap-server/src/main/resources/package.py | 2 -- 1 file changed, 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/4663e50e/llap-server/src/main/resources/package.py -- diff --git a/llap-server/src/main/resources/package.py b/llap-server/src/main/resources/package.py index 9eb3fd7..c48ff79 100644 --- a/llap-server/src/main/resources/package.py +++ b/llap-server/src/main/resources/package.py @@ -130,8 +130,6 @@ def main(args): service_keytab_path += "/" + service_keytab else: service_keytab_path = service_keytab - if service_keytab_path: - service_keytab_path = "hdfs:///user/hive/" + service_keytab_path if not input: print "Cannot find input files"
[32/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/mapjoin47.q.out -- diff --git a/ql/src/test/results/clientpositive/mapjoin47.q.out b/ql/src/test/results/clientpositive/mapjoin47.q.out index d3e61f8..dadac0d 100644 --- a/ql/src/test/results/clientpositive/mapjoin47.q.out +++ b/ql/src/test/results/clientpositive/mapjoin47.q.out @@ -385,8 +385,8 @@ STAGE PLANS: alias: src1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string), value (type: string), UDFToDouble(value) BETWEEN 100.0D AND 102.0D (type: boolean) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: @@ -400,8 +400,8 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string), value (type: string), UDFToDouble(value) BETWEEN 100.0D AND 102.0D (type: boolean) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -409,19 +409,23 @@ STAGE PLANS: keys: 0 1 -outputColumnNames: _col0, _col1, _col2, _col3 -residual filter predicates: {((_col0 = _col2) or UDFToDouble(_col1) BETWEEN 100.0D AND 102.0D or UDFToDouble(_col3) BETWEEN 100.0D AND 102.0D)} -Statistics: Num rows: 9026 Data size: 173876 Basic stats: COMPLETE Column stats: NONE -Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE - File Output Operator -compressed: false +outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 +residual filter predicates: {((_col0 = _col3) or _col2 or _col5)} 
+Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE +Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE + Limit +Number of rows: 10 Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE -table: -input format: org.apache.hadoop.mapred.SequenceFileInputFormat -output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat -serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work @@ -501,8 +505,8 @@ STAGE PLANS: alias: src1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string), value (type: string), UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: @@ -516,8 +520,8 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string), value (type: string),
[17/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out index 50474bc..f5a71b4 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out @@ -184,36 +184,36 @@ CBO PLAN: HiveProject(_o__c0=[$0], _o__c1=[$1], _o__c2=[$2], _o__c3=[$3]) HiveSortLimit(sort0=[$7], sort1=[$4], sort2=[$5], sort3=[$6], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100]) HiveProject(_o__c0=[substr($0, 1, 20)], _o__c1=[/(CAST($1):DOUBLE, $2)], _o__c2=[/($3, $4)], _o__c3=[/($5, $6)], (tok_function avg (tok_table_or_col ws_quantity))=[/(CAST($1):DOUBLE, $2)], (tok_function avg (tok_table_or_col wr_refunded_cash))=[/($3, $4)], (tok_function avg (tok_table_or_col wr_fee))=[/($5, $6)], (tok_function substr (tok_table_or_col r_reason_desc) 1 20)=[substr($0, 1, 20)]) - HiveAggregate(group=[{7}], agg#0=[sum($26)], agg#1=[count($26)], agg#2=[sum($21)], agg#3=[count($21)], agg#4=[sum($20)], agg#5=[count($20)]) -HiveJoin(condition=[AND(AND(=($0, $17), =($4, $1)), =($5, $2))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3]) + HiveAggregate(group=[{14}], agg#0=[sum($32)], agg#1=[count($32)], agg#2=[sum($27)], agg#3=[count($27)], agg#4=[sum($26)], agg#5=[count($26)]) +HiveJoin(condition=[AND(AND(AND(=($1, $18), =($2, $19)), =($0, $21)), OR(AND($3, $4, $36), AND($5, $6, $37), AND($7, $8, $38)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3], ==[=($2, _UTF-16LE'M')], =4=[=($3, _UTF-16LE'4 yr Degree')], =5=[=($2, _UTF-16LE'D')], =6=[=($3, _UTF-16LE'Primary')], =7=[=($2, _UTF-16LE'U')], =8=[=($3, _UTF-16LE'Advanced Degree')]) 
HiveFilter(condition=[AND(IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'), IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) - HiveJoin(condition=[AND(=($0, $12), OR(AND(=($1, _UTF-16LE'M'), =($2, _UTF-16LE'4 yr Degree'), BETWEEN(false, $24, 100, 150)), AND(=($1, _UTF-16LE'D'), =($2, _UTF-16LE'Primary'), BETWEEN(false, $24, 50, 100)), AND(=($1, _UTF-16LE'U'), =($2, _UTF-16LE'Advanced Degree'), BETWEEN(false, $24, 150, 200], joinType=[inner], algorithm=[none], cost=[not available]) -HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3]) - HiveFilter(condition=[AND(IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'), IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IS NOT NULL($0))]) -HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) -HiveJoin(condition=[=($0, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) + HiveJoin(condition=[AND(=($0, $13), OR(AND($1, $24), AND($2, $25), AND($3, $26)))], joinType=[inner], algorithm=[none], cost=[not available]) +HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) + HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'), IS NOT NULL($0))]) +HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) +HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(r_reason_sk=[$0], r_reason_desc=[$2]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, reason]], 
table:alias=[reason]) - HiveJoin(condition=[=($14, $0)], joinType=[inner], algorithm=[none], cost=[not available]) -HiveProject(d_date_sk=[$0], d_year=[CAST(1998):INTEGER]) - HiveFilter(condition=[AND(=($6, 1998), IS NOT NULL($0))]) -HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) -HiveJoin(condition=[=($14, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(wp_web_page_sk=[$0]) -HiveFilter(condition=[IS NOT NULL($0)]) -
[28/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query26.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query26.q.out b/ql/src/test/results/clientpositive/perf/spark/query26.q.out index b6ee41e..48c0e11 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query26.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query26.q.out @@ -221,11 +221,11 @@ STAGE PLANS: keys: 0 _col2 (type: int) 1 _col0 (type: int) -outputColumnNames: _col4, _col5, _col6, _col7, _col18 +outputColumnNames: _col4, _col5, _col6, _col7, _col12 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col4), count(_col4), sum(_col5), count(_col5), sum(_col7), count(_col7), sum(_col6), count(_col6) - keys: _col18 (type: string) + keys: _col12 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query27.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query27.q.out b/ql/src/test/results/clientpositive/perf/spark/query27.q.out index 4063c4f..6c64664 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query27.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query27.q.out @@ -207,7 +207,7 @@ STAGE PLANS: keys: 0 _col3 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col4, _col5, _col6, _col7, _col15 + outputColumnNames: _col1, _col4, _col5, _col6, _col7, _col11 input vertices: 1 Map 9 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE @@ -216,7 +216,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: 
COMPLETE Column stats: NONE -value expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col15 (type: string) +value expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col11 (type: string) Reducer 4 Reduce Operator Tree: Join Operator @@ -225,10 +225,10 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) -outputColumnNames: _col4, _col5, _col6, _col7, _col15, _col17 +outputColumnNames: _col4, _col5, _col6, _col7, _col11, _col13 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col17 (type: string), _col15 (type: string), _col4 (type: int), _col5 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + expressions: _col13 (type: string), _col11 (type: string), _col4 (type: int), _col5 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col6 (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Group By Operator http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query29.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query29.q.out b/ql/src/test/results/clientpositive/perf/spark/query29.q.out index 2e5c0f3..f4a4524 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query29.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query29.q.out @@ -295,7 +295,7 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) -outputColumnNames: _col3, _col5, _col10, _col11, _col13, _col18, _col19 +outputColumnNames: _col3, _col5, _col8, _col9, _col11, _col14, _col15 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
[37/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/subquery_select.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/subquery_select.q.out b/ql/src/test/results/clientpositive/llap/subquery_select.q.out index 6870ad1..0435530 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_select.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_select.q.out @@ -32,14 +32,15 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Select Operator -expressions: p_size (type: int) -outputColumnNames: _col0 -Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE +expressions: p_size (type: int), p_size is null (type: boolean) +outputColumnNames: _col0, _col1 +Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean) Select Operator expressions: p_size (type: int) outputColumnNames: p_size @@ -77,12 +78,12 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) -outputColumnNames: _col0, _col2 -Statistics: Num rows: 27 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE +outputColumnNames: _col0, _col1, _col3 +Statistics: Num rows: 27 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 27 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col2 (type: boolean) + Statistics: Num rows: 27 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: boolean), _col3 (type: boolean) 
Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -92,10 +93,10 @@ STAGE PLANS: keys: 0 1 -outputColumnNames: _col0, _col2, _col3, _col4 -Statistics: Num rows: 27 Data size: 548 Basic stats: COMPLETE Column stats: COMPLETE +outputColumnNames: _col0, _col1, _col3, _col4, _col5 +Statistics: Num rows: 27 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), CASE WHEN ((_col3 = 0L)) THEN (false) WHEN (_col2 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (null) ELSE (false) END (type: boolean) + expressions: _col0 (type: int), CASE WHEN (_col4) THEN (false) WHEN (_col3 is not null) THEN (true) WHEN (_col1) THEN (null) WHEN (_col5) THEN (null) ELSE (false) END (type: boolean) outputColumnNames: _col0, _col1 Statistics: Num rows: 27 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -136,10 +137,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator -sort order: -Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE -value expressions: _col0 (type: bigint), _col1 (type: bigint) + Select Operator +expressions: (_col0 = 0L) (type: boolean), (_col1 < _col0) (type: boolean) +outputColumnNames: _col0, _col1 +Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE +Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: boolean), _col1 (type: boolean) Stage: Stage-0 Fetch Operator @@ -219,15 +224,15 @@ STAGE
[16/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query11.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query11.q.out b/ql/src/test/results/clientpositive/perf/tez/query11.q.out index 2f453f3..da1c349 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query11.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query11.q.out @@ -189,249 +189,241 @@ Stage-0 limit:100 Stage-1 Reducer 8 vectorized - File Output Operator [FS_358] -Limit [LIM_357] (rows=100 width=85) + File Output Operator [FS_354] +Limit [LIM_353] (rows=100 width=85) Number of rows:100 - Select Operator [SEL_356] (rows=12248093 width=85) + Select Operator [SEL_352] (rows=12248093 width=85) Output:["_col0"] <-Reducer 7 [SIMPLE_EDGE] -SHUFFLE [RS_97] - Select Operator [SEL_96] (rows=12248093 width=85) +SHUFFLE [RS_93] + Select Operator [SEL_92] (rows=12248093 width=85) Output:["_col0"] -Filter Operator [FIL_95] (rows=12248093 width=533) - predicate:CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col5 is not null) THEN (((_col1 / _col5) > (_col8 / _col3))) ELSE ((null > (_col8 / _col3))) END) ELSE (CASE WHEN (_col5 is not null) THEN (((_col1 / _col5) > null)) ELSE (null) END) END - Merge Join Operator [MERGEJOIN_291] (rows=24496186 width=533) - Conds:RS_92._col2=RS_355._col0(Inner),Output:["_col1","_col3","_col5","_col7","_col8"] +Filter Operator [FIL_91] (rows=12248093 width=537) + predicate:CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col6) THEN (((_col1 / _col5) > (_col9 / _col3))) ELSE ((null > (_col9 / _col3))) END) ELSE (CASE WHEN (_col6) THEN (((_col1 / _col5) > null)) ELSE (null) END) END + Merge Join Operator [MERGEJOIN_287] (rows=24496186 width=537) + Conds:RS_88._col2=RS_351._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col8","_col9"] <-Reducer 20 [SIMPLE_EDGE] vectorized -SHUFFLE [RS_355] +SHUFFLE [RS_351] PartitionCols:_col0 - Select Operator [SEL_354] (rows=8000 width=297) + Select Operator [SEL_350] 
(rows=8000 width=297) Output:["_col0","_col1","_col2"] -Group By Operator [GBY_353] (rows=8000 width=764) +Group By Operator [GBY_349] (rows=8000 width=764) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_83] + SHUFFLE [RS_79] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 -Group By Operator [GBY_82] (rows=8000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Operator [SEL_80] (rows=187573258 width=847) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] -Merge Join Operator [MERGEJOIN_288] (rows=187573258 width=847) - Conds:RS_77._col1=RS_321._col0(Inner),Output:["_col2","_col3","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] -<-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_321] -PartitionCols:_col0 -Select Operator [SEL_320] (rows=8000 width=656) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_319] (rows=8000 width=656) -predicate:(c_customer_id is not null and c_customer_sk is not null) -TableScan [TS_71] (rows=8000 width=656) - default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_first_name","c_last_name","c_preferred_cust_flag","c_birth_country","c_login","c_email_address"] -<-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_77] -PartitionCols:_col1 -
[11/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query48.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query48.q.out b/ql/src/test/results/clientpositive/perf/tez/query48.q.out index 76b4ce1..1f63e95 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query48.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query48.q.out @@ -143,15 +143,15 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 7 <- Reducer 11 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) -Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) +Map 8 <- Reducer 10 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Map 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Map 1 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator @@ -165,103 +165,103 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_30] Group By Operator [GBY_29] (rows=1 width=8) Output:["_col0"],aggregations:["sum(_col5)"] - Select Operator [SEL_28] (rows=25203 width=86) + Select Operator [SEL_28] (rows=20247 width=24) Output:["_col5"] -Filter Operator [FIL_27] (rows=25203 width=86) - predicate:(((_col14) IN ('KY', 'GA', 'NM') and _col7 BETWEEN 0 AND 2000) or ((_col14) IN ('MT', 'OR', 'IN') and _col7 BETWEEN 150 AND 3000) or ((_col14) IN ('WI', 'MO', 'WV') and _col7 BETWEEN 50 AND 25000)) - Merge Join Operator [MERGEJOIN_96] (rows=75613 width=86) - 
Conds:RS_24._col3=RS_118._col0(Inner),Output:["_col5","_col7","_col14"] +Filter Operator [FIL_27] (rows=20247 width=24) + predicate:((_col12 and _col6) or (_col13 and _col7) or (_col14 and _col8)) + Merge Join Operator [MERGEJOIN_96] (rows=26999 width=24) + Conds:RS_24._col3=RS_115._col0(Inner),Output:["_col5","_col6","_col7","_col8","_col12","_col13","_col14"] <-Map 12 [SIMPLE_EDGE] vectorized -SHUFFLE [RS_118] +SHUFFLE [RS_115] PartitionCols:_col0 - Select Operator [SEL_117] (rows=3529412 width=187) -Output:["_col0","_col1"] -Filter Operator [FIL_116] (rows=3529412 width=187) + Select Operator [SEL_114] (rows=3529412 width=16) +Output:["_col0","_col1","_col2","_col3"] +Filter Operator [FIL_113] (rows=3529412 width=187) predicate:((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) TableScan [TS_12] (rows=4000 width=187) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_country"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_95] (rows=856941 width=0) - Conds:RS_21._col2=RS_110._col0(Inner),Output:["_col3","_col5","_col7"] - <-Map 10 [SIMPLE_EDGE] vectorized -SHUFFLE [RS_110] + Merge Join Operator [MERGEJOIN_95] (rows=305980 width=12) + Conds:RS_21._col4=RS_126._col0(Inner),Output:["_col3","_col5","_col6","_col7","_col8"] + <-Map 11 [SIMPLE_EDGE] vectorized +SHUFFLE [RS_126] PartitionCols:_col0 - Select Operator [SEL_109] (rows=29552 width=184) + Select Operator [SEL_125] (rows=1704 width=4) Output:["_col0"] -Filter Operator [FIL_108] (rows=29552 width=183) - predicate:((cd_education_status = '4 yr Degree') and (cd_marital_status = 'M') and cd_demo_sk is not null) - TableScan [TS_9] (rows=1861800 width=183) -
[34/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out b/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out index a8f06eb..def06a5 100644 --- a/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out @@ -122,18 +122,19 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator -expressions: key (type: int), value (type: int) -outputColumnNames: _col0, _col1 +expressions: key (type: int), value (type: int), ((key > 40) and (value > 50) and (key = value)) (type: boolean) +outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true +selectExpressions: VectorUDFAdaptor(((key > 40) and (value > 50) and (key = value)))(children: LongColGreaterLongScalar(col 0:int, val 40) -> 3:boolean, LongColGreaterLongScalar(col 1:int, val 50) -> 4:boolean, LongColEqualLongColumn(col 0:int, col 1:int) -> 5:boolean) -> 6:boolean Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Right Outer Join 0 to 1 filter predicates: 0 -1 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)} +1 {_col2} keys: 0 1 @@ -368,18 +369,19 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator -expressions: key (type: int), value (type: int) -outputColumnNames: _col0, _col1 +expressions: key (type: int), value (type: int), (key > 40) (type: boolean), (value > 50) (type: boolean), (key = value) (type: boolean) +outputColumnNames: _col0, _col1, _col2, _col3, _col4 Select Vectorization: className: VectorSelectOperator native: true +selectExpressions: LongColGreaterLongScalar(col 0:int, val 40) -> 3:boolean, LongColGreaterLongScalar(col 1:int, val 50) -> 4:boolean, LongColEqualLongColumn(col 0:int, col 1:int) -> 5:boolean Statistics: Num rows: 
4 Data size: 32 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Right Outer Join 0 to 1 filter predicates: 0 -1 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)} +1 {_col2} {_col3} {_col4} keys: 0 _col0 (type: int) 1 _col1 (type: int) @@ -397,7 +399,7 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true -selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int +selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 8:int Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0) @@ -541,18 +543,19 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator -expressions: key (type: int), value (type: int) -outputColumnNames: _col0, _col1 +expressions: key (type: int), value (type: int), (key > 40) (type: boolean), (value > 50) (type: boolean), (key = value) (type: boolean) +outputColumnNames: _col0, _col1, _col2, _col3, _col4 Select Vectorization: className: VectorSelectOperator native: true +selectExpressions: LongColGreaterLongScalar(col 0:int, val 40) -> 3:boolean, LongColGreaterLongScalar(col 1:int, val 50) -> 4:boolean, LongColEqualLongColumn(col 0:int, col 1:int) -> 5:boolean Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map:
[26/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query49.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query49.q.out b/ql/src/test/results/clientpositive/perf/spark/query49.q.out index 07d14b5..354c178 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query49.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query49.q.out @@ -304,7 +304,7 @@ STAGE PLANS: predicate: ((ws_net_paid > 0) and (ws_net_profit > 1) and (ws_quantity > 0) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 5333432 Data size: 725192506 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_order_number (type: int), ws_quantity (type: int), ws_net_paid (type: decimal(7,2)) + expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_order_number (type: int), CASE WHEN (ws_quantity is not null) THEN (ws_quantity) ELSE (0) END (type: int), CASE WHEN (ws_net_paid is not null) THEN (ws_net_paid) ELSE (0) END (type: decimal(12,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 5333432 Data size: 725192506 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -312,7 +312,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 5333432 Data size: 725192506 Basic stats: COMPLETE Column stats: NONE -value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) +value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(12,2)) Execution mode: vectorized Map 10 Map Operator Tree: @@ -343,7 +343,7 @@ STAGE PLANS: predicate: ((wr_return_amt > 1) and wr_item_sk is not null and wr_order_number is not null) (type: boolean) Statistics: Num rows: 4799489 Data size: 441731394 Basic stats: COMPLETE 
Column stats: NONE Select Operator - expressions: wr_item_sk (type: int), wr_order_number (type: int), wr_return_quantity (type: int), wr_return_amt (type: decimal(7,2)) + expressions: wr_item_sk (type: int), wr_order_number (type: int), CASE WHEN (wr_return_quantity is not null) THEN (wr_return_quantity) ELSE (0) END (type: int), CASE WHEN (wr_return_amt is not null) THEN (wr_return_amt) ELSE (0) END (type: decimal(12,2)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4799489 Data size: 441731394 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -351,7 +351,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 4799489 Data size: 441731394 Basic stats: COMPLETE Column stats: NONE -value expressions: _col2 (type: int), _col3 (type: decimal(7,2)) +value expressions: _col2 (type: int), _col3 (type: decimal(12,2)) Execution mode: vectorized Map 12 Map Operator Tree: @@ -363,7 +363,7 @@ STAGE PLANS: predicate: ((cs_net_paid > 0) and (cs_net_profit > 1) and (cs_quantity > 0) and cs_item_sk is not null and cs_order_number is not null and cs_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 10666290 Data size: 129931 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int), cs_order_number (type: int), cs_quantity (type: int), cs_net_paid (type: decimal(7,2)) + expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int), cs_order_number (type: int), CASE WHEN (cs_quantity is not null) THEN (cs_quantity) ELSE (0) END (type: int), CASE WHEN (cs_net_paid is not null) THEN (cs_net_paid) ELSE (0) END (type: decimal(12,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 10666290 Data size: 129931 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -371,7 +371,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int)
[45/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out index 68fc903..4ed9b60 100644 --- a/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out @@ -135,10 +135,10 @@ POSTHOOK: Input: default@srcbucket_mapjoin_part_n13 POSTHOOK: Input: default@srcbucket_mapjoin_part_n13@ds=2008-04-08 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n6 OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` -FROM (SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds` +FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_2_n11` WHERE `ds` = '2008-04-08' AND `key` IS NOT NULL) AS `t0` -INNER JOIN (SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds` +INNER JOIN (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_n13` WHERE `ds` = '2008-04-08' AND `key` IS NOT NULL) AS `t2` ON `t0`.`key` = `t2`.`key` STAGE DEPENDENCIES: @@ -326,11 +326,11 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) -outputColumnNames: _col0, _col1, _col4 +outputColumnNames: _col0, _col1, _col3 Position of Big Table: 1 Statistics: Num rows: 156 Data size: 89111 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: CAST( _col0 AS STRING) (type: string), _col1 (type: string), _col4 (type: string) + expressions: CAST( _col0 AS STRING) (type: string), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 156 Data size: 89111 Basic stats: PARTIAL Column stats: NONE File Output Operator @@ -584,10 +584,10 @@ POSTHOOK: Input: default@srcbucket_mapjoin_part_n13 POSTHOOK: Input: default@srcbucket_mapjoin_part_n13@ds=2008-04-08 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n6 OPTIMIZED SQL: SELECT `t0`.`key`, 
`t0`.`value`, `t2`.`value` AS `value1` -FROM (SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds` +FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_2_n11` WHERE `ds` = '2008-04-08' AND `key` IS NOT NULL) AS `t0` -INNER JOIN (SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds` +INNER JOIN (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_n13` WHERE `ds` = '2008-04-08' AND `key` IS NOT NULL) AS `t2` ON `t0`.`key` = `t2`.`key` STAGE DEPENDENCIES: @@ -775,11 +775,11 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) -outputColumnNames: _col0, _col1, _col4 +outputColumnNames: _col0, _col1, _col3 Position of Big Table: 1 Statistics: Num rows: 156 Data size: 89111 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: CAST( _col0 AS STRING) (type: string), _col1 (type: string), _col4 (type: string) + expressions: CAST( _col0 AS STRING) (type: string), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 156 Data size: 89111 Basic stats: PARTIAL Column stats: NONE File Output Operator http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out index acb1e87..8039d0f 100644 --- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out @@ -125,7 +125,7 @@ STAGE PLANS: Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 15036 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a @@ -137,17 +137,17 @@ STAGE PLANS: Select 
Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 1780
[33/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out b/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out index bd40d8f..1b4d343 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out @@ -234,17 +234,18 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator -expressions: key (type: int), value (type: int), col_1 (type: string) -outputColumnNames: _col0, _col1, _col2 +expressions: key (type: int), value (type: int), col_1 (type: string), key BETWEEN 100 AND 102 (type: boolean) +outputColumnNames: _col0, _col1, _col2, _col3 Select Vectorization: className: VectorSelectOperator native: true -Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE +selectExpressions: LongColumnBetween(col 0:int, left 100, right 102) -> 4:boolean +Statistics: Num rows: 6 Data size: 596 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 filter predicates: -0 {_col0 BETWEEN 100 AND 102} +0 {_col3} 1 keys: 0 _col1 (type: int) @@ -253,20 +254,27 @@ STAGE PLANS: className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6 input vertices: 1 Map 2 Statistics: Num rows: 8 Data size: 1049 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator -compressed: false -File Sink Vectorization: -className: 
VectorFileSinkOperator -native: false + Select Operator +expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: string) +outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 +Select Vectorization: +className: VectorSelectOperator +native: true Statistics: Num rows: 8 Data size: 1049 Basic stats: COMPLETE Column stats: COMPLETE -table: -input format: org.apache.hadoop.mapred.SequenceFileInputFormat -output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat -serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 1049 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: @@ -396,17 +404,18 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator -expressions: key (type: int), value (type: int), col_1 (type: string) -outputColumnNames: _col0, _col1, _col2 +expressions: key (type: int), value (type: int), col_1 (type: string), key BETWEEN
[50/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/auto_join_stats.q.out -- diff --git a/ql/src/test/results/clientpositive/auto_join_stats.q.out b/ql/src/test/results/clientpositive/auto_join_stats.q.out index 43a248b..42e165d 100644 --- a/ql/src/test/results/clientpositive/auto_join_stats.q.out +++ b/ql/src/test/results/clientpositive/auto_join_stats.q.out @@ -63,8 +63,8 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: key (type: string) -outputColumnNames: _col0 +expressions: key (type: string), UDFToDouble(key) (type: double) +outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: @@ -82,8 +82,8 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: key (type: string) -outputColumnNames: _col0 +expressions: key (type: string), UDFToDouble(key) (type: double) +outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -91,7 +91,7 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -119,13 +119,13 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: key (type: string) -outputColumnNames: _col0 +expressions: key (type: string), UDFToDouble(key) (type: double) +outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE 
HashTable Sink Operator keys: -0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) -1 UDFToDouble(_col0) (type: double) +0 (_col1 + _col3) (type: double) +1 _col1 (type: double) Stage: Stage-5 Map Reduce @@ -135,17 +135,21 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: -0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) -1 UDFToDouble(_col0) (type: double) - outputColumnNames: _col0, _col1, _col2 +0 (_col1 + _col3) (type: double) +1 _col1 (type: double) + outputColumnNames: _col0, _col2, _col4 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - File Output Operator -compressed: false + Select Operator +expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) +outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE -table: -input format: org.apache.hadoop.mapred.SequenceFileInputFormat -output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat -serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +File Output Operator + compressed: false + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized Local Work: Map Reduce Local Work @@ -166,8 +170,8 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator -
[29/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query18.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query18.q.out b/ql/src/test/results/clientpositive/perf/spark/query18.q.out index e77a918..1d73576 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query18.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query18.q.out @@ -106,15 +106,15 @@ STAGE PLANS: predicate: ((c_birth_month) IN (9, 5, 12, 4, 1, 10) and c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null) (type: boolean) Statistics: Num rows: 8000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_addr_sk (type: int), c_birth_year (type: int) - outputColumnNames: _col0, _col1, _col2, _col4 + expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_addr_sk (type: int), CAST( c_birth_year AS decimal(12,2)) (type: decimal(12,2)) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 8000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: int) sort order: + Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 8000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE -value expressions: _col0 (type: int), _col1 (type: int), _col4 (type: int) +value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: decimal(12,2)) Execution mode: vectorized Map 13 Map Operator Tree: @@ -145,15 +145,15 @@ STAGE PLANS: predicate: ((cd_education_status = 'College') and (cd_gender = 'M') and cd_demo_sk is not null) (type: boolean) Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cd_demo_sk (type: int), cd_dep_count (type: int) - outputColumnNames: _col0, _col3 + 
expressions: cd_demo_sk (type: int), CAST( cd_dep_count AS decimal(12,2)) (type: decimal(12,2)) + outputColumnNames: _col0, _col1 Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE -value expressions: _col3 (type: int) +value expressions: _col1 (type: decimal(12,2)) Execution mode: vectorized Map 15 Map Operator Tree: @@ -224,7 +224,7 @@ STAGE PLANS: predicate: (cs_bill_cdemo_sk is not null and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_bill_cdemo_sk (type: int), cs_item_sk (type: int), cs_quantity (type: int), cs_list_price (type: decimal(7,2)), cs_sales_price (type: decimal(7,2)), cs_coupon_amt (type: decimal(7,2)), cs_net_profit (type: decimal(7,2)) + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_bill_cdemo_sk (type: int), cs_item_sk (type: int), CAST( cs_quantity AS decimal(12,2)) (type: decimal(12,2)), CAST( cs_list_price AS decimal(12,2)) (type: decimal(12,2)), CAST( cs_coupon_amt AS decimal(12,2)) (type: decimal(12,2)), CAST( cs_sales_price AS decimal(12,2)) (type: decimal(12,2)), CAST( cs_net_profit AS decimal(12,2)) (type: decimal(12,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -232,7 +232,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int)
[02/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out index 3165970..2fc9a3d 100644 --- a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out @@ -2662,7 +2662,7 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int), value (type: string) + expressions: key (type: int), substr(value, 5) (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -2703,22 +2703,18 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE -Select Operator - expressions: _col1 (type: int), substr(_col2, 5) (type: string) +Group By Operator + aggregations: sum(_col2) + keys: _col1 (type: int) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator -aggregations: sum(_col1) -keys: _col0 (type: int) -mode: hash -outputColumnNames: _col0, _col1 + Reduce Output Operator +key expressions: _col0 (type: int) +sort order: + +Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE -Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) +value expressions: _col1 (type: double) Reducer 5 Execution mode: 
vectorized Reduce Operator Tree: @@ -2827,7 +2823,7 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int), value (type: string) + expressions: key (type: int), substr(value, 5) (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -2868,22 +2864,18 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE -Select Operator - expressions: _col1 (type: int), substr(_col2, 5) (type: string) +Group By Operator + aggregations: sum(_col2) + keys: _col1 (type: int) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator -aggregations: sum(_col1) -keys: _col0 (type: int) -mode: hash -outputColumnNames: _col0, _col1 + Reduce Output Operator +key expressions: _col0 (type: int) +sort order: + +Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE -Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type:
[08/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query66.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query66.q.out b/ql/src/test/results/clientpositive/perf/tez/query66.q.out index 225b62f..767d47b 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query66.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query66.q.out @@ -511,10 +511,10 @@ Stage-0 PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 Group By Operator [GBY_62] (rows=5559759 width=3166) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Select Operator [SEL_60] (rows=5559759 width=680) + Select Operator [SEL_60] (rows=5559759 width=750) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"] -Merge Join Operator [MERGEJOIN_204] (rows=5559759 width=680) - Conds:RS_57._col3=RS_259._col0(Inner),Output:["_col4","_col5","_col6","_col11","_col15","_col16","_col17","_col18","_col19","_col20"] +Merge Join Operator [MERGEJOIN_204] (rows=5559759 width=750) + 
Conds:RS_57._col3=RS_259._col0(Inner),Output:["_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col22","_col23","_col24","_col25","_col26","_col27"] <-Map 24 [SIMPLE_EDGE] vectorized SHUFFLE [RS_259] PartitionCols:_col0 @@ -527,12 +527,12 @@ Stage-0 <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_57] PartitionCols:_col3 -Merge Join Operator [MERGEJOIN_203] (rows=5559759 width=205) - Conds:RS_54._col2=RS_245._col0(Inner),Output:["_col3","_col4","_col5","_col6","_col11"] +Merge Join Operator [MERGEJOIN_203] (rows=5559759 width=274) + Conds:RS_54._col2=RS_245._col0(Inner),Output:["_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] <-Map 21 [SIMPLE_EDGE] vectorized SHUFFLE [RS_245] PartitionCols:_col0 -Select Operator [SEL_242] (rows=1 width=88) +Select Operator [SEL_242] (rows=1 width=4) Output:["_col0"] Filter Operator [FIL_241] (rows=1 width=88) predicate:((sm_carrier) IN ('DIAMOND', 'AIRBORNE') and sm_ship_mode_sk is not null) @@ -541,13 +541,13 @@ Stage-0 <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_54] PartitionCols:_col2 -Merge Join Operator [MERGEJOIN_202] (rows=9518 width=224) - Conds:RS_51._col0=RS_233._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col11"] +Merge Join Operator [MERGEJOIN_202] (rows=9518 width=278) + Conds:RS_51._col0=RS_233._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] <-Map 18
[21/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out index ace7cf5..b55f2c1 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out @@ -1,7 +1,7 @@ -Warning: Shuffle Join MERGEJOIN[593][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 29' is a cross product -Warning: Shuffle Join MERGEJOIN[594][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 30' is a cross product -Warning: Shuffle Join MERGEJOIN[596][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 33' is a cross product -Warning: Shuffle Join MERGEJOIN[597][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 34' is a cross product +Warning: Shuffle Join MERGEJOIN[583][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 29' is a cross product +Warning: Shuffle Join MERGEJOIN[584][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 30' is a cross product +Warning: Shuffle Join MERGEJOIN[586][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 33' is a cross product +Warning: Shuffle Join MERGEJOIN[587][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 34' is a cross product PREHOOK: query: explain cbo with frequent_ss_items as (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt @@ -128,16 +128,15 @@ HiveSortLimit(fetch=[100]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject($f0=[$0]) HiveJoin(condition=[>($1, *(0.95, $3))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0], $f1=[$1]) -HiveAggregate(group=[{0}], agg#0=[sum($1)]) - HiveProject($f0=[$3], $f1=[*(CAST($1):DECIMAL(10, 0), $2)]) -HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - 
HiveProject(ss_customer_sk=[$3], ss_quantity=[$10], ss_sales_price=[$13]) -HiveFilter(condition=[IS NOT NULL($3)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(c_customer_sk=[$0]) -HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(c_customer_sk=[$0], $f1=[$1]) +HiveAggregate(group=[{2}], agg#0=[sum($1)]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) +HiveProject(ss_customer_sk=[$3], *=[*(CAST($10):DECIMAL(10, 0), $13)]) + HiveFilter(condition=[IS NOT NULL($3)]) +HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) +HiveProject(c_customer_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) +HiveTableScan(table=[[default, customer]], table:alias=[customer]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cnt=[$0]) HiveFilter(condition=[<=(sq_count_check($0), 1)]) @@ -146,75 +145,71 @@ HiveSortLimit(fetch=[100]) HiveProject HiveProject($f0=[$0]) HiveAggregate(group=[{}], agg#0=[count($0)]) - HiveProject($f0=[$0], $f1=[$1]) -HiveAggregate(group=[{0}], agg#0=[sum($1)]) - HiveProject($f0=[$0], $f1=[*(CAST($3):DECIMAL(10, 0), $4)]) -HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0]) -HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) -HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_quantity=[$10], ss_sales_price=[$13]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) -HiveTableScan(table=[[default,
[27/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query4.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query4.q.out b/ql/src/test/results/clientpositive/perf/spark/query4.q.out index 67e8b4f..93e83ef 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query4.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query4.q.out @@ -237,13 +237,13 @@ STAGE PLANS: Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398) Reducer 11 <- Map 14 (PARTITION-LEVEL SORT, 975), Reducer 10 (PARTITION-LEVEL SORT, 975) Reducer 12 <- Reducer 11 (GROUP, 481) -Reducer 16 <- Map 15 (PARTITION-LEVEL SORT, 306), Map 19 (PARTITION-LEVEL SORT, 306) -Reducer 17 <- Map 20 (PARTITION-LEVEL SORT, 873), Reducer 16 (PARTITION-LEVEL SORT, 873) -Reducer 18 <- Reducer 17 (GROUP, 369) +Reducer 16 <- Map 15 (PARTITION-LEVEL SORT, 154), Map 19 (PARTITION-LEVEL SORT, 154) +Reducer 17 <- Map 20 (PARTITION-LEVEL SORT, 706), Reducer 16 (PARTITION-LEVEL SORT, 706) +Reducer 18 <- Reducer 17 (GROUP, 186) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 154), Map 7 (PARTITION-LEVEL SORT, 154) -Reducer 22 <- Map 21 (PARTITION-LEVEL SORT, 154), Map 25 (PARTITION-LEVEL SORT, 154) -Reducer 23 <- Map 26 (PARTITION-LEVEL SORT, 706), Reducer 22 (PARTITION-LEVEL SORT, 706) -Reducer 24 <- Reducer 23 (GROUP, 186) +Reducer 22 <- Map 21 (PARTITION-LEVEL SORT, 306), Map 25 (PARTITION-LEVEL SORT, 306) +Reducer 23 <- Map 26 (PARTITION-LEVEL SORT, 873), Reducer 22 (PARTITION-LEVEL SORT, 873) +Reducer 24 <- Reducer 23 (GROUP, 369) Reducer 28 <- Map 27 (PARTITION-LEVEL SORT, 306), Map 31 (PARTITION-LEVEL SORT, 306) Reducer 29 <- Map 32 (PARTITION-LEVEL SORT, 873), Reducer 28 (PARTITION-LEVEL SORT, 873) Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 706), Reducer 2 (PARTITION-LEVEL SORT, 706) @@ -266,16 +266,15 @@ STAGE PLANS: predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) 
Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int), ws_ext_discount_amt (type: decimal(7,2)), ws_ext_sales_price (type: decimal(7,2)), ws_ext_wholesale_cost (type: decimal(7,2)), ws_ext_list_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int), ws_ext_list_price - ws_ext_wholesale_cost) - ws_ext_discount_amt) + ws_ext_sales_price) / 2) (type: decimal(14,6)) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE -value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) -Execution mode: vectorized +value expressions: _col1 (type: int), _col2 (type: decimal(14,6)) Map 13 Map Operator Tree: TableScan @@ -318,23 +317,22 @@ STAGE PLANS: Map 15 Map Operator Tree: TableScan - alias: catalog_sales - filterExpr: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + alias: web_sales + filterExpr: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Filter Operator -predicate: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean) -Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE +predicate: (ws_bill_customer_sk is not null and 
ws_sold_date_sk is not null) (type: boolean) +Statistics: Num rows: 144002668 Data size: 19580198212 Basic
[01/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
Repository: hive Updated Branches: refs/heads/master dca389b06 -> 558876462 http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out index 44f9b5d..4190a21 100644 --- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out +++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out @@ -396,14 +396,14 @@ STAGE PLANS: Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator -key expressions: _col1 (type: string) +key expressions: _col0 (type: string) sort order: + -Map-reduce partition columns: _col1 (type: string) +Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE -value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) +value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string), _col6 (type: double), _col7 (type: string) Execution mode: vectorized Map 5 Map Operator Tree: @@ -426,13 +426,13 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col1 (type: string) - 1 _col1 (type: string) -outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, 
_col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + 1 _col0 (type: string) +outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col14 (type: string), _col15 (type: double), _col16 (type: string) Reducer 3 Reduce Operator Tree: Join Operator @@ -441,10 +441,10 @@ STAGE PLANS: keys: 0 1 -outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 +outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 Statistics: Num rows: 728 Data size: 178830 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 
(type: string), _col25 (type: double), _col26 (type: string), 1 (type: int), _col10 (type: string),
[24/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query75.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query75.q.out b/ql/src/test/results/clientpositive/perf/spark/query75.q.out index 85e6dca..553d11a 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query75.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query75.q.out @@ -219,14 +219,14 @@ STAGE PLANS: Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i_item_sk (type: int), i_brand_id (type: int), i_class_id (type: int), i_category_id (type: int), i_manufact_id (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE -value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: int) +value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) Execution mode: vectorized Map 11 Map Operator Tree: @@ -397,14 +397,14 @@ STAGE PLANS: Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i_item_sk (type: int), i_brand_id (type: int), i_class_id (type: int), i_category_id (type: int), i_manufact_id (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 231000 Data size: 
331780228 Basic stats: COMPLETE Column stats: NONE -value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: int) +value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) Execution mode: vectorized Map 34 Map Operator Tree: @@ -530,23 +530,23 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) -outputColumnNames: _col1, _col2, _col3, _col4, _col8, _col9, _col10, _col12 +outputColumnNames: _col1, _col2, _col3, _col4, _col7, _col8, _col9, _col10 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int), _col2 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: int), _col2 (type: int) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int) + value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col7 (type: int), _col8 (type: int), _col9 (type: int), _col10 (type: int) Reducer 15 Reduce Operator Tree: Join Operator condition map: Left Outer Join 0 to 1 -outputColumnNames: _col3, _col4, _col8, _col9, _col10, _col12, _col15, _col16 +outputColumnNames: _col3, _col4, _col7, _col8, _col9, _col10, _col13, _col14 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int), (_col3 - CASE WHEN (_col15 is not null) THEN (_col15) ELSE (0) END) (type: int), (_col4 - CASE WHEN (_col16 is not null) THEN (_col16) ELSE (0) END) (type: decimal(8,2)) + expressions: _col7 (type: int), _col8
[07/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query75.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query75.q.out b/ql/src/test/results/clientpositive/perf/tez/query75.q.out index 9968ade..f4bd046 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query75.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query75.q.out @@ -244,7 +244,7 @@ Stage-0 Select Operator [SEL_539] (rows=170474971 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] Merge Join Operator [MERGEJOIN_538] (rows=170474971 width=234) - Conds:RS_103._col1, _col2=RS_625._col0, _col1(Left Outer),Output:["_col3","_col4","_col8","_col9","_col10","_col12","_col15","_col16"] + Conds:RS_103._col1, _col2=RS_625._col0, _col1(Left Outer),Output:["_col3","_col4","_col7","_col8","_col9","_col10","_col13","_col14"] <-Map 44 [SIMPLE_EDGE] vectorized SHUFFLE [RS_625] PartitionCols:_col0, _col1 @@ -258,12 +258,12 @@ Stage-0 SHUFFLE [RS_103] PartitionCols:_col1, _col2 Merge Join Operator [MERGEJOIN_510] (rows=96821196 width=138) - Conds:RS_100._col1=RS_599._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col8","_col9","_col10","_col12"] + Conds:RS_100._col1=RS_599._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col10"] <-Map 37 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_599] PartitionCols:_col0 -Select Operator [SEL_592] (rows=45745 width=109) - Output:["_col0","_col1","_col2","_col3","_col5"] +Select Operator [SEL_592] (rows=45745 width=19) + Output:["_col0","_col1","_col2","_col3","_col4"] Filter Operator [FIL_591] (rows=45745 width=109) predicate:((i_category = 'Sports') and i_brand_id is not null and i_category_id is not null and i_class_id is not null and i_item_sk is not null and i_manufact_id is not null) TableScan [TS_6] (rows=462000 width=109) @@ -276,7 +276,7 @@ Stage-0 <-Map 11 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_571] PartitionCols:_col0 
-Select Operator [SEL_562] (rows=652 width=8) +Select Operator [SEL_562] (rows=652 width=4) Output:["_col0"] Filter Operator [FIL_558] (rows=652 width=8) predicate:((d_year = 2002) and d_date_sk is not null) @@ -321,7 +321,7 @@ Stage-0 Select Operator [SEL_548] (rows=450703984 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] Merge Join Operator [MERGEJOIN_547] (rows=450703984 width=204) - Conds:RS_125._col1, _col2=RS_649._col0, _col1(Left Outer),Output:["_col3","_col4","_col8","_col9","_col10","_col12","_col15","_col16"] + Conds:RS_125._col1, _col2=RS_649._col0, _col1(Left Outer),Output:["_col3","_col4","_col7","_col8","_col9","_col10","_col13","_col14"] <-Map 46 [SIMPLE_EDGE] vectorized SHUFFLE [RS_649] PartitionCols:_col0, _col1 @@ -335,7 +335,7 @@ Stage-0 SHUFFLE [RS_125] PartitionCols:_col1, _col2
[31/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/masking_3.q.out -- diff --git a/ql/src/test/results/clientpositive/masking_3.q.out b/ql/src/test/results/clientpositive/masking_3.q.out index 15a8963..725d905 100644 --- a/ql/src/test/results/clientpositive/masking_3.q.out +++ b/ql/src/test/results/clientpositive/masking_3.q.out @@ -54,12 +54,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator -compressed: false -table: -input format: org.apache.hadoop.mapred.SequenceFileInputFormat -output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat -serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator +expressions: _col0 (type: int), UDFToDouble(_col0) (type: double) +outputColumnNames: _col0, _col1 +Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -72,20 +76,20 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: key (type: string) -outputColumnNames: _col0 +expressions: key (type: string), UDFToDouble(key) (type: double) +outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) + key expressions: _col1 (type: double) sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Map-reduce partition columns: _col1 (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE value expressions: _col0 (type: string) TableScan Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) + key expressions: _col1 (type: double) sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Map-reduce partition columns: _col1 (type: double) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Reduce Operator Tree: @@ -93,21 +97,25 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: -0 UDFToDouble(_col0) (type: double) -1 UDFToDouble(_col0) (type: double) - outputColumnNames: _col0, _col1 +0 _col1 (type: double) +1 _col1 (type: double) + outputColumnNames: _col0, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator -keys: _col0 (type: string), _col1 (type: int) -mode: hash + Select Operator +expressions: _col0 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE -File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe +Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator +compressed: false +table: +input format: org.apache.hadoop.mapred.SequenceFileInputFormat +output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat +serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1
[04/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/pointlookup3.q.out -- diff --git a/ql/src/test/results/clientpositive/pointlookup3.q.out b/ql/src/test/results/clientpositive/pointlookup3.q.out index a5fa5e8..a3056a5 100644 --- a/ql/src/test/results/clientpositive/pointlookup3.q.out +++ b/ql/src/test/results/clientpositive/pointlookup3.q.out @@ -391,15 +391,15 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@pcr_t1_n1 POSTHOOK: Input: default@pcr_t1_n1@ds1=2000-04-08/ds2=2001-04-08 A masked pattern was here -OPTIMIZED SQL: SELECT `t4`.`key`, `t4`.`value`, CAST('2000-04-08' AS STRING) AS `ds1`, `t4`.`ds2`, `t4`.`key1`, `t4`.`value1`, `t4`.`ds11`, CAST('2001-04-08' AS STRING) AS `ds21` -FROM (SELECT `t0`.`key`, `t0`.`value`, `t0`.`ds2`, `t2`.`key` AS `key1`, `t2`.`value` AS `value1`, `t2`.`ds1` AS `ds11` -FROM (SELECT `key`, `value`, CAST('2000-04-08' AS STRING) AS `ds1`, `ds2` +OPTIMIZED SQL: SELECT `t3`.`key`, `t3`.`value`, CAST('2000-04-08' AS STRING) AS `ds1`, `t3`.`ds2`, `t3`.`key0` AS `key1`, `t3`.`value0` AS `value1`, `t3`.`ds1` AS `ds11`, CAST('2001-04-08' AS STRING) AS `ds21` +FROM (SELECT * +FROM (SELECT `key`, `value`, `ds2` FROM `default`.`pcr_t1_n1` WHERE `ds1` = '2000-04-08' AND `key` IS NOT NULL) AS `t0` -INNER JOIN (SELECT `key`, `value`, `ds1`, CAST('2001-04-08' AS STRING) AS `ds2` +INNER JOIN (SELECT `key`, `value`, `ds1` FROM `default`.`pcr_t1_n1` WHERE `ds2` = '2001-04-08' AND `key` IS NOT NULL) AS `t2` ON `t0`.`key` = `t2`.`key` -ORDER BY `t2`.`key`, `t2`.`value`) AS `t4` +ORDER BY `t2`.`key`, `t2`.`value`) AS `t3` STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -420,7 +420,7 @@ STAGE PLANS: Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string), ds2 (type: string) -outputColumnNames: _col0, _col1, _col3 +outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 20 Data 
size: 160 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) @@ -429,7 +429,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE tag: 0 - value expressions: _col1 (type: string), _col3 (type: string) + value expressions: _col1 (type: string), _col2 (type: string) auto parallelism: false TableScan alias: t2 @@ -515,30 +515,26 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE - Select Operator -expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string), _col6 (type: string) -outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 -Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE -File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator +compressed: false +GlobalTableId: 0 A masked pattern was here - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: -column.name.delimiter , -columns _col0,_col1,_col2,_col3,_col4,_col5 -columns.types int,string,string,int,string,string -escape.delim \ -serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false +NumFilesPerFileSink: 1 +table: +input format: org.apache.hadoop.mapred.SequenceFileInputFormat +output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat +properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types 
int,string,string,int,string,string + escape.delim \ +
[30/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/masking_4.q.out -- diff --git a/ql/src/test/results/clientpositive/masking_4.q.out b/ql/src/test/results/clientpositive/masking_4.q.out index 60cbd0f..54861b0 100644 --- a/ql/src/test/results/clientpositive/masking_4.q.out +++ b/ql/src/test/results/clientpositive/masking_4.q.out @@ -210,12 +210,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator -compressed: false -table: -input format: org.apache.hadoop.mapred.SequenceFileInputFormat -output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat -serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator +expressions: _col0 (type: int), UDFToDouble(_col0) (type: double) +outputColumnNames: _col0, _col1 +Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -228,20 +232,20 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: key (type: string) -outputColumnNames: _col0 +expressions: key (type: string), UDFToDouble(key) (type: double) +outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) + key expressions: _col1 (type: double) sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Map-reduce partition columns: _col1 (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE value expressions: _col0 (type: string) TableScan Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) + key expressions: _col1 (type: double) sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Map-reduce partition columns: _col1 (type: double) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Reduce Operator Tree: @@ -249,21 +253,25 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: -0 UDFToDouble(_col0) (type: double) -1 UDFToDouble(_col0) (type: double) - outputColumnNames: _col0, _col1 +0 _col1 (type: double) +1 _col1 (type: double) + outputColumnNames: _col0, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator -keys: _col0 (type: string), _col1 (type: int) -mode: hash + Select Operator +expressions: _col0 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE -File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe +Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator +compressed: false +table: +input format: org.apache.hadoop.mapred.SequenceFileInputFormat +output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat +serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage:
[43/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out index accb3a7..6954647 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out @@ -535,14 +535,14 @@ STAGE PLANS: filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE Select Operator -expressions: ds (type: string) +expressions: day(CAST( ds AS DATE)) (type: int) outputColumnNames: _col0 -Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE +Statistics: Num rows: 2000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: day(CAST( _col0 AS DATE)) (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: day(CAST( _col0 AS DATE)) (type: int) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 @@ -555,16 +555,16 @@ STAGE PLANS: predicate: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ds (type: string) + expressions: day(CAST( ds AS DATE)) (type: int) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator -key expressions: day(CAST( _col0 AS DATE)) (type: int) +key expressions: _col0 (type: int) sort order: + -Map-reduce partition columns: day(CAST( _col0 AS 
DATE)) (type: int) +Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: day(CAST( _col0 AS DATE)) (type: int) +expressions: _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -587,9 +587,9 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 day(CAST( _col0 AS DATE)) (type: int) - 1 day(CAST( _col0 AS DATE)) (type: int) -Statistics: Num rows: 2200 Data size: 404800 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col0 (type: int) +Statistics: Num rows: 2200 Data size: 8800 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -678,14 +678,14 @@ STAGE PLANS: filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE Select Operator -expressions: ds (type: string) +expressions: day(CAST( ds AS DATE)) (type: int) outputColumnNames: _col0 -Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE +Statistics: Num rows: 2000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: day(CAST( _col0 AS DATE)) (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: day(CAST( _col0 AS DATE)) (type: int) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE +
[51/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/55887646 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/55887646 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/55887646 Branch: refs/heads/master Commit: 558876462d2589423d7131b51c24dbf61b8a22b9 Parents: dca389b Author: Zoltan Haindrich Authored: Mon Nov 12 10:03:28 2018 -0800 Committer: Jesus Camacho Rodriguez Committed: Mon Nov 12 10:03:57 2018 -0800 -- .../results/positive/accumulo_queries.q.out | 34 +- .../test/results/positive/hbase_queries.q.out | 34 +- .../hive/jdbc/AbstractJdbcTriggersTest.java |5 +- .../jdbc/TestTriggersMoveWorkloadManager.java |9 +- .../jdbc/TestTriggersTezSessionPoolManager.java | 21 +- .../hadoop/hive/ql/parse/CalcitePlanner.java|2 +- .../bucket_mapjoin_mismatch1.q.out |4 +- .../clientpositive/allcolref_in_udf.q.out | 28 +- .../annotate_stats_join_pkfk.q.out | 40 +- .../results/clientpositive/auto_join13.q.out|6 +- .../results/clientpositive/auto_join19.q.out|4 +- .../clientpositive/auto_join19_inclause.q.out |4 +- .../results/clientpositive/auto_join2.q.out | 24 +- .../results/clientpositive/auto_join32.q.out|6 +- .../results/clientpositive/auto_join9.q.out |4 +- .../clientpositive/auto_join_stats.q.out| 122 +- .../clientpositive/auto_join_stats2.q.out | 90 +- .../materialized_view_create_rewrite.q.out |8 +- .../clientpositive/bucket_map_join_spark1.q.out |4 +- .../clientpositive/bucket_map_join_spark2.q.out |4 +- .../clientpositive/bucket_map_join_spark3.q.out |4 +- .../clientpositive/bucket_map_join_spark4.q.out |4 +- .../bucketsortoptimize_insert_4.q.out |4 +- .../bucketsortoptimize_insert_5.q.out |4 +- .../bucketsortoptimize_insert_8.q.out |8 +- .../test/results/clientpositive/cbo_const.q.out | 82 +- .../results/clientpositive/cbo_rp_join1.q.out | 80 +- 
.../clientpositive/cbo_rp_outer_join_ppr.q.out |8 +- .../constantPropagateForSubQuery.q.out | 10 +- .../results/clientpositive/constprog2.q.out | 16 +- .../clientpositive/constprog_partitioner.q.out |8 +- .../clientpositive/correlationoptimizer8.q.out | 58 +- .../test/results/clientpositive/cte_mat_5.q.out | 18 +- .../results/clientpositive/deleteAnalyze.q.out | 18 +- .../clientpositive/druid/druidmini_mv.q.out | 16 +- .../encryption_join_unencrypted_tbl.q.out | 76 +- .../clientpositive/filter_cond_pushdown.q.out | 90 +- .../clientpositive/filter_join_breaktask.q.out | 24 +- .../infer_bucket_sort_map_operators.q.out | 24 +- .../clientpositive/infer_join_preds.q.out | 22 +- .../results/clientpositive/innerjoin1.q.out | 76 +- .../test/results/clientpositive/input23.q.out |8 +- .../results/clientpositive/interval_3.q.out | 16 +- ql/src/test/results/clientpositive/join13.q.out |8 +- ql/src/test/results/clientpositive/join2.q.out | 32 +- ql/src/test/results/clientpositive/join26.q.out |2 +- ql/src/test/results/clientpositive/join32.q.out |2 +- ql/src/test/results/clientpositive/join33.q.out |2 +- ql/src/test/results/clientpositive/join42.q.out | 28 +- ql/src/test/results/clientpositive/join45.q.out | 392 +- ql/src/test/results/clientpositive/join46.q.out | 290 +- ql/src/test/results/clientpositive/join47.q.out | 392 +- ql/src/test/results/clientpositive/join9.q.out |6 +- .../clientpositive/join_cond_pushdown_1.q.out | 18 +- .../clientpositive/join_cond_pushdown_3.q.out | 18 +- .../join_cond_pushdown_unqual1.q.out| 18 +- .../join_cond_pushdown_unqual3.q.out| 18 +- .../clientpositive/join_emit_interval.q.out | 52 +- .../clientpositive/join_filters_overlap.q.out | 338 +- .../results/clientpositive/join_merging.q.out | 152 +- .../test/results/clientpositive/join_view.q.out |4 +- .../clientpositive/llap/auto_join_filters.q.out | 60 +- .../llap/auto_sortmerge_join_6.q.out| 174 +- .../llap/bucket_map_join_tez2.q.out | 116 +- .../clientpositive/llap/bucketmapjoin1.q.out| 20 +- 
.../clientpositive/llap/bucketmapjoin2.q.out|4 +- .../clientpositive/llap/bucketmapjoin3.q.out| 16 +- .../llap/bucketsortoptimize_insert_2.q.out | 24 +- .../llap/bucketsortoptimize_insert_6.q.out | 301 +- .../llap/bucketsortoptimize_insert_7.q.out |8 +- .../clientpositive/llap/check_constraint.q.out | 43 +- .../llap/constprog_semijoin.q.out | 54 +- .../llap/constraints_optimization.q.out |
[14/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query23.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query23.q.out b/ql/src/test/results/clientpositive/perf/tez/query23.q.out index 7784792..059195a 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query23.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query23.q.out @@ -1,7 +1,7 @@ -Warning: Shuffle Join MERGEJOIN[593][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 29' is a cross product -Warning: Shuffle Join MERGEJOIN[594][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 30' is a cross product -Warning: Shuffle Join MERGEJOIN[596][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 33' is a cross product -Warning: Shuffle Join MERGEJOIN[597][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 34' is a cross product +Warning: Shuffle Join MERGEJOIN[583][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 29' is a cross product +Warning: Shuffle Join MERGEJOIN[584][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 30' is a cross product +Warning: Shuffle Join MERGEJOIN[586][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 33' is a cross product +Warning: Shuffle Join MERGEJOIN[587][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 34' is a cross product PREHOOK: query: explain with frequent_ss_items as (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt @@ -166,399 +166,391 @@ Stage-0 limit:100 Stage-1 Reducer 6 vectorized - File Output Operator [FS_699] -Limit [LIM_698] (rows=1 width=112) + File Output Operator [FS_689] +Limit [LIM_688] (rows=1 width=112) Number of rows:100 - Group By Operator [GBY_697] (rows=1 width=112) + Group By Operator [GBY_687] (rows=1 width=112) Output:["_col0"],aggregations:["sum(VALUE._col0)"] <-Union 5 [CUSTOM_SIMPLE_EDGE] <-Reducer 12 [CONTAINS] - Reduce Output Operator [RS_608] -Group By Operator [GBY_607] (rows=1 width=112) + Reduce Output Operator 
[RS_598] +Group By Operator [GBY_597] (rows=1 width=112) Output:["_col0"],aggregations:["sum(_col0)"] - Select Operator [SEL_605] (rows=1 width=112) + Select Operator [SEL_595] (rows=1 width=112) Output:["_col0"] -Merge Join Operator [MERGEJOIN_604] (rows=1 width=116) - Conds:RS_248._col2=RS_249._col0(Inner),Output:["_col3","_col4"] +Merge Join Operator [MERGEJOIN_594] (rows=1 width=116) + Conds:RS_240._col2=RS_241._col0(Inner),Output:["_col3","_col4"] <-Reducer 11 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_248] + PARTITION_ONLY_SHUFFLE [RS_240] PartitionCols:_col2 -Merge Join Operator [MERGEJOIN_592] (rows=155 width=0) - Conds:RS_245._col1=RS_642._col0(Inner),Output:["_col2","_col3","_col4"] +Merge Join Operator [MERGEJOIN_582] (rows=155 width=0) + Conds:RS_237._col1=RS_632._col0(Inner),Output:["_col2","_col3","_col4"] <-Reducer 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_642] + SHUFFLE [RS_632] PartitionCols:_col0 -Group By Operator [GBY_639] (rows=2235 width=4) +Group By Operator [GBY_629] (rows=2235 width=4) Output:["_col0"],keys:_col1 - Select Operator [SEL_638] (rows=6548799 width=12) + Select Operator [SEL_628] (rows=6548799 width=290) Output:["_col1"] -Filter Operator [FIL_637] (rows=6548799 width=12) +Filter Operator [FIL_627] (rows=6548799 width=290) predicate:(_col3 > 4L) - Select Operator [SEL_636] (rows=19646398 width=12) -Output:["_col0","_col3"] -Group By Operator [GBY_635] (rows=19646398 width=290) + Select Operator [SEL_626] (rows=19646398 width=290) +Output:["_col1","_col3"] +Group By Operator [GBY_625] (rows=19646398 width=290) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 17 [SIMPLE_EDGE] -
[22/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/cbo_query13.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query13.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query13.q.out index ccad088..19f3039 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query13.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query13.q.out @@ -114,28 +114,28 @@ POSTHOOK: Input: default@store_sales POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveProject($f0=[/(CAST($0):DOUBLE, $1)], $f1=[/($2, $3)], $f2=[/($4, $5)], $f3=[CAST($4):DECIMAL(17, 2)]) - HiveAggregate(group=[{}], agg#0=[sum($16)], agg#1=[count($16)], agg#2=[sum($18)], agg#3=[count($18)], agg#4=[sum($19)], agg#5=[count($19)]) -HiveJoin(condition=[AND(=($0, $12), OR(AND(=($1, _UTF-16LE'M'), =($2, _UTF-16LE'4 yr Degree'), BETWEEN(false, $17, 100, 150), =($7, 3)), AND(=($1, _UTF-16LE'D'), =($2, _UTF-16LE'Primary'), BETWEEN(false, $17, 50, 100), =($7, 1)), AND(=($1, _UTF-16LE'U'), =($2, _UTF-16LE'Advanced Degree'), BETWEEN(false, $17, 150, 200), =($7, 1], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3]) + HiveAggregate(group=[{}], agg#0=[sum($21)], agg#1=[count($21)], agg#2=[sum($22)], agg#3=[count($22)], agg#4=[sum($23)], agg#5=[count($23)]) +HiveJoin(condition=[AND(=($0, $17), OR(AND($1, $2, $27, $12), AND($3, $4, $28, $13), AND($5, $6, $29, $13)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cd_demo_sk=[$0], ==[=($2, _UTF-16LE'M')], =2=[=($3, _UTF-16LE'4 yr Degree')], =3=[=($2, _UTF-16LE'D')], =4=[=($3, _UTF-16LE'Primary')], =5=[=($2, _UTF-16LE'U')], =6=[=($3, _UTF-16LE'Advanced Degree')]) HiveFilter(condition=[AND(IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'), IS NOT NULL($0))]) HiveTableScan(table=[[default, 
customer_demographics]], table:alias=[customer_demographics]) - HiveJoin(condition=[AND(=($11, $0), OR(AND(IN($1, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM'), BETWEEN(false, $17, 100, 200)), AND(IN($1, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN'), BETWEEN(false, $17, 150, 300)), AND(IN($1, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), BETWEEN(false, $17, 50, 250], joinType=[inner], algorithm=[none], cost=[not available]) -HiveProject(ca_address_sk=[$0], ca_state=[$8], ca_country=[CAST(_UTF-16LE'United States'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveJoin(condition=[AND(=($12, $0), OR(AND($1, $17), AND($2, $18), AND($3, $19)))], joinType=[inner], algorithm=[none], cost=[not available]) +HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(hd_demo_sk=[$0], hd_dep_count=[$3]) + HiveProject(hd_demo_sk=[$0], ==[=($3, 3)], =2=[=($3, 1)]) HiveFilter(condition=[AND(IN($3, 3, 1), IS NOT NULL($0))]) HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) - HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) -HiveProject(d_date_sk=[$0], d_year=[CAST(2001):INTEGER]) - HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) -HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) -HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - 
HiveProject(s_store_sk=[$0]) -HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveProject(ss_sold_date_sk=[$0], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_quantity=[$10], ss_sales_price=[$13], ss_ext_sales_price=[$15], ss_ext_wholesale_cost=[$16], ss_net_profit=[$22]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) +HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) +HiveTableScan(table=[[default, store]], table:alias=[store]) +
[06/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query85.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query85.q.out b/ql/src/test/results/clientpositive/perf/tez/query85.q.out index f5800b9..1ada394 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query85.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query85.q.out @@ -183,15 +183,15 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 11 <- Reducer 15 (BROADCAST_EDGE) +Map 11 <- Reducer 13 (BROADCAST_EDGE) Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) +Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) -Reducer 3 <- Map 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 3 <- Map 17 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Map 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Map 16 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Map 17 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 6 <- Map 15 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 16 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 8 <- Map 17 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 9 <- Reducer 8 (SIMPLE_EDGE) @@ -200,134 +200,138 @@ Stage-0 limit:-1 Stage-1 Reducer 10 vectorized - File Output Operator [FS_239] -Limit [LIM_238] (rows=72 width=832) + File Output Operator [FS_240] +Limit [LIM_239] (rows=7 width=832) Number of rows:100 - Select Operator [SEL_237] (rows=72 width=832) + Select Operator [SEL_238] (rows=7 width=832) Output:["_col0","_col1","_col2","_col3"] <-Reducer 9 [SIMPLE_EDGE] vectorized -SHUFFLE [RS_236] - Select Operator [SEL_235] (rows=72 width=832) +SHUFFLE [RS_237] + Select Operator [SEL_236] (rows=7 width=832) Output:["_col4","_col5","_col6","_col7"] -Group 
By Operator [GBY_234] (rows=72 width=353) +Group By Operator [GBY_235] (rows=7 width=353) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)"],keys:KEY._col0 <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_49] PartitionCols:_col0 -Group By Operator [GBY_48] (rows=72 width=353) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col12)","count(_col12)","sum(_col7)","count(_col7)","sum(_col6)","count(_col6)"],keys:_col22 - Merge Join Operator [MERGEJOIN_206] (rows=8055 width=100) -Conds:RS_44._col3, _col24, _col25=RS_232._col0, _col1, _col2(Inner),Output:["_col6","_col7","_col12","_col22"] - <-Map 17 [SIMPLE_EDGE] vectorized -SHUFFLE [RS_232] - PartitionCols:_col0, _col1, _col2 - Select Operator [SEL_231] (rows=265971 width=183) -Output:["_col0","_col1","_col2"] -Filter Operator [FIL_230] (rows=265971 width=183) - predicate:((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) - TableScan [TS_21] (rows=1861800 width=183) - default@customer_demographics,cd2,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] - <-Reducer 7 [SIMPLE_EDGE] -SHUFFLE [RS_44] - PartitionCols:_col3, _col24, _col25 - Filter Operator [FIL_43] (rows=8055 width=390) -predicate:(((_col24 = 'D') and (_col25 = 'Primary') and _col13 BETWEEN 50 AND 100) or ((_col24 = 'M') and (_col25 = '4 yr Degree') and _col13 BETWEEN 100 AND 150) or ((_col24 = 'U') and (_col25 = 'Advanced Degree') and _col13 BETWEEN 150 AND 200)) -Merge Join Operator [MERGEJOIN_205] (rows=24166 width=390) - Conds:RS_40._col1=RS_233._col0(Inner),Output:["_col3","_col6","_col7","_col12","_col13","_col22","_col24","_col25"] -<-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_233] -PartitionCols:_col0 - Please refer to the previous Select Operator
[23/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query85.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query85.q.out b/ql/src/test/results/clientpositive/perf/spark/query85.q.out index 6bdbf7e..a7bf288 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query85.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query85.q.out @@ -182,8 +182,7 @@ POSTHOOK: Input: default@web_sales A masked pattern was here STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -191,47 +190,42 @@ STAGE PLANS: Spark A masked pattern was here Vertices: -Map 13 +Map 12 Map Operator Tree: TableScan - alias: reason - filterExpr: r_reason_sk is not null (type: boolean) - Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE + alias: web_page + filterExpr: wp_web_page_sk is not null (type: boolean) + Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE Filter Operator -predicate: r_reason_sk is not null (type: boolean) -Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE +predicate: wp_web_page_sk is not null (type: boolean) +Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: r_reason_sk (type: int), r_reason_desc (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE + expressions: wp_web_page_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col4 (type: int) + 0 _col10 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work - - Stage: Stage-3 -Spark - A masked 
pattern was here - Vertices: -Map 11 +Map 13 Map Operator Tree: TableScan - alias: web_page - filterExpr: wp_web_page_sk is not null (type: boolean) - Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE + alias: reason + filterExpr: r_reason_sk is not null (type: boolean) + Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE Filter Operator -predicate: wp_web_page_sk is not null (type: boolean) -Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE +predicate: r_reason_sk is not null (type: boolean) +Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: wp_web_page_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE + expressions: r_reason_sk (type: int), r_reason_desc (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col10 (type: int) + 0 _col4 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -241,11 +235,11 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 28), Map 9 (PARTITION-LEVEL SORT, 28) -Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 178), Reducer 2 (PARTITION-LEVEL SORT, 178) -Reducer 4 <- Map 12 (PARTITION-LEVEL SORT, 65), Reducer 3 (PARTITION-LEVEL SORT, 65) -Reducer 5 <- Map 14 (PARTITION-LEVEL SORT, 83), Reducer 4 (PARTITION-LEVEL SORT, 83) -Reducer 6 <- Map 15 (PARTITION-LEVEL SORT, 13), Reducer 5 (PARTITION-LEVEL SORT, 13) -Reducer 7
[36/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out index 37970ab..7e09d5e 100644 --- a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out @@ -51,14 +51,14 @@ STAGE PLANS: predicate: (csmallint < 100S) (type: boolean) Statistics: Num rows: 4096 Data size: 1031250 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 4096 Data size: 1031250 Basic stats: COMPLETE Column stats: COMPLETE + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean), UDFToInteger(csmallint) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 4096 Data size: 1043486 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator -key expressions: UDFToInteger(_col1) (type: int) +key expressions: _col12 (type: int) sort order: + -Map-reduce partition columns: UDFToInteger(_col1) (type: int) -Statistics: Num rows: 4096 Data size: 1031250 Basic stats: COMPLETE Column 
stats: COMPLETE +Map-reduce partition columns: _col12 (type: int) +Statistics: Num rows: 4096 Data size: 1043486 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs @@ -72,14 +72,14 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string) + expressions: UDFToInteger(key) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator -key expressions: UDFToInteger(_col0) (type: int) +key expressions: _col0 (type: int) sort order: + -Map-reduce partition columns: UDFToInteger(_col0) (type: int) -Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE +Map-reduce partition columns: _col0 (type: int) +Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Map 5 @@ -92,14 +92,14 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string) + expressions: (UDFToInteger(key) + 0) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats:
[38/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out index c72e4b2..c43ad91 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out @@ -109,13 +109,13 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE Select Operator -expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) -outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 -Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE +expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), UDFToDouble(p_size) (type: double) +outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 +Statistics: Num rows: 26 Data size: 16302 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Statistics: Num rows: 26 Data size: 16302 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), 
_col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: double) Execution mode: vectorized, llap LLAP IO: no inputs Map 3 @@ -147,16 +147,16 @@ STAGE PLANS: keys: 0 1 -outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 -residual filter predicates: {(UDFToDouble(_col5) > _col9)} -Statistics: Num rows: 8 Data size: 5120 Basic stats: COMPLETE Column stats: NONE +outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 +residual filter predicates: {(_col9 > _col10)} +Statistics: Num rows: 8 Data size: 5184 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 8 Data size: 5120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 5184 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false -Statistics: Num rows: 8 Data size: 5120 Basic stats: COMPLETE Column stats: NONE +Statistics: Num rows: 8 Data size: 5184 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -397,12 +397,12 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 582 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, 
_col1, _col2, _col3, _col4,
[09/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query64.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query64.q.out b/ql/src/test/results/clientpositive/perf/tez/query64.q.out index f670c4f..7c77e9f 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query64.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query64.q.out @@ -265,9 +265,9 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 37 <- Reducer 24 (BROADCAST_EDGE), Reducer 40 (BROADCAST_EDGE), Reducer 47 (BROADCAST_EDGE) -Map 44 <- Reducer 40 (BROADCAST_EDGE) -Map 55 <- Reducer 12 (BROADCAST_EDGE), Reducer 32 (BROADCAST_EDGE), Reducer 42 (BROADCAST_EDGE), Reducer 51 (BROADCAST_EDGE) +Map 37 <- Reducer 24 (BROADCAST_EDGE), Reducer 40 (BROADCAST_EDGE), Reducer 46 (BROADCAST_EDGE) +Map 43 <- Reducer 40 (BROADCAST_EDGE) +Map 55 <- Reducer 12 (BROADCAST_EDGE), Reducer 32 (BROADCAST_EDGE), Reducer 42 (BROADCAST_EDGE), Reducer 50 (BROADCAST_EDGE) Map 56 <- Reducer 42 (BROADCAST_EDGE) Reducer 10 <- Reducer 15 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 11 <- Reducer 10 (SIMPLE_EDGE) @@ -276,18 +276,18 @@ Reducer 13 <- Reducer 31 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 14 <- Map 54 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) Reducer 15 <- Reducer 14 (SIMPLE_EDGE) Reducer 17 <- Map 16 (SIMPLE_EDGE), Reducer 38 (SIMPLE_EDGE) -Reducer 18 <- Map 43 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) -Reducer 19 <- Reducer 18 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Reducer 45 (ONE_TO_ONE_EDGE) +Reducer 19 <- Map 51 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) -Reducer 20 <- Reducer 19 (SIMPLE_EDGE), Reducer 46 (ONE_TO_ONE_EDGE) +Reducer 20 <- Reducer 19 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) Reducer 21 <- Map 52 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) Reducer 22 <- Map 36 (SIMPLE_EDGE), Reducer 21 
(SIMPLE_EDGE) Reducer 23 <- Map 53 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) Reducer 24 <- Map 16 (CUSTOM_SIMPLE_EDGE) Reducer 25 <- Map 16 (SIMPLE_EDGE), Reducer 41 (SIMPLE_EDGE) -Reducer 26 <- Map 43 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) -Reducer 27 <- Reducer 26 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) -Reducer 28 <- Reducer 27 (SIMPLE_EDGE), Reducer 50 (ONE_TO_ONE_EDGE) +Reducer 26 <- Reducer 25 (SIMPLE_EDGE), Reducer 49 (ONE_TO_ONE_EDGE) +Reducer 27 <- Map 51 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE) +Reducer 28 <- Reducer 27 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) Reducer 29 <- Map 52 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) Reducer 3 <- Map 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 30 <- Map 36 (SIMPLE_EDGE), Reducer 29 (SIMPLE_EDGE) @@ -299,13 +299,13 @@ Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) Reducer 40 <- Map 39 (CUSTOM_SIMPLE_EDGE) Reducer 41 <- Map 39 (SIMPLE_EDGE), Map 55 (SIMPLE_EDGE) Reducer 42 <- Map 39 (CUSTOM_SIMPLE_EDGE) -Reducer 45 <- Map 44 (SIMPLE_EDGE), Map 48 (SIMPLE_EDGE) -Reducer 46 <- Reducer 45 (SIMPLE_EDGE) -Reducer 47 <- Reducer 46 (CUSTOM_SIMPLE_EDGE) -Reducer 49 <- Map 48 (SIMPLE_EDGE), Map 56 (SIMPLE_EDGE) +Reducer 44 <- Map 43 (SIMPLE_EDGE), Map 47 (SIMPLE_EDGE) +Reducer 45 <- Reducer 44 (SIMPLE_EDGE) +Reducer 46 <- Reducer 45 (CUSTOM_SIMPLE_EDGE) +Reducer 48 <- Map 47 (SIMPLE_EDGE), Map 56 (SIMPLE_EDGE) +Reducer 49 <- Reducer 48 (SIMPLE_EDGE) Reducer 5 <- Map 36 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 50 <- Reducer 49 (SIMPLE_EDGE) -Reducer 51 <- Reducer 50 (CUSTOM_SIMPLE_EDGE) +Reducer 50 <- Reducer 49 (CUSTOM_SIMPLE_EDGE) Reducer 6 <- Map 54 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 23 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 8 <- Map 54 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) @@ -320,10 +320,10 @@ Stage-0 Select Operator [SEL_1200] (rows=2169965329 width=1702) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20"] <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_259] -Select Operator [SEL_258] (rows=2169965329 width=1694) + SHUFFLE [RS_257] +Select Operator [SEL_256] (rows=2169965329 width=1694) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"] - Filter Operator [FIL_257] (rows=2169965329 width=1694) + Filter Operator [FIL_255] (rows=2169965329 width=1694) predicate:(_col19 <= _col12) Merge Join Operator [MERGEJOIN_1087] (rows=6509895988 width=1694) Conds:RS_1171._col2, _col1, _col3=RS_1199._col1, _col0,
[19/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out index 2504d78..b4410ff 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out @@ -141,10 +141,10 @@ POSTHOOK: Input: default@web_sales POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) - HiveProject(item_id=[$0], ss_item_rev=[$3], ss_dev=[*(/(/($3, +(+($3, $1), $5)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], cs_item_rev=[$1], cs_dev=[*(/(/($1, +(+($3, $1), $5)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], ws_item_rev=[$5], ws_dev=[*(/(/($5, +(+($3, $1), $5)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], average=[/(+(+($3, $1), $5), CAST(3):DECIMAL(10, 0))]) -HiveJoin(condition=[AND(AND(AND(AND(=($0, $4), BETWEEN(false, $3, *(0.9, $5), *(1.1, $5))), BETWEEN(false, $1, *(0.9, $5), *(1.1, $5))), BETWEEN(false, $5, *(0.9, $3), *(1.1, $3))), BETWEEN(false, $5, *(0.9, $1), *(1.1, $1)))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[AND(AND(=($2, $0), BETWEEN(false, $3, *(0.9, $1), *(1.1, $1))), BETWEEN(false, $1, *(0.9, $3), *(1.1, $3)))], joinType=[inner], algorithm=[none], cost=[not available]) -HiveProject(i_item_id=[$0], $f1=[$1]) + HiveProject(item_id=[$0], ss_item_rev=[$5], ss_dev=[*(/(/($5, +(+($5, $1), $9)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], cs_item_rev=[$1], cs_dev=[*(/(/($1, +(+($5, $1), $9)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], ws_item_rev=[$9], ws_dev=[*(/(/($9, +(+($5, $1), $9)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], average=[/(+(+($5, $1), $9), CAST(3):DECIMAL(10, 0))]) +HiveJoin(condition=[AND(AND(AND(AND(=($0, $8), 
BETWEEN(false, $5, $10, $11)), BETWEEN(false, $1, $10, $11)), BETWEEN(false, $9, $6, $7)), BETWEEN(false, $9, $2, $3))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(AND(=($4, $0), BETWEEN(false, $5, $2, $3)), BETWEEN(false, $1, $6, $7))], joinType=[inner], algorithm=[none], cost=[not available]) +HiveProject($f0=[$0], $f1=[$1], *=[*(0.9, $1)], *3=[*(1.1, $1)]) HiveAggregate(group=[{4}], agg#0=[sum($2)]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -175,7 +175,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(d_week_seq=[$4]) HiveFilter(condition=[AND(=($2, _UTF-16LE'1998-02-19'), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) -HiveProject(i_item_id=[$0], $f1=[$1]) +HiveProject($f0=[$0], $f1=[$1], *=[*(0.9, $1)], *3=[*(1.1, $1)]) HiveAggregate(group=[{4}], agg#0=[sum($2)]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -206,7 +206,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(d_week_seq=[$4]) HiveFilter(condition=[AND(=($2, _UTF-16LE'1998-02-19'), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_id=[$0], $f1=[$1]) + HiveProject($f0=[$0], $f1=[$1], *=[*(0.9, $1)], *3=[*(1.1, $1)]) HiveAggregate(group=[{4}], agg#0=[sum($2)]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/cbo_query59.q.out -- diff --git 
a/ql/src/test/results/clientpositive/perf/tez/cbo_query59.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query59.q.out index bb92a1f..8674a8a 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query59.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query59.q.out @@ -94,24 +94,24 @@ POSTHOOK: Input: default@store_sales POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100]) -
[47/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/join47.q.out -- diff --git a/ql/src/test/results/clientpositive/join47.q.out b/ql/src/test/results/clientpositive/join47.q.out index 2892b8b..169244e 100644 --- a/ql/src/test/results/clientpositive/join47.q.out +++ b/ql/src/test/results/clientpositive/join47.q.out @@ -363,24 +363,24 @@ STAGE PLANS: alias: src1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string), value (type: string), UDFToDouble(value) BETWEEN 100.0D AND 102.0D (type: boolean) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE -value expressions: _col0 (type: string), _col1 (type: string) +value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: boolean) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string), value (type: string), UDFToDouble(value) BETWEEN 100.0D AND 102.0D (type: boolean) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE -value expressions: _col0 (type: string), _col1 (type: string) +value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: boolean) Reduce Operator Tree: Join Operator condition map: @@ -388,19 +388,23 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1, _col2, _col3 - residual filter predicates: {((_col0 
= _col2) or UDFToDouble(_col1) BETWEEN 100.0D AND 102.0D or UDFToDouble(_col3) BETWEEN 100.0D AND 102.0D)} - Statistics: Num rows: 9026 Data size: 173876 Basic stats: COMPLETE Column stats: NONE - Limit -Number of rows: 10 -Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE -File Output Operator - compressed: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col0 = _col3) or _col2 or _col5)} + Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) +outputColumnNames: _col0, _col1, _col2, _col3 +Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE +Limit + Number of rows: 10 Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator +compressed: false +Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE +table: +input format: org.apache.hadoop.mapred.SequenceFileInputFormat +output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat +serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -472,24 +476,24 @@ STAGE PLANS: alias: src1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string), value (type: string), UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
[03/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/smb_mapjoin_46.q.out -- diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_46.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_46.q.out index 98789d7..ddb436b 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_46.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_46.q.out @@ -187,25 +187,28 @@ STAGE PLANS: alias: test1_n5 Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int), value (type: int), col_1 (type: string) - outputColumnNames: _col0, _col1, _col2 + expressions: key (type: int), value (type: int), col_1 (type: string), key BETWEEN 100 AND 102 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Left Outer Join 0 to 1 filter predicates: - 0 {_col0 BETWEEN 100 AND 102} + 0 {_col3} 1 keys: 0 _col1 (type: int) 1 _col1 (type: int) -outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 -File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6 +Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator +compressed: false +table: +input format: org.apache.hadoop.mapred.SequenceFileInputFormat +output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat +serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -283,7 +286,7 @@ STAGE PLANS: Statistics: Num 
rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator filter predicates: -0 {_col0 BETWEEN 100 AND 102} +0 {_col3} 1 keys: 0 @@ -296,27 +299,31 @@ STAGE PLANS: alias: test1_n5 Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int), value (type: int), col_1 (type: string) - outputColumnNames: _col0, _col1, _col2 + expressions: key (type: int), value (type: int), col_1 (type: string), key BETWEEN 100 AND 102 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join 0 to 1 filter predicates: - 0 {_col0 BETWEEN 100 AND 102} + 0 {_col3} 1 keys: 0 1 -outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 +outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6 Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE -File Output Operator - compressed: false +Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator +compressed: false +
[42/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/explainuser_1.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out index 067a43c..c86450a 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out @@ -457,16 +457,16 @@ Stage-0 SHUFFLE [RS_23] PartitionCols:_col0, _col1 Group By Operator [GBY_22] (rows=1 width=20) - Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col4, _col1 -Select Operator [SEL_21] (rows=2 width=20) - Output:["_col1","_col4"] - Merge Join Operator [MERGEJOIN_57] (rows=2 width=20) - Conds:RS_17._col0=RS_18._col0(Inner),RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col3 > 0) or (_col1 >= 0))} {((_col3 + _col6) >= 0)} + Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col5, _col1 +Select Operator [SEL_21] (rows=1 width=24) + Output:["_col1","_col5"] + Merge Join Operator [MERGEJOIN_57] (rows=1 width=24) + Conds:RS_17._col0=RS_18._col0(Inner),RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col7"],residual filter predicates:{((_col4 > 0) or _col2)} {((_col4 + _col7) >= 0)} <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_17] PartitionCols:_col0 - Select Operator [SEL_2] (rows=18 width=84) -Output:["_col0","_col1"] + Select Operator [SEL_2] (rows=18 width=88) +Output:["_col0","_col1","_col2"] Filter Operator [FIL_36] (rows=18 width=84) predicate:key is not null TableScan [TS_0] (rows=20 width=84) @@ -547,16 +547,16 @@ Stage-0 SHUFFLE [RS_23] PartitionCols:_col0, _col1 Group By Operator [GBY_22] (rows=1 width=20) - Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col4 -Select Operator [SEL_21] (rows=1 width=20) - Output:["_col1","_col4"] - Merge Join Operator [MERGEJOIN_57] (rows=1 width=20) - 
Conds:RS_17._col0=RS_18._col0(Inner),RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col3 > 0) or _col1 is not null)} {((_col1 >= 1) or (_col4 >= 1L))} {((UDFToLong(_col1) + _col4) >= 0)} {((_col3 + _col6) >= 0)} + Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col7 +Select Operator [SEL_21] (rows=1 width=36) + Output:["_col1","_col7"] + Merge Join Operator [MERGEJOIN_57] (rows=1 width=36) + Conds:RS_17._col0=RS_18._col0(Inner),RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col9"],residual filter predicates:{((_col6 > 0) or _col2)} {(_col3 or (_col7 >= 1L))} {((_col4 + _col7) >= 0)} {((_col6 + _col9) >= 0)} <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_17] PartitionCols:_col0 - Select Operator [SEL_2] (rows=18 width=84) -Output:["_col0","_col1"] + Select Operator [SEL_2] (rows=18 width=99) +Output:["_col0","_col1","_col2","_col3","_col4"] Filter Operator [FIL_36] (rows=18 width=84) predicate:((c_int > 0) and key is not null) TableScan [TS_0] (rows=20 width=84) @@ -630,16 +630,16 @@ Stage-0 SHUFFLE [RS_23] PartitionCols:_col0, _col1 Group By Operator [GBY_22] (rows=1 width=20) - Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col4 - Select Operator [SEL_21] (rows=1 width=20) -Output:["_col1","_col4"] -Merge Join Operator [MERGEJOIN_54] (rows=1 width=20) - Conds:RS_17._col0=RS_18._col0(Inner),RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col3 > 0) or (_col1 >= 0))} {((_col3 + _col6) >= 2)} + Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col5 + Select Operator [SEL_21] (rows=1
[48/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/join45.q.out -- diff --git a/ql/src/test/results/clientpositive/join45.q.out b/ql/src/test/results/clientpositive/join45.q.out index 6cf6c33..7865e0e 100644 --- a/ql/src/test/results/clientpositive/join45.q.out +++ b/ql/src/test/results/clientpositive/join45.q.out @@ -363,24 +363,24 @@ STAGE PLANS: alias: src1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string), value (type: string), UDFToDouble(value) BETWEEN 100.0D AND 102.0D (type: boolean) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE -value expressions: _col0 (type: string), _col1 (type: string) +value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: boolean) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string), value (type: string), UDFToDouble(value) BETWEEN 100.0D AND 102.0D (type: boolean) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE -value expressions: _col0 (type: string), _col1 (type: string) +value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: boolean) Reduce Operator Tree: Join Operator condition map: @@ -388,21 +388,25 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, 
_col2, _col3, _col4, _col5 Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE Filter Operator -predicate: ((_col0 = _col2) or UDFToDouble(_col1) BETWEEN 100.0D AND 102.0D or UDFToDouble(_col3) BETWEEN 100.0D AND 102.0D) (type: boolean) -Statistics: Num rows: 9026 Data size: 173876 Basic stats: COMPLETE Column stats: NONE -Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE - File Output Operator -compressed: false +predicate: ((_col0 = _col3) or _col2 or _col5) (type: boolean) +Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE +Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE + Limit +Number of rows: 10 Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE -table: -input format: org.apache.hadoop.mapred.SequenceFileInputFormat -output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat -serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -474,24 +478,24 @@ STAGE PLANS: alias: src1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string), value (type: string), UDFToDouble(key)
[41/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/join46.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/join46.q.out b/ql/src/test/results/clientpositive/llap/join46.q.out index fcd0d83..ec58429 100644 --- a/ql/src/test/results/clientpositive/llap/join46.q.out +++ b/ql/src/test/results/clientpositive/llap/join46.q.out @@ -187,15 +187,15 @@ STAGE PLANS: alias: test1_n2 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator -expressions: key (type: int), value (type: int), col_1 (type: string) -outputColumnNames: _col0, _col1, _col2 -Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE +expressions: key (type: int), value (type: int), col_1 (type: string), key BETWEEN 100 AND 102 (type: boolean) +outputColumnNames: _col0, _col1, _col2, _col3 +Statistics: Num rows: 6 Data size: 596 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col2 (type: string) + Statistics: Num rows: 6 Data size: 596 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: boolean) Execution mode: vectorized, llap LLAP IO: no inputs Map 3 @@ -226,20 +226,24 @@ STAGE PLANS: condition map: Left Outer Join 0 to 1 filter predicates: - 0 {VALUE._col0 BETWEEN 100 AND 102} + 0 {VALUE._col2} 1 keys: 0 _col1 (type: int) 1 _col1 (type: int) -outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 +outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6 Statistics: Num rows: 8 Data size: 1049 Basic stats: COMPLETE Column stats: COMPLETE -File Output Operator - compressed: false +Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: 
string), _col4 (type: int), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 8 Data size: 1049 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator +compressed: false +Statistics: Num rows: 8 Data size: 1049 Basic stats: COMPLETE Column stats: COMPLETE +table: +input format: org.apache.hadoop.mapred.SequenceFileInputFormat +output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat +serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -308,13 +312,13 @@ STAGE PLANS: alias: test1_n2 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator -expressions: key (type: int), value (type: int), col_1 (type: string) -outputColumnNames: _col0, _col1, _col2 -Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE +expressions: key (type: int), value (type: int), col_1 (type: string), key BETWEEN 100 AND 102 (type: boolean) +outputColumnNames: _col0, _col1, _col2, _col3 +Statistics: Num rows: 6 Data size: 596 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Statistics: Num rows: 6 Data size: 596 Basic stats: COMPLETE Column stats: COMPLETE +
[05/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query91.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query91.q.out b/ql/src/test/results/clientpositive/perf/tez/query91.q.out index 5b4952d..98e8adf 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query91.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query91.q.out @@ -104,13 +104,13 @@ Stage-0 SHUFFLE [RS_42] PartitionCols:_col0, _col1, _col2, _col3, _col4 Group By Operator [GBY_41] (rows=1 width=585) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col12)"],keys:_col5, _col6, _col17, _col18, _col19 + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col11)"],keys:_col5, _col6, _col14, _col15, _col16 Merge Join Operator [MERGEJOIN_144] (rows=10438 width=473) - Conds:RS_37._col2=RS_165._col0(Inner),Output:["_col5","_col6","_col12","_col17","_col18","_col19"] + Conds:RS_37._col2=RS_165._col0(Inner),Output:["_col5","_col6","_col11","_col14","_col15","_col16"] <-Map 15 [SIMPLE_EDGE] vectorized SHUFFLE [RS_165] PartitionCols:_col0 -Select Operator [SEL_164] (rows=3600 width=96) +Select Operator [SEL_164] (rows=3600 width=4) Output:["_col0"] Filter Operator [FIL_163] (rows=3600 width=96) predicate:((hd_buy_potential like '0-500%') and hd_demo_sk is not null) @@ -120,12 +120,12 @@ Stage-0 SHUFFLE [RS_37] PartitionCols:_col2 Merge Join Operator [MERGEJOIN_143] (rows=20876 width=473) - Conds:RS_34._col0=RS_35._col1(Inner),Output:["_col2","_col5","_col6","_col12","_col17","_col18","_col19"] + Conds:RS_34._col0=RS_35._col1(Inner),Output:["_col2","_col5","_col6","_col11","_col14","_col15","_col16"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_35] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_142] (rows=657590 width=312) - Conds:RS_21._col2=RS_162._col0(Inner),Output:["_col1","_col3","_col8","_col9","_col10"] + 
Conds:RS_21._col2=RS_162._col0(Inner),Output:["_col1","_col3","_col6","_col7","_col8"] <-Map 14 [SIMPLE_EDGE] vectorized SHUFFLE [RS_162] PartitionCols:_col0 @@ -152,7 +152,7 @@ Stage-0 <-Map 13 [SIMPLE_EDGE] vectorized SHUFFLE [RS_159] PartitionCols:_col0 -Select Operator [SEL_158] (rows=50 width=12) +Select Operator [SEL_158] (rows=50 width=4) Output:["_col0"] Filter Operator [FIL_157] (rows=50 width=12) predicate:((d_moy = 11) and (d_year = 1999) and d_date_sk is not null) @@ -166,7 +166,7 @@ Stage-0 <-Map 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_153] PartitionCols:_col0 -Select Operator [SEL_152] (rows=800 width=116) +Select Operator [SEL_152] (rows=800 width=4) Output:["_col0"] Filter Operator [FIL_151] (rows=800 width=112) predicate:((ca_gmt_offset = -7) and ca_address_sk is not null) http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query92.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query92.q.out b/ql/src/test/results/clientpositive/perf/tez/query92.q.out index 50918f0..1f837dd 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query92.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query92.q.out @@ -104,9 +104,9 @@ Stage-0 Select Operator [SEL_34] (rows=2478 width=112) Output:["_col2"] Filter Operator [FIL_33] (rows=2478 width=112) - predicate:(_col2 > CAST( (1.3 * _col6) AS decimal(14,7))) + predicate:(_col2 > _col5) Merge Join
[35/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out index b1eec43..93791ac 100644 --- a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out +++ b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out @@ -667,238 +667,184 @@ POSTHOOK: query: select unionsrc.key, unionsrc.value FROM (select s1.key as key, POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: hdfs://### HDFS PATH ### +100val_100 104val_104 105val_105 113val_113 118val_118 +119val_119 12 val_12 120val_120 +128val_128 +129val_129 133val_133 136val_136 +145val_145 155val_155 158val_158 160val_160 162val_162 +167val_167 168val_168 17 val_17 170val_170 +174val_174 175val_175 +178val_178 18 val_18 180val_180 +181val_181 186val_186 19 val_19 +193val_193 197val_197 +199val_199 +20 val_20 200val_200 +201val_201 +213val_213 +214val_214 216val_216 218val_218 +219val_219 +221val_221 222val_222 +223val_223 224val_224 +226val_226 228val_228 +233val_233 235val_235 237val_237 239val_239 +241val_241 244val_244 247val_247 +249val_249 256val_256 +260val_260 +262val_262 263val_263 273val_273 +277val_277 28 val_28 +281val_281 283val_283 286val_286 +287val_287 +288val_288 292val_292 +298val_298 +302val_302 306val_306 308val_308 +310val_310 +323val_323 327val_327 33 val_33 +336val_336 +341val_341 +344val_344 348val_348 +351val_351 353val_353 362val_362 366val_366 +375val_375 +382val_382 +384val_384 +393val_393 +395val_395 396val_396 397val_397 +399val_399 401val_401 +403val_403 +406val_406 409val_409 411val_411 +418val_418 419val_419 427val_427 43 val_43 432val_432 +435val_435 436val_436 439val_439 443val_443 453val_453 +455val_455 +459val_459 460val_460 462val_462 47 val_47 472val_472 -485val_485 -496val_496 -54 val_54 -64 val_64 -70 val_70 -8 val_8 -83 val_83 -84 val_84 -85 val_85 
-90 val_90 -0 val_0 -103val_103 -114val_114 -125val_125 -138val_138 -146val_146 -150val_150 -152val_152 -153val_153 -156val_156 -157val_157 -165val_165 -172val_172 -177val_177 -179val_179 -187val_187 -195val_195 -196val_196 -217val_217 -242val_242 -248val_248 -252val_252 -265val_265 -27 val_27 -272val_272 -280val_280 -291val_291 -305val_305 -309val_309 -311val_311 -315val_315 -317val_317 -322val_322 -333val_333 -34 val_34 -345val_345 -35 val_35 -356val_356 -364val_364 -368val_368 -369val_369 -37 val_37 -373val_373 -377val_377 -4 val_4 -402val_402 -404val_404 -413val_413 -42 val_42 -430val_430 -431val_431 -444val_444 -449val_449 -452val_452 -454val_454 -457val_457 -463val_463 -466val_466 -470val_470 -475val_475 -481val_481 -489val_489 -491val_491 -57 val_57 -65 val_65 -66 val_66 -74 val_74 -76 val_76 -78 val_78 -9 val_9 -92 val_92 -95 val_95 -100val_100 -119val_119 -128val_128 -129val_129 -145val_145 -167val_167 -174val_174 -178val_178 -181val_181 -193val_193 -199val_199 -20 val_20 -201val_201 -213val_213 -214val_214 -219val_219 -221val_221 -223val_223 -226val_226 -233val_233 -241val_241 -249val_249 -260val_260 -262val_262 -277val_277 -281val_281 -287val_287 -288val_288 -298val_298 -302val_302 -310val_310 -323val_323 -336val_336 -341val_341 -344val_344 -351val_351 -375val_375 -382val_382 -384val_384 -393val_393 -395val_395 -399val_399 -403val_403 -406val_406 -418val_418 -435val_435 -455val_455 -459val_459 477val_477 478val_478 479val_479 482val_482 +485val_485 493val_493 494val_494 495val_495 +496val_496 497val_497 5 val_5 +54 val_54 58 val_58 +64 val_64 67 val_67 +70 val_70 77 val_77 +8 val_8 80 val_80 +83 val_83 +84 val_84 +85 val_85 86 val_86 +90 val_90 97 val_97 98 val_98 +0 val_0 10 val_10 +103val_103 11 val_11 111
[15/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query17.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query17.q.out b/ql/src/test/results/clientpositive/perf/tez/query17.q.out index bb18527..642a67f 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query17.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query17.q.out @@ -147,7 +147,7 @@ Stage-0 Select Operator [SEL_47] (rows=8581091759 width=381) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] Merge Join Operator [MERGEJOIN_213] (rows=8581091759 width=381) - Conds:RS_44._col3=RS_257._col0(Inner),Output:["_col5","_col9","_col10","_col14","_col21","_col25"] + Conds:RS_44._col3=RS_257._col0(Inner),Output:["_col5","_col8","_col9","_col13","_col19","_col22"] <-Map 21 [SIMPLE_EDGE] vectorized SHUFFLE [RS_257] PartitionCols:_col0 @@ -161,12 +161,12 @@ Stage-0 SHUFFLE [RS_44] PartitionCols:_col3 Merge Join Operator [MERGEJOIN_212] (rows=8581091759 width=299) -Conds:RS_41._col1, _col2, _col4=RS_42._col7, _col8, _col9(Inner),Output:["_col3","_col5","_col9","_col10","_col14","_col21"] +Conds:RS_41._col1, _col2, _col4=RS_42._col6, _col7, _col8(Inner),Output:["_col3","_col5","_col8","_col9","_col13","_col19"] <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_42] - PartitionCols:_col7, _col8, _col9 + PartitionCols:_col6, _col7, _col8 Merge Join Operator [MERGEJOIN_211] (rows=1640229377 width=19) -Conds:RS_28._col2, _col1=RS_29._col1, _col2(Inner),Output:["_col3","_col7","_col8","_col9","_col10"] +Conds:RS_28._col2, _col1=RS_29._col1, _col2(Inner),Output:["_col3","_col6","_col7","_col8","_col9"] <-Reducer 10 [SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_28] PartitionCols:_col2, _col1 @@ -175,7 +175,7 @@ Stage-0 <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_222] PartitionCols:_col0 - Select Operator [SEL_218] (rows=304 width=94) + Select Operator [SEL_218] (rows=304 width=4) 
Output:["_col0"] Filter Operator [FIL_215] (rows=304 width=94) predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) @@ -205,7 +205,7 @@ Stage-0 <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_224] PartitionCols:_col0 - Select Operator [SEL_219] (rows=304 width=94) + Select Operator [SEL_219] (rows=304 width=4) Output:["_col0"] Filter Operator [FIL_216] (rows=304 width=94) predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) @@ -249,7 +249,7 @@ Stage-0 SHUFFLE [RS_41] PartitionCols:_col1, _col2, _col4 Merge Join Operator [MERGEJOIN_208] (rows=27749405 width=294) - Conds:RS_38._col1=RS_254._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col9","_col10"] + Conds:RS_38._col1=RS_254._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col8","_col9"] <-Map 18 [SIMPLE_EDGE] vectorized SHUFFLE [RS_254] PartitionCols:_col0 @@ -267,7 +267,7 @@ Stage-0 <-Map 8 [SIMPLE_EDGE] vectorized
[44/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/correlationoptimizer4.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/correlationoptimizer4.q.out b/ql/src/test/results/clientpositive/llap/correlationoptimizer4.q.out index 87c5f48..4a77044 100644 --- a/ql/src/test/results/clientpositive/llap/correlationoptimizer4.q.out +++ b/ql/src/test/results/clientpositive/llap/correlationoptimizer4.q.out @@ -1357,15 +1357,16 @@ STAGE PLANS: Tez A masked pattern was here Edges: -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) A masked pattern was here Vertices: Map 1 Map Operator Tree: TableScan - alias: x + alias: z Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) @@ -1412,10 +1413,10 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true -Map 6 +Map 7 Map Operator Tree: TableScan - alias: z + alias: x Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) @@ -1442,25 +1443,23 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Right Outer Join 0 to 1 - Right Outer Join 1 to 2 + Left Outer Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) -outputColumnNames: _col2 -Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE +outputColumnNames: _col0 +Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() - keys: _col2 (type: int) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 
Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) -Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE +Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) MergeJoin Vectorization: enabled: false @@ -1514,6 +1513,25 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +Reducer 6 +Execution mode: llap +Reduce Operator Tree: + Merge Join Operator +condition map: + Left Outer Join 0 to 1 +keys: + 0 _col0 (type: int) + 1 _col0 (type: int) +outputColumnNames: _col0 +Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE +MergeJoin Vectorization: +enabled: false +enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator @@ -1573,15 +1591,16 @@ STAGE PLANS: Tez A masked pattern was here Edges: -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4
[40/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/mapjoin46.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/mapjoin46.q.out b/ql/src/test/results/clientpositive/llap/mapjoin46.q.out index a1fe936..d9d2396 100644 --- a/ql/src/test/results/clientpositive/llap/mapjoin46.q.out +++ b/ql/src/test/results/clientpositive/llap/mapjoin46.q.out @@ -180,29 +180,33 @@ STAGE PLANS: alias: test1_n4 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator -expressions: key (type: int), value (type: int), col_1 (type: string) -outputColumnNames: _col0, _col1, _col2 -Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE +expressions: key (type: int), value (type: int), col_1 (type: string), key BETWEEN 100 AND 102 (type: boolean) +outputColumnNames: _col0, _col1, _col2, _col3 +Statistics: Num rows: 6 Data size: 596 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 filter predicates: -0 {_col0 BETWEEN 100 AND 102} +0 {_col3} 1 keys: 0 _col1 (type: int) 1 _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6 input vertices: 1 Map 2 Statistics: Num rows: 8 Data size: 1049 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator -compressed: false + Select Operator +expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: string) +outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 8 Data size: 1049 Basic stats: COMPLETE Column stats: COMPLETE -table: -input format: org.apache.hadoop.mapred.SequenceFileInputFormat -output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat -serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +File Output Operator + compressed: false + Statistics: Num rows: 8 
Data size: 1049 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs Map 2 @@ -294,29 +298,33 @@ STAGE PLANS: alias: test1_n4 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator -expressions: key (type: int), value (type: int), col_1 (type: string) -outputColumnNames: _col0, _col1, _col2 -Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE +expressions: key (type: int), value (type: int), col_1 (type: string), key BETWEEN 100 AND 102 (type: boolean) +outputColumnNames: _col0, _col1, _col2, _col3 +Statistics: Num rows: 6 Data size: 596 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 filter predicates: -0 {_col0 BETWEEN 100 AND 102} +0 {_col3} 1 keys: 0 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6 input vertices: 1 Map 2 Statistics: Num rows: 6 Data size: 1142 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator -compressed: false +
[20/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/cbo_query38.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query38.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query38.q.out index 9633df1..cbf9bca 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query38.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query38.q.out @@ -75,7 +75,7 @@ HiveSortLimit(fetch=[100]) HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) -HiveProject(d_date_sk=[$0], d_date=[$2], d_month_seq=[$3]) +HiveProject(d_date_sk=[$0], d_date=[$2]) HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(c_last_name=[$1], c_first_name=[$0], d_date=[$2], $f3=[$3]) @@ -90,7 +90,7 @@ HiveSortLimit(fetch=[100]) HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) -HiveProject(d_date_sk=[$0], d_date=[$2], d_month_seq=[$3]) +HiveProject(d_date_sk=[$0], d_date=[$2]) HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(c_last_name=[$1], c_first_name=[$0], d_date=[$2], $f3=[$3]) @@ -105,7 +105,7 @@ HiveSortLimit(fetch=[100]) HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) -HiveProject(d_date_sk=[$0], d_date=[$2], d_month_seq=[$3]) +HiveProject(d_date_sk=[$0], d_date=[$2]) HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) HiveTableScan(table=[[default, 
date_dim]], table:alias=[date_dim]) http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/cbo_query39.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query39.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query39.q.out index fd3038e..51bb901 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query39.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query39.q.out @@ -68,8 +68,8 @@ HiveProject(w_warehouse_sk=[$0], i_item_sk=[$1], d_moy=[CAST(4):INTEGER], mean=[ HiveProject(w_warehouse_sk=[$1], i_item_sk=[$2], mean=[/(CAST($6):DOUBLE, $5)], cov=[CASE(=(/(CAST($6):DOUBLE, $5), 0), null, /(POWER(/(-($3, /(*($4, $4), $5)), CASE(=($5, 1), null, -($5, 1))), 0.5), /(CAST($6):DOUBLE, $5)))]) HiveFilter(condition=[CASE(=(/(CAST($6):DOUBLE, $5), 0), false, >(/(POWER(/(-($3, /(*($4, $4), $5)), CASE(=($5, 1), null, -($5, 1))), 0.5), /(CAST($6):DOUBLE, $5)), 1))]) HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($5)], agg#1=[sum($4)], agg#2=[count($3)], agg#3=[sum($3)]) - HiveProject($f0=[$9], $f1=[$8], $f2=[$0], $f4=[$4], $f40=[CAST($4):DOUBLE], $f6=[*(CAST($4):DOUBLE, CAST($4):DOUBLE)]) -HiveJoin(condition=[=($3, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$7], $f1=[$6], $f2=[$0], $f4=[$4], $f40=[CAST($4):DOUBLE], $f6=[*(CAST($4):DOUBLE, CAST($4):DOUBLE)]) +HiveJoin(condition=[=($3, $6)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_sk=[$0]) HiveFilter(condition=[IS NOT NULL($0)]) @@ -78,7 +78,7 @@ HiveProject(w_warehouse_sk=[$0], i_item_sk=[$1], d_moy=[CAST(4):INTEGER], mean=[ HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3]) HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($2), IS NOT NULL($0))])
[10/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query56.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query56.q.out b/ql/src/test/results/clientpositive/perf/tez/query56.q.out index 18f64cc..17458f4 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query56.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query56.q.out @@ -204,9 +204,9 @@ Stage-0 SHUFFLE [RS_71] PartitionCols:_col0 Group By Operator [GBY_70] (rows=355 width=212) - Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col1 + Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 Merge Join Operator [MERGEJOIN_303] (rows=339151 width=100) - Conds:RS_66._col0=RS_67._col4(Inner),Output:["_col1","_col8"] + Conds:RS_66._col0=RS_67._col3(Inner),Output:["_col1","_col7"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_66] PartitionCols:_col0 @@ -239,15 +239,15 @@ Stage-0 default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_id","i_color"] <-Reducer 23 [SIMPLE_EDGE] SHUFFLE [RS_67] -PartitionCols:_col4 +PartitionCols:_col3 Select Operator [SEL_62] (rows=1550375 width=13) - Output:["_col4","_col5"] + Output:["_col3","_col4"] Merge Join Operator [MERGEJOIN_298] (rows=1550375 width=13) Conds:RS_59._col1=RS_346._col0(Inner),Output:["_col2","_col3"] <-Map 28 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_346] PartitionCols:_col0 - Select Operator [SEL_343] (rows=800 width=116) + Select Operator [SEL_343] (rows=800 width=4) Output:["_col0"] Filter Operator [FIL_342] (rows=800 width=112) predicate:((ca_gmt_offset = -8) and ca_address_sk is not null) @@ -261,7 +261,7 @@ Stage-0 <-Map 20 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_330] PartitionCols:_col0 - Select Operator [SEL_327] (rows=50 width=12) + Select Operator [SEL_327] (rows=50 width=4) Output:["_col0"] Filter Operator [FIL_326] (rows=50 width=12) predicate:((d_moy = 1) and (d_year = 2000) and d_date_sk is not null) @@ -320,18 +320,18 @@ Stage-0 
SHUFFLE [RS_109] PartitionCols:_col0 Group By Operator [GBY_108] (rows=355 width=212) - Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col1 + Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 Merge Join Operator [MERGEJOIN_304] (rows=172427 width=188) - Conds:RS_104._col0=RS_105._col3(Inner),Output:["_col1","_col8"] + Conds:RS_104._col0=RS_105._col2(Inner),Output:["_col1","_col7"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_104] PartitionCols:_col0 Please refer to the previous Merge Join Operator [MERGEJOIN_293] <-Reducer 26 [SIMPLE_EDGE] SHUFFLE [RS_105] -PartitionCols:_col3 +PartitionCols:_col2 Select Operator [SEL_100] (rows=788222 width=110) - Output:["_col3","_col5"] + Output:["_col2","_col4"] Merge Join Operator [MERGEJOIN_301] (rows=788222 width=110) Conds:RS_97._col2=RS_348._col0(Inner),Output:["_col1","_col3"] <-Map 28 [SIMPLE_EDGE] vectorized @@ -400,18 +400,18 @@
[18/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/cbo_query72.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query72.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query72.q.out index fca31ef..e49b44b 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query72.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query72.q.out @@ -82,10 +82,10 @@ CBO PLAN: HiveSortLimit(sort0=[$5], sort1=[$0], sort2=[$1], sort3=[$2], dir0=[DESC-nulls-last], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100]) HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5]) HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)], agg#1=[count($4)], agg#2=[count()]) - HiveProject($f0=[$15], $f1=[$13], $f2=[$22], $f3=[CASE(IS NULL($28), 1, 0)], $f4=[CASE(IS NOT NULL($28), 1, 0)]) -HiveJoin(condition=[AND(=($29, $4), =($30, $6))], joinType=[left], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$10], cs_ship_date_sk=[$11], cs_bill_cdemo_sk=[$12], cs_bill_hdemo_sk=[$13], cs_item_sk=[$14], cs_promo_sk=[$15], cs_order_number=[$16], cs_quantity=[$17], inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3], w_warehouse_sk=[$4], w_warehouse_name=[$5], i_item_sk=[$8], i_item_desc=[$9], cd_demo_sk=[$22], cd_marital_status=[$23], hd_demo_sk=[$24], hd_buy_potential=[$25], d_date_sk=[$18], d_date=[$19], d_week_seq=[$20], d_year=[$21], d_date_sk0=[$27], d_week_seq0=[$28], d_date_sk1=[$6], d_date0=[$7], p_promo_sk=[$26]) -HiveJoin(condition=[AND(=($0, $27), =($20, $28))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$15], $f1=[$13], $f2=[$19], $f3=[CASE(IS NULL($25), 1, 0)], $f4=[CASE(IS NOT NULL($25), 1, 0)]) +HiveJoin(condition=[AND(=($26, $4), =($27, $6))], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$10], cs_ship_date_sk=[$11], cs_bill_cdemo_sk=[$12], 
cs_bill_hdemo_sk=[$13], cs_item_sk=[$14], cs_promo_sk=[$15], cs_order_number=[$16], cs_quantity=[$17], inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3], w_warehouse_sk=[$4], w_warehouse_name=[$5], i_item_sk=[$8], i_item_desc=[$9], cd_demo_sk=[$21], hd_demo_sk=[$22], d_date_sk=[$18], d_week_seq=[$19], +=[$20], d_date_sk0=[$24], d_week_seq0=[$25], d_date_sk1=[$6], CAST=[$7], p_promo_sk=[$23]) +HiveJoin(condition=[AND(=($0, $24), =($19, $25))], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[AND(=($14, $1), <($3, $17))], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($4, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3]) @@ -94,29 +94,29 @@ HiveSortLimit(sort0=[$5], sort1=[$0], sort2=[$1], sort3=[$2], dir0=[DESC-nulls-l HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) -HiveProject(d_date_sk=[$0], d_date=[$1], i_item_sk=[$2], i_item_desc=[$3], cs_sold_date_sk=[$4], cs_ship_date_sk=[$5], cs_bill_cdemo_sk=[$6], cs_bill_hdemo_sk=[$7], cs_item_sk=[$8], cs_promo_sk=[$9], cs_order_number=[$10], cs_quantity=[$11], d_date_sk0=[$12], d_date0=[$13], d_week_seq=[$14], d_year=[$15], cd_demo_sk=[$16], cd_marital_status=[$17], hd_demo_sk=[$18], hd_buy_potential=[$19], p_promo_sk=[$20]) - HiveJoin(condition=[AND(=($5, $0), >(CAST($1):DOUBLE, +(CAST($13):DOUBLE, 5)))], joinType=[inner], algorithm=[none], cost=[not available]) -HiveProject(d_date_sk=[$0], d_date=[$2]) +HiveProject(d_date_sk=[$0], CAST=[$1], i_item_sk=[$2], i_item_desc=[$3], cs_sold_date_sk=[$4], cs_ship_date_sk=[$5], cs_bill_cdemo_sk=[$6], cs_bill_hdemo_sk=[$7], cs_item_sk=[$8], cs_promo_sk=[$9], cs_order_number=[$10], cs_quantity=[$11], d_date_sk0=[$12], d_week_seq=[$13], +=[$14], cd_demo_sk=[$15], 
hd_demo_sk=[$16], p_promo_sk=[$17]) + HiveJoin(condition=[AND(=($5, $0), >($1, $14))], joinType=[inner], algorithm=[none], cost=[not available]) +HiveProject(d_date_sk=[$0], CAST=[CAST($2):DOUBLE]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_sk=[$0], i_item_desc=[$4]) HiveFilter(condition=[IS NOT NULL($0)])
[13/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query27.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query27.q.out b/ql/src/test/results/clientpositive/perf/tez/query27.q.out index 59cca4f..d7fd2ed 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query27.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query27.q.out @@ -94,7 +94,7 @@ Stage-0 Select Operator [SEL_27] (rows=1427275 width=186) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] Merge Join Operator [MERGEJOIN_100] (rows=1427275 width=186) - Conds:RS_24._col1=RS_130._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col15","_col17"] + Conds:RS_24._col1=RS_130._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col11","_col13"] <-Map 14 [SIMPLE_EDGE] vectorized SHUFFLE [RS_130] PartitionCols:_col0 @@ -108,7 +108,7 @@ Stage-0 SHUFFLE [RS_24] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_99] (rows=1427275 width=90) - Conds:RS_21._col3=RS_119._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col15"] + Conds:RS_21._col3=RS_119._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col11"] <-Map 12 [SIMPLE_EDGE] vectorized SHUFFLE [RS_119] PartitionCols:_col0 @@ -126,7 +126,7 @@ Stage-0 <-Map 10 [SIMPLE_EDGE] vectorized SHUFFLE [RS_111] PartitionCols:_col0 - Select Operator [SEL_110] (rows=652 width=8) + Select Operator [SEL_110] (rows=652 width=4) Output:["_col0"] Filter Operator [FIL_109] (rows=652 width=8) predicate:((d_year = 2001) and d_date_sk is not null) @@ -140,7 +140,7 @@ Stage-0 <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_103] PartitionCols:_col0 - Select Operator [SEL_102] (rows=14776 width=269) + Select Operator [SEL_102] (rows=14776 width=4) Output:["_col0"] Filter Operator [FIL_101] (rows=14776 width=268) predicate:((cd_education_status = '2 yr Degree') and (cd_gender = 'M') and (cd_marital_status = 'U') and cd_demo_sk is not null) 
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query29.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query29.q.out b/ql/src/test/results/clientpositive/perf/tez/query29.q.out index a21c3c7..19f121e 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query29.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query29.q.out @@ -144,20 +144,20 @@ Stage-0 SHUFFLE [RS_49] PartitionCols:_col0, _col1, _col2, _col3 Group By Operator [GBY_48] (rows=21091879 width=496) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col14)","sum(_col22)","sum(_col3)"],keys:_col7, _col8, _col27, _col28 + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col13)","sum(_col19)","sum(_col3)"],keys:_col6, _col7, _col22, _col23 Top N Key Operator [TNK_93] (rows=4156223234 width=483) - keys:_col7, _col8, _col27, _col28,sort order:,top n:100 + keys:_col6, _col7, _col22, _col23,sort order:,top n:100 Merge Join Operator [MERGEJOIN_205] (rows=4156223234 width=483) -Conds:RS_44._col1, _col2=RS_45._col14, _col13(Inner),Output:["_col3","_col7","_col8","_col14","_col22","_col27","_col28"] +Conds:RS_44._col2, _col1=RS_45._col11, _col12(Inner),Output:["_col3","_col6","_col7","_col13","_col19","_col22","_col23"] <-Reducer 2 [SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_44] - PartitionCols:_col1,
[39/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/partition_shared_scan.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/partition_shared_scan.q.out b/ql/src/test/results/clientpositive/llap/partition_shared_scan.q.out index ba9e81d..f84d13f 100644 --- a/ql/src/test/results/clientpositive/llap/partition_shared_scan.q.out +++ b/ql/src/test/results/clientpositive/llap/partition_shared_scan.q.out @@ -98,12 +98,12 @@ STAGE PLANS: Select Operator expressions: i (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) -Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE +Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Map 5 @@ -118,12 +118,12 @@ STAGE PLANS: Select Operator expressions: i (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 910 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) -Statistics: Num rows: 10 Data size: 910 Basic stats: COMPLETE Column stats: COMPLETE +Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -152,10 +152,10 @@ STAGE PLANS: keys: 0 _col9 (type: int) 1 _col0 (type: int) -outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col11 +outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: 
Num rows: 100 Data size: 62700 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col9 (type: int), 'foo_n1' (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: int), 'bar' (type: string) + expressions: _col9 (type: int), 'foo_n1' (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: int), 'bar' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 100 Data size: 80400 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -240,17 +240,17 @@ STAGE PLANS: Select Operator expressions: i (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) -Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE +Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) -Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE +Statistics: Num rows: 10 Data size: 40
[25/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query66.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query66.q.out b/ql/src/test/results/clientpositive/perf/spark/query66.q.out index e8ef1dc..80723d8 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query66.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query66.q.out @@ -624,8 +624,8 @@ STAGE PLANS: predicate: (ws_ship_mode_sk is not null and ws_sold_date_sk is not null and ws_sold_time_sk is not null and ws_warehouse_sk is not null) (type: boolean) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ws_sold_date_sk (type: int), ws_sold_time_sk (type: int), ws_ship_mode_sk (type: int), ws_warehouse_sk (type: int), ws_quantity (type: int), ws_sales_price (type: decimal(7,2)), ws_net_paid_inc_tax (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + expressions: ws_sold_date_sk (type: int), ws_sold_time_sk (type: int), ws_ship_mode_sk (type: int), ws_warehouse_sk (type: int), (ws_sales_price * CAST( ws_quantity AS decimal(10,0))) (type: decimal(18,2)), (ws_net_paid_inc_tax * CAST( ws_quantity AS decimal(10,0))) (type: decimal(18,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -633,7 +633,7 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) -outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 +outputColumnNames: _col0, _col2, _col3, _col4, _col5 input vertices: 1 Map 6 Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE @@ -642,7 +642,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column 
stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(18,2)), _col5 (type: decimal(18,2)) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -656,8 +656,8 @@ STAGE PLANS: predicate: (cs_ship_mode_sk is not null and cs_sold_date_sk is not null and cs_sold_time_sk is not null and cs_warehouse_sk is not null) (type: boolean) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cs_sold_date_sk (type: int), cs_sold_time_sk (type: int), cs_ship_mode_sk (type: int), cs_warehouse_sk (type: int), cs_quantity (type: int), cs_ext_sales_price (type: decimal(7,2)), cs_net_paid_inc_ship_tax (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + expressions: cs_sold_date_sk (type: int), cs_sold_time_sk (type: int), cs_ship_mode_sk (type: int), cs_warehouse_sk (type: int), (cs_ext_sales_price * CAST( cs_quantity AS decimal(10,0))) (type: decimal(18,2)), (cs_net_paid_inc_ship_tax * CAST( cs_quantity AS decimal(10,0))) (type: decimal(18,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -665,7 +665,7 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) -outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 +outputColumnNames: _col0, _col2, _col3, _col4, _col5 input vertices: 1 Map 13 Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE @@ -674,7 +674,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int)
[49/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out -- diff --git a/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out b/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out index 1e195bc..b6d726e 100644 --- a/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out +++ b/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out @@ -542,13 +542,13 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@encrypted_table POSTHOOK: Input: default@src POSTHOOK: Output: hdfs://### HDFS PATH ### -OPTIMIZED SQL: SELECT * -FROM (SELECT `key`, `value` +OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`key` AS `key1`, `t2`.`value` AS `value1` +FROM (SELECT `key`, `value`, CAST(`key` AS DOUBLE) AS `CAST` FROM `default`.`src` WHERE `key` IS NOT NULL) AS `t0` -INNER JOIN (SELECT `key`, `value` +INNER JOIN (SELECT `key`, `value`, CAST(`key` AS DOUBLE) AS `CAST` FROM `default`.`encrypted_table` -WHERE `key` IS NOT NULL) AS `t2` ON CAST(`t0`.`key` AS DOUBLE) = CAST(`t2`.`key` AS DOUBLE) +WHERE `key` IS NOT NULL) AS `t2` ON `t0`.`CAST` = `t2`.`CAST` STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -567,14 +567,14 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: key (type: string), value (type: string) -outputColumnNames: _col0, _col1 +expressions: key (type: string), value (type: string), UDFToDouble(key) (type: double) +outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) + key expressions: _col2 (type: double) null sort order: a sort order: + - Map-reduce partition columns: 
UDFToDouble(_col0) (type: double) + Map-reduce partition columns: _col2 (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE tag: 0 value expressions: _col0 (type: string), _col1 (type: string) @@ -589,14 +589,14 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: key (type: int), value (type: string) -outputColumnNames: _col0, _col1 +expressions: key (type: int), value (type: string), UDFToDouble(key) (type: double) +outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) + key expressions: _col2 (type: double) null sort order: a sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Map-reduce partition columns: _col2 (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE tag: 1 value expressions: _col0 (type: int), _col1 (type: string) @@ -714,32 +714,36 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: -0 UDFToDouble(_col0) (type: double) -1 UDFToDouble(_col0) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3 +0 _col2 (type: double) +1 _col2 (type: double) + outputColumnNames: _col0, _col1, _col3, _col4 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator -compressed: false -GlobalTableId: 0 -directory: hdfs://### HDFS PATH ### -NumFilesPerFileSink: 1 + Select Operator +expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int), _col4 (type: string) +outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE -Stats Publishing Key Prefix: hdfs://### HDFS PATH ### -table: -input format: org.apache.hadoop.mapred.SequenceFileInputFormat 
-output format:
[46/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/join_filters_overlap.q.out -- diff --git a/ql/src/test/results/clientpositive/join_filters_overlap.q.out b/ql/src/test/results/clientpositive/join_filters_overlap.q.out index 7b5c9f2..847b45d 100644 --- a/ql/src/test/results/clientpositive/join_filters_overlap.q.out +++ b/ql/src/test/results/clientpositive/join_filters_overlap.q.out @@ -18,15 +18,6 @@ POSTHOOK: query: explain extended select * from a_n4 left outer join a_n4 b on ( POSTHOOK: type: QUERY POSTHOOK: Input: default@a_n4 A masked pattern was here -OPTIMIZED SQL: SELECT * -FROM (SELECT `key`, `value` -FROM `default`.`a_n4`) AS `t` -LEFT JOIN (SELECT `key`, CAST(50 AS INTEGER) AS `value` -FROM `default`.`a_n4` -WHERE `value` = 50) AS `t1` ON `t`.`key` = `t1`.`key` AND `t`.`value` = 50 -LEFT JOIN (SELECT `key`, CAST(60 AS INTEGER) AS `value` -FROM `default`.`a_n4` -WHERE `value` = 60) AS `t3` ON `t`.`key` = `t3`.`key` AND `t`.`value` = 60 STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -40,8 +31,8 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: key (type: int), value (type: int) - outputColumnNames: _col0, _col1 + expressions: key (type: int), value (type: int), (value = 60) (type: boolean), (value = 50) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) @@ -50,7 +41,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE tag: 0 -value expressions: _col1 (type: int) +value expressions: _col1 (type: int), _col2 (type: boolean), _col3 (type: boolean) auto parallelism: false TableScan alias: b @@ -158,37 +149,41 @@ STAGE PLANS: filter mappings: 0 [1, 1, 2, 
1] filter predicates: -0 {(VALUE._col0 = 50)} {(VALUE._col0 = 60)} +0 {VALUE._col2} {VALUE._col1} 1 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col4, _col5, _col6, _col7 Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE - File Output Operator -compressed: false -GlobalTableId: 0 - A masked pattern was here -NumFilesPerFileSink: 1 + Select Operator +expressions: _col0 (type: int), _col1 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int) +outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE - A masked pattern was here -table: -input format: org.apache.hadoop.mapred.SequenceFileInputFormat -output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat -properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types int:int:int:int:int:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -TotalFiles: 1 -GatherStats: false -MultiFileSpray: false +File Output Operator + compressed: false + GlobalTableId: 0 + A masked pattern was here + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE + A masked pattern was here + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: +columns _col0,_col1,_col2,_col3,_col4,_col5 +columns.types int:int:int:int:int:int +escape.delim \ +hive.serialization.extend.additional.nesting.levels
[12/51] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query4.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query4.q.out b/ql/src/test/results/clientpositive/perf/tez/query4.q.out index 27ce7b5..bb0d7ba 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query4.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query4.q.out @@ -271,367 +271,355 @@ Stage-0 limit:100 Stage-1 Reducer 10 vectorized - File Output Operator [FS_575] -Limit [LIM_574] (rows=100 width=85) + File Output Operator [FS_557] +Limit [LIM_556] (rows=100 width=85) Number of rows:100 - Select Operator [SEL_573] (rows=7323197 width=85) + Select Operator [SEL_555] (rows=7323197 width=85) Output:["_col0"] <-Reducer 9 [SIMPLE_EDGE] -SHUFFLE [RS_147] - Select Operator [SEL_146] (rows=7323197 width=85) +SHUFFLE [RS_141] + Select Operator [SEL_140] (rows=7323197 width=85) Output:["_col0"] -Filter Operator [FIL_145] (rows=7323197 width=533) - predicate:CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col5 is not null) THEN (((_col9 / _col5) > (_col12 / _col3))) ELSE ((null > (_col12 / _col3))) END) ELSE (CASE WHEN (_col5 is not null) THEN (((_col9 / _col5) > null)) ELSE (null) END) END - Merge Join Operator [MERGEJOIN_478] (rows=14646395 width=533) - Conds:RS_142._col2=RS_572._col0(Inner),Output:["_col3","_col5","_col9","_col11","_col12"] +Filter Operator [FIL_139] (rows=7323197 width=537) + predicate:CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col9) THEN (((_col11 / _col8) > (_col14 / _col3))) ELSE ((null > (_col14 / _col3))) END) ELSE (CASE WHEN (_col9) THEN (((_col11 / _col8) > null)) ELSE (null) END) END + Merge Join Operator [MERGEJOIN_472] (rows=14646395 width=537) + Conds:RS_136._col2=RS_554._col0(Inner),Output:["_col3","_col8","_col9","_col11","_col13","_col14"] <-Reducer 30 [SIMPLE_EDGE] vectorized -SHUFFLE [RS_572] +SHUFFLE [RS_554] PartitionCols:_col0 - Select Operator [SEL_571] (rows=8000 width=297) + Select Operator 
[SEL_553] (rows=8000 width=297) Output:["_col0","_col1","_col2"] -Group By Operator [GBY_570] (rows=8000 width=764) +Group By Operator [GBY_552] (rows=8000 width=764) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 <-Reducer 29 [SIMPLE_EDGE] - SHUFFLE [RS_126] + SHUFFLE [RS_120] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 -Group By Operator [GBY_125] (rows=8000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Operator [SEL_123] (rows=187573258 width=1043) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] -Merge Join Operator [MERGEJOIN_473] (rows=187573258 width=1043) - Conds:RS_120._col1=RS_518._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] -<-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_518] -PartitionCols:_col0 -Select Operator [SEL_517] (rows=8000 width=656) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_516] (rows=8000 width=656) -predicate:(c_customer_id is not null and c_customer_sk is not null) -TableScan [TS_114] (rows=8000 width=656) - default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_first_name","c_last_name","c_preferred_cust_flag","c_birth_country","c_login","c_email_address"] -<-Reducer 28 [SIMPLE_EDGE] - SHUFFLE [RS_120] -