HIVE-19147 : Fix PerfCliDrivers: Tpcds30T missed CAT_NAME change (Zoltan Haindrich via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <hashut...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/244ca8e5 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/244ca8e5 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/244ca8e5 Branch: refs/heads/master Commit: 244ca8e5c3192acd017d691ccdbaf0fa06c9fe39 Parents: b3fe652 Author: Zoltan Haindrich <k...@rxd.hu> Authored: Wed Apr 11 20:05:01 2018 -0700 Committer: Ashutosh Chauhan <hashut...@apache.org> Committed: Wed Apr 11 20:05:01 2018 -0700 ---------------------------------------------------------------------- data/conf/perf-reg/spark/hive-site.xml | 6 + data/conf/perf-reg/tez/hive-site.xml | 5 + .../hive/cli/control/CorePerfCliDriver.java | 6 +- .../hadoop/hive/ql/MetaStoreDumpUtility.java | 231 +++++++++ .../org/apache/hadoop/hive/ql/QTestUtil.java | 221 +-------- .../hive/ql/parse/BaseSemanticAnalyzer.java | 2 +- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 2 +- .../clientpositive/perf/spark/query11.q.out | 48 +- .../clientpositive/perf/spark/query15.q.out | 164 +++---- .../clientpositive/perf/spark/query16.q.out | 70 +-- .../clientpositive/perf/spark/query18.q.out | 216 ++++---- .../clientpositive/perf/spark/query19.q.out | 218 ++++----- .../clientpositive/perf/spark/query21.q.out | 114 +++-- .../clientpositive/perf/spark/query24.q.out | 282 +++++------ .../clientpositive/perf/spark/query25.q.out | 118 ++--- .../clientpositive/perf/spark/query29.q.out | 280 +++++------ .../clientpositive/perf/spark/query30.q.out | 262 +++++----- .../clientpositive/perf/spark/query32.q.out | 132 ++--- .../clientpositive/perf/spark/query34.q.out | 34 +- .../clientpositive/perf/spark/query35.q.out | 74 ++- .../clientpositive/perf/spark/query37.q.out | 16 +- .../clientpositive/perf/spark/query4.q.out | 214 ++++---- .../clientpositive/perf/spark/query40.q.out | 116 +++-- .../clientpositive/perf/spark/query44.q.out | 246 +++++----- .../clientpositive/perf/spark/query45.q.out | 204 ++++---- .../clientpositive/perf/spark/query46.q.out | 104 ++-- .../clientpositive/perf/spark/query47.q.out | 92 ++-- .../clientpositive/perf/spark/query48.q.out | 94 ++-- .../clientpositive/perf/spark/query5.q.out | 38 +- .../clientpositive/perf/spark/query50.q.out | 196 ++++---- .../clientpositive/perf/spark/query53.q.out | 105 ++-- .../clientpositive/perf/spark/query54.q.out | 277 ++++++----- .../clientpositive/perf/spark/query57.q.out | 92 ++-- .../clientpositive/perf/spark/query58.q.out | 490 +++++++++---------- .../clientpositive/perf/spark/query6.q.out | 350 +++++++------ .../clientpositive/perf/spark/query61.q.out | 40 +- .../clientpositive/perf/spark/query63.q.out | 105 ++-- .../clientpositive/perf/spark/query65.q.out | 100 ++-- .../clientpositive/perf/spark/query66.q.out | 20 +- .../clientpositive/perf/spark/query67.q.out | 137 +++--- .../clientpositive/perf/spark/query68.q.out | 104 ++-- .../clientpositive/perf/spark/query72.q.out | 461 +++++++++-------- .../clientpositive/perf/spark/query73.q.out | 34 +- .../clientpositive/perf/spark/query75.q.out | 248 +++++----- .../clientpositive/perf/spark/query76.q.out | 130 +++-- .../clientpositive/perf/spark/query77.q.out | 66 +-- .../clientpositive/perf/spark/query78.q.out | 273 +++++------ .../clientpositive/perf/spark/query79.q.out | 10 +- .../clientpositive/perf/spark/query8.q.out | 174 +++---- .../clientpositive/perf/spark/query80.q.out | 417 ++++++++-------- .../clientpositive/perf/spark/query81.q.out | 270 +++++----- .../clientpositive/perf/spark/query82.q.out | 16 +- .../clientpositive/perf/spark/query83.q.out | 354 +++++++------- .../clientpositive/perf/spark/query85.q.out | 309 ++++++------ .../clientpositive/perf/spark/query88.q.out | 272 +++++----- .../clientpositive/perf/spark/query89.q.out | 26 +- .../clientpositive/perf/spark/query90.q.out | 68 +-- .../clientpositive/perf/spark/query91.q.out | 14 +- .../clientpositive/perf/spark/query92.q.out | 106 ++-- .../clientpositive/perf/spark/query94.q.out | 70 +-- .../clientpositive/perf/spark/query95.q.out | 100 ++-- .../clientpositive/perf/spark/query97.q.out | 54 +- .../clientpositive/perf/spark/query99.q.out | 42 +- 63 files changed, 4603 insertions(+), 4536 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/data/conf/perf-reg/spark/hive-site.xml ---------------------------------------------------------------------- diff --git a/data/conf/perf-reg/spark/hive-site.xml b/data/conf/perf-reg/spark/hive-site.xml index 5ca660d..8a32afe 100644 --- a/data/conf/perf-reg/spark/hive-site.xml +++ b/data/conf/perf-reg/spark/hive-site.xml @@ -270,4 +270,10 @@ <value>false</value> </property> +<property> + <name>yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage</name> + <value>99</value> +</property> + + </configuration> http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/data/conf/perf-reg/tez/hive-site.xml ---------------------------------------------------------------------- diff --git a/data/conf/perf-reg/tez/hive-site.xml b/data/conf/perf-reg/tez/hive-site.xml index 62ecb74..e11f8f8 100644 --- a/data/conf/perf-reg/tez/hive-site.xml +++ b/data/conf/perf-reg/tez/hive-site.xml @@ -287,4 +287,9 @@ <value>false</value> </property> +<property> + <name>yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage</name> + <value>99</value> +</property> + </configuration> http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CorePerfCliDriver.java ---------------------------------------------------------------------- diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CorePerfCliDriver.java b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CorePerfCliDriver.java index 8ef9e0a..3ae691f 100644 --- a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CorePerfCliDriver.java +++ b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CorePerfCliDriver.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - + package org.apache.hadoop.hive.cli.control; @@ -29,6 +29,7 @@ import com.google.common.base.Strings; import org.apache.hadoop.hive.ql.QTestProcessExecResult; import org.apache.hadoop.hive.ql.QTestUtil; import org.apache.hadoop.hive.ql.QTestUtil.MiniClusterType; +import org.apache.hadoop.hive.ql.MetaStoreDumpUtility; import org.junit.After; import org.junit.AfterClass; /** @@ -70,7 +71,8 @@ public class CorePerfCliDriver extends CliAdapter{ qt.createSources(); // Manually modify the underlying metastore db to reflect statistics corresponding to // the 30TB TPCDS scale set. This way the optimizer will generate plans for a 30 TB set. - QTestUtil.setupMetaStoreTableColumnStatsFor30TBTPCDSWorkload(qt.getConf()); + MetaStoreDumpUtility.setupMetaStoreTableColumnStatsFor30TBTPCDSWorkload(qt.getConf(), + System.getProperty(QTestUtil.TEST_TMP_DIR_PROPERTY)); } catch (Exception e) { System.err.println("Exception: " + e.getMessage()); e.printStackTrace(); http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/itests/util/src/main/java/org/apache/hadoop/hive/ql/MetaStoreDumpUtility.java ---------------------------------------------------------------------- diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/ql/MetaStoreDumpUtility.java b/itests/util/src/main/java/org/apache/hadoop/hive/ql/MetaStoreDumpUtility.java new file mode 100644 index 0000000..2389c3b --- /dev/null +++ b/itests/util/src/main/java/org/apache/hadoop/hive/ql/MetaStoreDumpUtility.java @@ -0,0 +1,231 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.InputStreamReader; +import java.nio.file.FileSystems; +import java.nio.file.Files; +import java.nio.file.StandardOpenOption; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.Statement; +import java.util.Comparator; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.concurrent.ConcurrentHashMap; +import java.util.stream.Stream; + +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.Warehouse; +import org.apache.hive.testutils.HiveTestEnvSetup; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Utility class which can load an existing metastore dump. + * + * This can be used to check planning on a large scale database. + */ +public class MetaStoreDumpUtility { + + static final Logger LOG = LoggerFactory.getLogger(MetaStoreDumpUtility.class); + + public static void setupMetaStoreTableColumnStatsFor30TBTPCDSWorkload(HiveConf conf, String tmpBaseDir) { + Connection conn = null; + + try { + Properties props = new Properties(); // connection properties + props.put("user", conf.get("javax.jdo.option.ConnectionUserName")); + props.put("password", conf.get("javax.jdo.option.ConnectionPassword")); + String url = conf.get("javax.jdo.option.ConnectionURL"); + conn = DriverManager.getConnection(url, props); + ResultSet rs = null; + Statement s = conn.createStatement(); + + if (LOG.isDebugEnabled()) { + LOG.debug("Connected to metastore database "); + } + + String mdbPath = HiveTestEnvSetup.HIVE_ROOT + "/data/files/tpcds-perf/metastore_export/"; + + // Setup the table column stats + BufferedReader br = new BufferedReader( + new FileReader( + new File(HiveTestEnvSetup.HIVE_ROOT + "/metastore/scripts/upgrade/derby/022-HIVE-11107.derby.sql"))); + String command; + + s.execute("DROP TABLE APP.TABLE_PARAMS"); + s.execute("DROP TABLE APP.TAB_COL_STATS"); + // Create the column stats table + while ((command = br.readLine()) != null) { + if (!command.endsWith(";")) { + continue; + } + if (LOG.isDebugEnabled()) { + LOG.debug("Going to run command : " + command); + } + PreparedStatement psCommand = conn.prepareStatement(command.substring(0, command.length() - 1)); + psCommand.execute(); + psCommand.close(); + if (LOG.isDebugEnabled()) { + LOG.debug("successfully completed " + command); + } + } + br.close(); + + java.nio.file.Path tabColStatsCsv = FileSystems.getDefault().getPath(mdbPath, "csv" ,"TAB_COL_STATS.txt.bz2"); + java.nio.file.Path tabParamsCsv = FileSystems.getDefault().getPath(mdbPath, "csv", "TABLE_PARAMS.txt.bz2"); + + // Set up the foreign key constraints properly in the TAB_COL_STATS data + java.nio.file.Path tmpFileLoc1 = FileSystems.getDefault().getPath(tmpBaseDir, "TAB_COL_STATS.txt"); + java.nio.file.Path tmpFileLoc2 = FileSystems.getDefault().getPath(tmpBaseDir, "TABLE_PARAMS.txt"); + + class MyComp implements Comparator<String> { + @Override + public int compare(String str1, String str2) { + if (str2.length() != str1.length()) { + return str2.length() - str1.length(); + } + return str1.compareTo(str2); + } + } + + final SortedMap<String, Integer> tableNameToID = new TreeMap<String, Integer>(new MyComp()); + + rs = s.executeQuery("SELECT * FROM APP.TBLS"); + while(rs.next()) { + String tblName = rs.getString("TBL_NAME"); + Integer tblId = rs.getInt("TBL_ID"); + tableNameToID.put(tblName, tblId); + + if (LOG.isDebugEnabled()) { + LOG.debug("Resultset : " + tblName + " | " + tblId); + } + } + + final Map<String, Map<String, String>> data = new HashMap<>(); + rs = s.executeQuery("select TBLS.TBL_NAME, a.COLUMN_NAME, a.TYPE_NAME from " + + "(select COLUMN_NAME, TYPE_NAME, SDS.SD_ID from APP.COLUMNS_V2 join APP.SDS on SDS.CD_ID = COLUMNS_V2.CD_ID) a" + + " join APP.TBLS on TBLS.SD_ID = a.SD_ID"); + while (rs.next()) { + String tblName = rs.getString(1); + String colName = rs.getString(2); + String typeName = rs.getString(3); + Map<String, String> cols = data.get(tblName); + if (null == cols) { + cols = new HashMap<>(); + } + cols.put(colName, typeName); + data.put(tblName, cols); + } + + BufferedReader reader = new BufferedReader(new InputStreamReader( + new BZip2CompressorInputStream(Files.newInputStream(tabColStatsCsv, StandardOpenOption.READ)))); + + Stream<String> replaced = reader.lines().parallel().map(str-> { + String[] splits = str.split(","); + String tblName = splits[0]; + String colName = splits[1]; + Integer tblID = tableNameToID.get(tblName); + StringBuilder sb = new StringBuilder("default@"+tblName + "@" + colName + "@" + data.get(tblName).get(colName)+"@"); + for (int i = 2; i < splits.length; i++) { + sb.append(splits[i]+"@"); + } + // Add tbl_id and empty bitvector + return sb.append(tblID).append("@").toString(); + }); + + Files.write(tmpFileLoc1, (Iterable<String>)replaced::iterator); + replaced.close(); + reader.close(); + + BufferedReader reader2 = new BufferedReader(new InputStreamReader( + new BZip2CompressorInputStream(Files.newInputStream(tabParamsCsv, StandardOpenOption.READ)))); + final Map<String,String> colStats = new ConcurrentHashMap<>(); + Stream<String> replacedStream = reader2.lines().parallel().map(str-> { + String[] splits = str.split("_@"); + String tblName = splits[0]; + Integer tblId = tableNameToID.get(tblName); + Map<String,String> cols = data.get(tblName); + StringBuilder sb = new StringBuilder(); + sb.append("{\"COLUMN_STATS\":{"); + for (String colName : cols.keySet()) { + sb.append("\""+colName+"\":\"true\","); + } + sb.append("},\"BASIC_STATS\":\"true\"}"); + colStats.put(tblId.toString(), sb.toString()); + + return tblId.toString() + "@" + splits[1]; + }); + + Files.write(tmpFileLoc2, (Iterable<String>)replacedStream::iterator); + Files.write(tmpFileLoc2, (Iterable<String>)colStats.entrySet().stream() + .map(map->map.getKey()+"@COLUMN_STATS_ACCURATE@"+map.getValue())::iterator, StandardOpenOption.APPEND); + + replacedStream.close(); + reader2.close(); + // Load the column stats and table params with 30 TB scale + String importStatement1 = "CALL SYSCS_UTIL.SYSCS_IMPORT_TABLE(null, '" + "TAB_COL_STATS" + + "', '" + tmpFileLoc1.toAbsolutePath().toString() + + "', '@', null, 'UTF-8', 1)"; + String importStatement2 = "CALL SYSCS_UTIL.SYSCS_IMPORT_TABLE(null, '" + "TABLE_PARAMS" + + "', '" + tmpFileLoc2.toAbsolutePath().toString() + + "', '@', null, 'UTF-8', 1)"; + + PreparedStatement psImport1 = conn.prepareStatement(importStatement1); + if (LOG.isDebugEnabled()) { + LOG.debug("Going to execute : " + importStatement1); + } + psImport1.execute(); + psImport1.close(); + if (LOG.isDebugEnabled()) { + LOG.debug("successfully completed " + importStatement1); + } + PreparedStatement psImport2 = conn.prepareStatement(importStatement2); + if (LOG.isDebugEnabled()) { + LOG.debug("Going to execute : " + importStatement2); + } + psImport2.execute(); + psImport2.close(); + if (LOG.isDebugEnabled()) { + LOG.debug("successfully completed " + importStatement2); + } + + s.execute("ALTER TABLE APP.TAB_COL_STATS ADD COLUMN CAT_NAME VARCHAR(256)"); + s.execute("update APP.TAB_COL_STATS set CAT_NAME = '" + Warehouse.DEFAULT_CATALOG_NAME + "'"); + + s.close(); + + conn.close(); + + } catch (Exception e) { + throw new RuntimeException("error while loading tpcds metastore dump", e); + } + } + +} http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java ---------------------------------------------------------------------- diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java index 3cdad28..c33851f 100644 --- a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java +++ b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java @@ -38,38 +38,23 @@ import java.io.Serializable; import java.io.StringWriter; import java.net.URL; import java.nio.charset.StandardCharsets; -import java.nio.file.FileSystems; -import java.nio.file.Files; -import java.nio.file.StandardOpenOption; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.PreparedStatement; -import java.sql.ResultSet; import java.sql.SQLException; -import java.sql.Statement; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; -import java.util.Comparator; import java.util.Deque; import java.util.EnumSet; -import java.util.HashMap; import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.Properties; import java.util.Set; -import java.util.SortedMap; import java.util.TreeMap; -import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.TimeUnit; import java.util.regex.Matcher; import java.util.regex.Pattern; -import java.util.stream.Stream; import junit.framework.TestSuite; -import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; import org.apache.commons.io.IOUtils; import org.apache.commons.io.output.ByteArrayOutputStream; import org.apache.commons.lang.StringUtils; @@ -131,8 +116,6 @@ import org.apache.zookeeper.WatchedEvent; import org.apache.zookeeper.Watcher; import org.apache.zookeeper.ZooKeeper; -import static org.apache.hadoop.hive.metastore.Warehouse.DEFAULT_DATABASE_NAME; - import com.google.common.base.Preconditions; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableList; @@ -154,12 +137,12 @@ public class QTestUtil { private static final String CRLF = System.getProperty("line.separator"); public static final String QTEST_LEAVE_FILES = "QTEST_LEAVE_FILES"; - private static final Logger LOG = LoggerFactory.getLogger("QTestUtil"); + static final Logger LOG = LoggerFactory.getLogger("QTestUtil"); private final static String defaultInitScript = "q_test_init.sql"; private final static String defaultCleanupScript = "q_test_cleanup.sql"; private final String[] testOnlyCommands = new String[]{"crypto"}; - private static final String TEST_TMP_DIR_PROPERTY = "test.tmp.dir"; // typically target/tmp + public static final String TEST_TMP_DIR_PROPERTY = "test.tmp.dir"; // typically target/tmp private static final String BUILD_DIR_PROPERTY = "build.dir"; // typically target public static final String TEST_SRC_TABLES_PROPERTY = "test.src.tables"; @@ -2244,206 +2227,6 @@ public class QTestUtil { } } - public static void setupMetaStoreTableColumnStatsFor30TBTPCDSWorkload(HiveConf conf) { - Connection conn = null; - ArrayList<Statement> statements = new ArrayList<Statement>(); // list of Statements, PreparedStatements - - try { - Properties props = new Properties(); // connection properties - props.put("user", conf.get("javax.jdo.option.ConnectionUserName")); - props.put("password", conf.get("javax.jdo.option.ConnectionPassword")); - conn = DriverManager.getConnection(conf.get("javax.jdo.option.ConnectionURL"), props); - ResultSet rs = null; - Statement s = conn.createStatement(); - - if (LOG.isDebugEnabled()) { - LOG.debug("Connected to metastore database "); - } - - String mdbPath = AbstractCliConfig.HIVE_ROOT + "/data/files/tpcds-perf/metastore_export/"; - - // Setup the table column stats - BufferedReader br = new BufferedReader( - new FileReader( - new File(AbstractCliConfig.HIVE_ROOT + "/metastore/scripts/upgrade/derby/022-HIVE-11107.derby.sql"))); - String command; - - s.execute("DROP TABLE APP.TABLE_PARAMS"); - s.execute("DROP TABLE APP.TAB_COL_STATS"); - // Create the column stats table - while ((command = br.readLine()) != null) { - if (!command.endsWith(";")) { - continue; - } - if (LOG.isDebugEnabled()) { - LOG.debug("Going to run command : " + command); - } - try { - PreparedStatement psCommand = conn.prepareStatement(command.substring(0, command.length()-1)); - statements.add(psCommand); - psCommand.execute(); - if (LOG.isDebugEnabled()) { - LOG.debug("successfully completed " + command); - } - } catch (SQLException e) { - LOG.info("Got SQL Exception " + e.getMessage()); - } - } - br.close(); - - java.nio.file.Path tabColStatsCsv = FileSystems.getDefault().getPath(mdbPath, "csv" ,"TAB_COL_STATS.txt.bz2"); - java.nio.file.Path tabParamsCsv = FileSystems.getDefault().getPath(mdbPath, "csv", "TABLE_PARAMS.txt.bz2"); - - // Set up the foreign key constraints properly in the TAB_COL_STATS data - String tmpBaseDir = System.getProperty(TEST_TMP_DIR_PROPERTY); - java.nio.file.Path tmpFileLoc1 = FileSystems.getDefault().getPath(tmpBaseDir, "TAB_COL_STATS.txt"); - java.nio.file.Path tmpFileLoc2 = FileSystems.getDefault().getPath(tmpBaseDir, "TABLE_PARAMS.txt"); - - class MyComp implements Comparator<String> { - @Override - public int compare(String str1, String str2) { - if (str2.length() != str1.length()) { - return str2.length() - str1.length(); - } - return str1.compareTo(str2); - } - } - - final SortedMap<String, Integer> tableNameToID = new TreeMap<String, Integer>(new MyComp()); - - rs = s.executeQuery("SELECT * FROM APP.TBLS"); - while(rs.next()) { - String tblName = rs.getString("TBL_NAME"); - Integer tblId = rs.getInt("TBL_ID"); - tableNameToID.put(tblName, tblId); - - if (LOG.isDebugEnabled()) { - LOG.debug("Resultset : " + tblName + " | " + tblId); - } - } - - final Map<String, Map<String, String>> data = new HashMap<>(); - rs = s.executeQuery("select TBLS.TBL_NAME, a.COLUMN_NAME, a.TYPE_NAME from " - + "(select COLUMN_NAME, TYPE_NAME, SDS.SD_ID from APP.COLUMNS_V2 join APP.SDS on SDS.CD_ID = COLUMNS_V2.CD_ID) a" - + " join APP.TBLS on TBLS.SD_ID = a.SD_ID"); - while (rs.next()) { - String tblName = rs.getString(1); - String colName = rs.getString(2); - String typeName = rs.getString(3); - Map<String, String> cols = data.get(tblName); - if (null == cols) { - cols = new HashMap<>(); - } - cols.put(colName, typeName); - data.put(tblName, cols); - } - - BufferedReader reader = new BufferedReader(new InputStreamReader( - new BZip2CompressorInputStream(Files.newInputStream(tabColStatsCsv, StandardOpenOption.READ)))); - - Stream<String> replaced = reader.lines().parallel().map(str-> { - String[] splits = str.split(","); - String tblName = splits[0]; - String colName = splits[1]; - Integer tblID = tableNameToID.get(tblName); - StringBuilder sb = new StringBuilder("default@"+tblName + "@" + colName + "@" + data.get(tblName).get(colName)+"@"); - for (int i = 2; i < splits.length; i++) { - sb.append(splits[i]+"@"); - } - // Add tbl_id and empty bitvector - return sb.append(tblID).append("@").toString(); - }); - - Files.write(tmpFileLoc1, (Iterable<String>)replaced::iterator); - replaced.close(); - reader.close(); - - BufferedReader reader2 = new BufferedReader(new InputStreamReader( - new BZip2CompressorInputStream(Files.newInputStream(tabParamsCsv, StandardOpenOption.READ)))); - final Map<String,String> colStats = new ConcurrentHashMap<>(); - Stream<String> replacedStream = reader2.lines().parallel().map(str-> { - String[] splits = str.split("_@"); - String tblName = splits[0]; - Integer tblId = tableNameToID.get(tblName); - Map<String,String> cols = data.get(tblName); - StringBuilder sb = new StringBuilder(); - sb.append("{\"COLUMN_STATS\":{"); - for (String colName : cols.keySet()) { - sb.append("\""+colName+"\":\"true\","); - } - sb.append("},\"BASIC_STATS\":\"true\"}"); - colStats.put(tblId.toString(), sb.toString()); - - return tblId.toString() + "@" + splits[1]; - }); - - Files.write(tmpFileLoc2, (Iterable<String>)replacedStream::iterator); - Files.write(tmpFileLoc2, (Iterable<String>)colStats.entrySet().stream() - .map(map->map.getKey()+"@COLUMN_STATS_ACCURATE@"+map.getValue())::iterator, StandardOpenOption.APPEND); - - replacedStream.close(); - reader2.close(); - // Load the column stats and table params with 30 TB scale - String importStatement1 = "CALL SYSCS_UTIL.SYSCS_IMPORT_TABLE(null, '" + "TAB_COL_STATS" + - "', '" + tmpFileLoc1.toAbsolutePath().toString() + - "', '@', null, 'UTF-8', 1)"; - String importStatement2 = "CALL SYSCS_UTIL.SYSCS_IMPORT_TABLE(null, '" + "TABLE_PARAMS" + - "', '" + tmpFileLoc2.toAbsolutePath().toString() + - "', '@', null, 'UTF-8', 1)"; - try { - PreparedStatement psImport1 = conn.prepareStatement(importStatement1); - if (LOG.isDebugEnabled()) { - LOG.debug("Going to execute : " + importStatement1); - } - statements.add(psImport1); - psImport1.execute(); - if (LOG.isDebugEnabled()) { - LOG.debug("successfully completed " + importStatement1); - } - PreparedStatement psImport2 = conn.prepareStatement(importStatement2); - if (LOG.isDebugEnabled()) { - LOG.debug("Going to execute : " + importStatement2); - } - statements.add(psImport2); - psImport2.execute(); - if (LOG.isDebugEnabled()) { - LOG.debug("successfully completed " + importStatement2); - } - } catch (SQLException e) { - LOG.info("Got SQL Exception " + e.getMessage()); - } - } catch (FileNotFoundException e1) { - LOG.info("Got File not found Exception " + e1.getMessage()); - } catch (IOException e1) { - LOG.info("Got IOException " + e1.getMessage()); - } catch (SQLException e1) { - LOG.info("Got SQLException " + e1.getMessage()); - } finally { - // Statements and PreparedStatements - int i = 0; - while (!statements.isEmpty()) { - // PreparedStatement extend Statement - Statement st = statements.remove(i); - try { - if (st != null) { - st.close(); - st = null; - } - } catch (SQLException sqle) { - } - } - - //Connection - try { - if (conn != null) { - conn.close(); - conn = null; - } - } catch (SQLException sqle) { - } - } - } - public QOutProcessor getQOutProcessor() { return qOutProcessor; } http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java index d940cdd..59130ca 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java @@ -121,7 +121,7 @@ public abstract class BaseSemanticAnalyzer { protected final Hive db; protected final HiveConf conf; protected final QueryState queryState; - protected List<Task<? extends Serializable>> rootTasks; + protected List<Task<?>> rootTasks; protected FetchTask fetchTask; protected final Logger LOG; protected final LogHelper console; http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 10982dd..27efece 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -12211,7 +12211,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { fetchTask = pCtx.getFetchTask(); } //find all Acid FileSinkOperatorS - QueryPlanPostProcessor qp = new QueryPlanPostProcessor((List<Task<?>>)rootTasks, acidFileSinks, ctx.getExecutionId()); + QueryPlanPostProcessor qp = new QueryPlanPostProcessor(rootTasks, acidFileSinks, ctx.getExecutionId()); LOG.info("Completed plan generation"); // 10. put accessed columns to readEntity http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/ql/src/test/results/clientpositive/perf/spark/query11.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query11.q.out b/ql/src/test/results/clientpositive/perf/spark/query11.q.out index 227068c..e46aa21 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query11.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query11.q.out @@ -300,7 +300,7 @@ STAGE PLANS: alias: date_dim Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year = 2002) and d_date_sk is not null) (type: boolean) + predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) @@ -335,7 +335,7 @@ STAGE PLANS: alias: date_dim Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean) + predicate: ((d_year = 2002) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) @@ -577,14 +577,21 @@ STAGE PLANS: Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col7 (type: decimal(18,2)) - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col7 Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(18,2)) + Filter Operator + predicate: (_col7 > 0) (type: boolean) + Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col7 (type: decimal(18,2)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(18,2)) Reducer 3 Reduce Operator Tree: Join Operator @@ -621,21 +628,14 @@ STAGE PLANS: Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col7 (type: decimal(18,2)) - outputColumnNames: _col0, _col7 + outputColumnNames: _col0, _col1 Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col7 > 0) (type: boolean) - Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col7 (type: decimal(18,2)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(18,2)) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(18,2)) Reducer 5 Reduce Operator Tree: Join Operator @@ -651,7 +651,7 @@ STAGE PLANS: outputColumnNames: _col1, _col3, _col5, _col6, _col8 Statistics: Num rows: 1149975359 Data size: 101451160012 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col8 / _col1) > (_col6 / _col3)) (type: boolean) + predicate: ((_col1 / _col8) > (_col6 / _col3)) (type: boolean) Statistics: Num rows: 383325119 Data size: 33817053278 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col5 (type: string) http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/ql/src/test/results/clientpositive/perf/spark/query15.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query15.q.out b/ql/src/test/results/clientpositive/perf/spark/query15.q.out index e8ffcb1..c54b95a 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query15.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query15.q.out @@ -44,16 +44,52 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 305), Map 7 (PARTITION-LEVEL SORT, 305) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 873), Reducer 2 (PARTITION-LEVEL SORT, 873) - Reducer 4 <- Map 9 (PARTITION-LEVEL SORT, 686), Reducer 3 (PARTITION-LEVEL SORT, 686) - Reducer 5 <- Reducer 4 (GROUP, 406) - Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 6 (PARTITION-LEVEL SORT, 855) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 927), Reducer 8 (PARTITION-LEVEL SORT, 927) + Reducer 4 <- Reducer 3 (GROUP, 369) + Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 305), Map 9 (PARTITION-LEVEL SORT, 305) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: customer + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_current_addr_sk (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Map 6 + Map Operator Tree: + TableScan + alias: customer_address + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_state (type: string), ca_zip (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) + Map 7 + Map Operator Tree: + TableScan alias: catalog_sales Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -69,7 +105,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) - Map 7 + Map 9 Map Operator Tree: TableScan alias: date_dim @@ -86,124 +122,72 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Map 8 - Map Operator Tree: - TableScan - alias: customer - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c_customer_sk (type: int), c_current_addr_sk (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Map 9 - Map Operator Tree: - TableScan - alias: customer_address - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ca_address_sk is not null (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ca_address_sk (type: int), ca_state (type: string), ca_zip (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string) Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col3, _col4 + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(7,2)) + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string), _col4 (type: string) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col7 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col3, _col4, _col7 Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col7 (type: int) - sort order: + - Map-reduce partition columns: _col7 (type: int) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(7,2)) - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col7 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col9, _col10 - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col2 > 500) or (_col9) IN ('CA', 'WA', 'GA') or (substr(_col10, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792')) (type: boolean) - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + predicate: ((_col3) IN ('CA', 'WA', 'GA') or (_col7 > 500) or (substr(_col4, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792')) (type: boolean) + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: decimal(7,2)), _col10 (type: string) - outputColumnNames: _col2, _col10 - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + expressions: _col4 (type: string), _col7 (type: decimal(7,2)) + outputColumnNames: _col4, _col7 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2) - keys: _col10 (type: string) + aggregations: sum(_col7) + keys: _col4 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: decimal(17,2)) - Reducer 5 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: decimal(17,2)) - Reducer 6 + Reducer 5 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: decimal(17,2)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE @@ -214,6 +198,22 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)) Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/ql/src/test/results/clientpositive/perf/spark/query16.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query16.q.out b/ql/src/test/results/clientpositive/perf/spark/query16.q.out index 625faff..743f27d 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query16.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query16.q.out @@ -60,7 +60,8 @@ limit 100 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -86,7 +87,12 @@ STAGE PLANS: 1 _col0 (type: int) Local Work: Map Reduce Local Work - Map 9 + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 8 Map Operator Tree: TableScan alias: date_dim @@ -109,7 +115,7 @@ STAGE PLANS: Spark Edges: Reducer 13 <- Map 12 (GROUP, 24) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 464), Map 8 (PARTITION-LEVEL SORT, 464) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 494), Map 9 (PARTITION-LEVEL SORT, 494) Reducer 3 <- Map 11 (PARTITION-LEVEL SORT, 711), Reducer 2 (PARTITION-LEVEL SORT, 711) Reducer 4 <- Reducer 13 (PARTITION-LEVEL SORT, 459), Reducer 3 (PARTITION-LEVEL SORT, 459) Reducer 5 <- Reducer 4 (GROUP, 246) @@ -129,12 +135,24 @@ STAGE PLANS: expressions: cs_ship_date_sk (type: int), cs_ship_addr_sk (type: int), cs_call_center_sk (type: int), cs_warehouse_sk (type: int), cs_order_number (type: int), cs_ext_ship_cost (type: decimal(7,2)), cs_net_profit (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 8 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + Local Work: + Map Reduce Local Work Map 11 Map Operator Tree: TableScan @@ -176,7 +194,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE - Map 8 + Map 9 Map Operator Tree: TableScan alias: customer_address @@ -220,34 +238,24 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4, _col5, _col6 + outputColumnNames: _col3, _col4, _col5, _col6 input vertices: - 1 Map 9 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col4, _col5, _col6 - input vertices: - 1 Map 10 + 1 Map 10 + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col4 (type: int) - sort order: + - Map-reduce partition columns: _col4 (type: int) - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + value expressions: _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) Reducer 3 Reduce Operator Tree: Join Operator http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/ql/src/test/results/clientpositive/perf/spark/query18.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query18.q.out b/ql/src/test/results/clientpositive/perf/spark/query18.q.out index c7dee3d..cb3c114 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query18.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query18.q.out @@ -72,53 +72,52 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 307), Map 13 (PARTITION-LEVEL SORT, 307) - Reducer 12 <- Map 14 (PARTITION-LEVEL SORT, 336), Reducer 11 (PARTITION-LEVEL SORT, 336) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 428), Map 8 (PARTITION-LEVEL SORT, 428) - Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 302), Reducer 2 (PARTITION-LEVEL SORT, 302) - Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 694), Reducer 3 (PARTITION-LEVEL SORT, 694) - Reducer 5 <- Map 15 (PARTITION-LEVEL SORT, 411), Reducer 4 (PARTITION-LEVEL SORT, 411) - Reducer 6 <- Reducer 5 (GROUP, 1009) - Reducer 7 <- Reducer 6 (SORT, 1) + Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 306), Map 9 (PARTITION-LEVEL SORT, 306) + Reducer 11 <- Map 14 (PARTITION-LEVEL SORT, 337), Reducer 10 (PARTITION-LEVEL SORT, 337) + Reducer 12 <- Map 15 (PARTITION-LEVEL SORT, 374), Reducer 11 (PARTITION-LEVEL SORT, 374) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 428), Map 7 (PARTITION-LEVEL SORT, 428) + Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 302), Reducer 2 (PARTITION-LEVEL SORT, 302) + Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 731), Reducer 3 (PARTITION-LEVEL SORT, 731) + Reducer 5 <- Reducer 4 (GROUP, 1009) + Reducer 6 <- Reducer 5 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: customer_address - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + alias: customer + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((ca_state) IN ('ND', 'WI', 'AL', 'NC', 'OK', 'MS', 'TN') and ca_address_sk is not null) (type: boolean) - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + predicate: ((c_birth_month) IN (9, 5, 12, 4, 1, 10) and c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ca_address_sk (type: int), ca_county (type: string), ca_state (type: string), ca_country (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_addr_sk (type: int), c_birth_year (type: int) + outputColumnNames: _col0, _col1, _col2, _col4 + Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col2 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) - Map 10 + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col4 (type: int) + Map 13 Map Operator Tree: TableScan - alias: catalog_sales - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cs_bill_cdemo_sk is not null and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_bill_cdemo_sk (type: int), cs_item_sk (type: int), cs_quantity (type: int), cs_list_price (type: decimal(7,2)), cs_sales_price (type: decimal(7,2)), cs_coupon_amt (type: decimal(7,2)), cs_net_profit (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)) - Map 13 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Map 14 Map Operator Tree: TableScan alias: cd1 @@ -136,23 +135,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: int) - Map 14 - Map Operator Tree: - TableScan - alias: date_dim - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Map 15 Map Operator Tree: TableScan @@ -171,25 +153,25 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 8 + Map 7 Map Operator Tree: TableScan - alias: customer - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + alias: customer_address + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((c_birth_month) IN (9, 5, 12, 4, 1, 10) and c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE + predicate: ((ca_state) IN ('ND', 'WI', 'AL', 'NC', 'OK', 'MS', 'TN') and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_addr_sk (type: int), c_birth_year (type: int) - outputColumnNames: _col0, _col1, _col2, _col4 - Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE + expressions: ca_address_sk (type: int), ca_county (type: string), ca_state (type: string), ca_country (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col4 (type: int) - Map 9 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + Map 8 Map Operator Tree: TableScan alias: cd2 @@ -206,102 +188,120 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE - Reducer 11 + Map 9 + Map Operator Tree: + TableScan + alias: catalog_sales + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cs_bill_cdemo_sk is not null and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_bill_cdemo_sk (type: int), cs_item_sk (type: int), cs_quantity (type: int), cs_list_price (type: decimal(7,2)), cs_sales_price (type: decimal(7,2)), cs_coupon_amt (type: decimal(7,2)), cs_net_profit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)) + Reducer 10 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6, _col7, _col8, _col12 + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col2 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col12 (type: int) - Reducer 12 + value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)) + Reducer 11 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7, _col8, _col12 + outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7, _col8, _col14 Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col12 (type: int) - outputColumnNames: _col3, _col5, _col6, _col7, _col8, _col9, _col10, _col14 + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col14 (type: int) + Reducer 12 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col4, _col5, _col6, _col7, _col8, _col14, _col16 + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col16 (type: string), _col1 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col14 (type: int) + outputColumnNames: _col1, _col3, _col6, _col7, _col8, _col9, _col10, _col16 + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: int) sort order: + Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: int), _col6 (type: int), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col14 (type: int) + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col6 (type: int), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col16 (type: int) Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col2 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col8 + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col4, _col6, _col7, _col8 Statistics: Num rows: 44000000 Data size: 37840889538 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col5 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col5 (type: int) + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 44000000 Data size: 37840889538 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col8 (type: int) + value expressions: _col0 (type: int), _col4 (type: int), _col6 (type: string), _col7 (type: string), _col8 (type: string) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col5 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col8 + outputColumnNames: _col0, _col4, _col6, _col7, _col8 Statistics: Num rows: 48400001 Data size: 41624979393 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col4 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col4 (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 48400001 Data size: 41624979393 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col8 (type: int) + value expressions: _col4 (type: int), _col6 (type: string), _col7 (type: string), _col8 (type: string) Reducer 4 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col4 (type: int) + 0 _col0 (type: int) 1 _col3 (type: int) - outputColumnNames: _col1, _col2, _col3, _col8, _col15, _col16, _col17, _col18, _col19, _col20, _col24 - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col15 (type: int) - sort order: + - Map-reduce partition columns: _col15 (type: int) - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col8 (type: int), _col16 (type: int), _col17 (type: decimal(7,2)), _col18 (type: decimal(7,2)), _col19 (type: decimal(7,2)), _col20 (type: decimal(7,2)), _col24 (type: int) - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col15 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col8, _col16, _col17, _col18, _col19, _col20, _col24, _col26 + outputColumnNames: _col4, _col6, _col7, _col8, _col11, _col16, _col17, _col18, _col19, _col20, _col26 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col26 (type: string), _col3 (type: string), _col2 (type: string), _col1 (type: string), CAST( _col16 AS decimal(12,2)) (type: decimal(12,2)), CAST( _col17 AS decimal(12,2)) (type: decimal(12,2)), CAST( _col19 AS decimal(12,2)) (type: decimal(12,2)), CAST( _col18 AS decimal(12,2)) (type: decimal(12,2)), CAST( _col20 AS decimal(12,2)) (type: decimal(12,2)), CAST( _col8 AS decimal(12,2)) (type: decimal(12,2)), CAST( _col24 AS decimal(12,2)) (type: decimal(12,2)) + expressions: _col11 (type: string), _col8 (type: string), _col7 (type: string), _col6 (type: string), CAST( _col16 AS decimal(12,2)) (type: decimal(12,2)), CAST( _col17 AS decimal(12,2)) (type: decimal(12,2)), CAST( _col19 AS decimal(12,2)) (type: decimal(12,2)), CAST( _col18 AS decimal(12,2)) (type: decimal(12,2)), CAST( _col20 AS decimal(12,2)) (type: decimal(12,2)), CAST( _col4 AS decimal(12,2)) (type: decimal(12,2)), CAST( _col26 AS decimal(12,2)) (type: decimal(12,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -316,7 +316,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) Statistics: Num rows: 2108229765 Data size: 285496662075 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: decimal(22,2)), _col6 (type: bigint), _col7 (type: decimal(22,2)), _col8 (type: bigint), _col9 (type: decimal(22,2)), _col10 (type: bigint), _col11 (type: decimal(22,2)), _col12 (type: bigint), _col13 (type: decimal(22,2)), _col14 (type: bigint), _col15 (type: decimal(22,2)), _col16 (type: bigint), _col17 (type: decimal(22,2)), _col18 (type: bigint) - Reducer 6 + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), count(VALUE._col5), sum(VALUE._col6), count(VALUE._col7), sum(VALUE._col8), count(VALUE._col9), sum(VALUE._col10), count(VALUE._col11), sum(VALUE._col12), count(VALUE._col13) @@ -335,7 +335,7 @@ STAGE PLANS: Statistics: Num rows: 1054114882 Data size: 142748330969 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col4 (type: decimal(38,18)), _col5 (type: decimal(38,18)), _col6 (type: decimal(38,18)), _col7 (type: decimal(38,18)), _col8 (type: decimal(38,18)), _col9 (type: decimal(38,18)), _col10 (type: decimal(38,18)) - Reducer 7 + Reducer 6 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey3 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: decimal(38,18)), VALUE._col1 (type: decimal(38,18)), VALUE._col2 (type: decimal(38,18)), VALUE._col3 (type: decimal(38,18)), VALUE._col4 (type: decimal(38,18)), VALUE._col5 (type: decimal(38,18)), VALUE._col6 (type: decimal(38,18))