HIVE-18118: Explain Extended should indicate if a file being read is an EC file (Andrew Sherman, reviewed by Sahil Takiar)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c2940a07 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c2940a07 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c2940a07 Branch: refs/heads/master Commit: c2940a07cf0891e922672782b73ec22551a7eedd Parents: e865b44 Author: Andrew Sherman <asher...@cloudera.com> Authored: Thu Jul 5 16:08:16 2018 -0500 Committer: Sahil Takiar <stak...@cloudera.com> Committed: Thu Jul 5 16:08:16 2018 -0500 ---------------------------------------------------------------------- .../hive/common/util/HiveStringUtils.java | 21 +- .../apache/hive/jdbc/TestJdbcWithMiniHS2.java | 2 +- .../jdbc/TestJdbcWithMiniHS2ErasureCoding.java | 215 ++++++++++ .../test/resources/testconfiguration.properties | 3 +- .../org/apache/hadoop/hive/ql/QTestUtil.java | 4 +- .../org/apache/hive/jdbc/miniHS2/MiniHS2.java | 23 +- .../org/apache/hadoop/hive/ql/exec/DDLTask.java | 44 +- .../ql/metadata/SessionHiveMetaStoreClient.java | 2 +- .../formatting/MetaDataFormatUtils.java | 11 +- .../formatting/TextMetaDataFormatter.java | 10 + .../optimizer/spark/SparkMapJoinOptimizer.java | 2 +- .../stats/annotation/StatsRulesProcFactory.java | 2 +- .../hadoop/hive/ql/plan/PartitionDesc.java | 4 +- .../apache/hadoop/hive/ql/plan/PlanUtils.java | 31 ++ .../apache/hadoop/hive/ql/plan/Statistics.java | 12 +- .../apache/hadoop/hive/ql/plan/TableDesc.java | 3 +- .../hive/ql/stats/BasicStatsNoJobTask.java | 7 +- .../hadoop/hive/ql/stats/BasicStatsTask.java | 4 +- .../apache/hadoop/hive/ql/stats/StatsUtils.java | 20 +- .../hive/ql/txn/compactor/CompactorMR.java | 4 +- .../queries/clientpositive/erasure_explain.q | 24 ++ .../queries/clientpositive/erasure_simple.q | 13 + .../clientnegative/unset_table_property.q.out | 1 + .../materialized_view_create_rewrite.q.out | 2 + .../clientpositive/druid/druidmini_mv.q.out | 3 + .../erasurecoding/erasure_explain.q.out | 409 +++++++++++++++++++ .../erasurecoding/erasure_simple.q.out | 20 +- .../llap/materialized_view_create.q.out | 1 + .../llap/materialized_view_create_rewrite.q.out | 2 + ...materialized_view_create_rewrite_dummy.q.out | 2 + ...erialized_view_create_rewrite_multi_db.q.out | 2 + .../llap/materialized_view_describe.q.out | 1 + .../clientpositive/show_tblproperties.q.out | 5 + .../unset_table_view_property.q.out | 9 + .../hadoop/hive/common/StatsSetupConst.java | 21 +- .../hadoop/hive/metastore/HiveMetaStore.java | 3 +- .../hive/metastore/utils/MetaStoreUtils.java | 11 +- .../metastore/utils/TestMetaStoreUtils.java | 37 +- 38 files changed, 877 insertions(+), 113 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/common/src/java/org/apache/hive/common/util/HiveStringUtils.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hive/common/util/HiveStringUtils.java b/common/src/java/org/apache/hive/common/util/HiveStringUtils.java index cfe9b22..6b14ad9 100644 --- a/common/src/java/org/apache/hive/common/util/HiveStringUtils.java +++ b/common/src/java/org/apache/hive/common/util/HiveStringUtils.java @@ -31,19 +31,15 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Date; +import java.util.HashMap; import java.util.Iterator; import java.util.List; -import java.util.Map; -import java.util.HashMap; import java.util.Locale; -import java.util.Properties; +import java.util.Map; import java.util.StringTokenizer; import java.util.regex.Pattern; import com.google.common.base.Splitter; -import com.google.common.collect.Interner; -import com.google.common.collect.Interners; - import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.text.translate.CharSequenceTranslator; import org.apache.commons.lang3.text.translate.EntityArrays; @@ -1062,19 +1058,6 @@ public class HiveStringUtils { return identifier.trim().toLowerCase(); } - public static Map getPropertiesExplain(Properties properties) { - if (properties != null) { - String value = properties.getProperty("columns.comments"); - if (value != null) { - // should copy properties first - Map clone = new HashMap(properties); - clone.put("columns.comments", quoteComments(value)); - return clone; - } - } - return properties; - } - public static String quoteComments(String value) { char[] chars = value.toCharArray(); if (!commentProvided(chars)) { http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java ---------------------------------------------------------------------- diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java index d7d7097..7ef2ced 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java @@ -1658,7 +1658,7 @@ public class TestJdbcWithMiniHS2 { /** * Get Detailed Table Information via jdbc */ - private String getDetailedTableDescription(Statement stmt, String table) throws SQLException { + static String getDetailedTableDescription(Statement stmt, String table) throws SQLException { String extendedDescription = null; try (ResultSet rs = stmt.executeQuery("describe extended " + table)) { while (rs.next()) { http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2ErasureCoding.java ---------------------------------------------------------------------- diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2ErasureCoding.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2ErasureCoding.java new file mode 100644 index 0000000..b0a0145 --- /dev/null +++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2ErasureCoding.java @@ -0,0 +1,215 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.jdbc; + +import java.io.IOException; +import java.nio.file.Paths; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.Collections; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.shims.HadoopShims; +import org.apache.hadoop.hive.shims.HadoopShims.HdfsErasureCodingShim; +import org.apache.hadoop.hive.shims.HadoopShims.MiniDFSShim; +import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hive.jdbc.miniHS2.MiniHS2; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import static org.apache.hadoop.hive.ql.QTestUtil.DEFAULT_TEST_EC_POLICY; +import static org.apache.hive.jdbc.TestJdbcWithMiniHS2.getDetailedTableDescription; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +/** + * Run erasure Coding tests with jdbc. + */ +public class TestJdbcWithMiniHS2ErasureCoding { + private static final String DB_NAME = "ecTestDb"; + private static MiniHS2 miniHS2 = null; + private static HiveConf conf; + private Connection hs2Conn = null; + + private static HiveConf createHiveOnSparkConf() { + HiveConf hiveConf = new HiveConf(); + // Tell dfs not to consider load when choosing a datanode as this can cause failure as + // in a test we do not have spare datanode capacity. + hiveConf.setBoolean("dfs.namenode.redundancy.considerLoad", false); + hiveConf.set("hive.execution.engine", "spark"); + hiveConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); + hiveConf.set("spark.master", "local-cluster[2,2,1024]"); + hiveConf.set("hive.spark.client.connect.timeout", "30000ms"); + hiveConf.set("spark.local.dir", + Paths.get(System.getProperty("test.tmp.dir"), "TestJdbcWithMiniHS2ErasureCoding-local-dir") + .toString()); + hiveConf.setBoolVar(ConfVars.HIVE_SUPPORT_CONCURRENCY, false); // avoid ZK errors + return hiveConf; + } + + /** + * Setup a mini HS2 with miniMR. + */ + @BeforeClass + public static void beforeTest() throws Exception { + Class.forName(MiniHS2.getJdbcDriverName()); + conf = createHiveOnSparkConf(); + DriverManager.setLoginTimeout(0); + miniHS2 = new MiniHS2.Builder() + .withConf(conf) + .withMiniMR() + .withDataNodes(5) // sufficient for RS-3-2-1024k + .build(); + miniHS2.start(Collections.emptyMap()); + createDb(); + MiniDFSShim dfs = miniHS2.getDfs(); + addErasurePolicy(dfs, "hdfs:///", DEFAULT_TEST_EC_POLICY); + } + + /** + * Shutdown the mini HS2. + */ + @AfterClass + public static void afterTest() { + if (miniHS2 != null && miniHS2.isStarted()) { + miniHS2.stop(); + } + } + + /** + * Setup a connection to the test database before each test. + */ + @Before + public void setUp() throws Exception { + hs2Conn = DriverManager.getConnection(miniHS2.getJdbcURL(DB_NAME), + System.getProperty("user.name"), "bar"); + } + + /** + * Close connection after each test. + */ + @After + public void tearDown() throws Exception { + if (hs2Conn != null) { + hs2Conn.close(); + } + } + + /** + * Create a database. + */ + private static void createDb() throws Exception { + try (Connection conn = DriverManager.getConnection(miniHS2.getJdbcURL(), + System.getProperty("user.name"), "bar"); + Statement stmt2 = conn.createStatement()) { + stmt2.execute("DROP DATABASE IF EXISTS " + DB_NAME + " CASCADE"); + stmt2.execute("CREATE DATABASE " + DB_NAME); + } + } + + /** + * Test EXPLAIN on fs with Erasure Coding. + */ + @Test + public void testExplainErasureCoding() throws Exception { + try (Statement stmt = hs2Conn.createStatement()) { + String tableName = "pTableEc"; + stmt.execute( + " CREATE TABLE " + tableName + " (userid VARCHAR(64), link STRING, source STRING) " + + "PARTITIONED BY (datestamp STRING, i int) " + + "CLUSTERED BY (userid) INTO 4 BUCKETS STORED AS PARQUET"); + // insert data to create 2 partitions + stmt.execute("INSERT INTO TABLE " + tableName + + " PARTITION (datestamp = '2014-09-23', i = 1)(userid,link) VALUES ('jsmith', 'mail.com')"); + stmt.execute("INSERT INTO TABLE " + tableName + + " PARTITION (datestamp = '2014-09-24', i = 2)(userid,link) VALUES ('mac', 'superchunk.com')"); + String explain = getExtendedExplain(stmt, "select userid from " + tableName); + assertMatchAndCount(explain, " numFiles 4", 2); + assertMatchAndCount(explain, " numFilesErasureCoded 4", 2); + } + } + + /** + * Test DESCRIBE on fs with Erasure Coding. + */ + @Test + public void testDescribeErasureCoding() throws Exception { + try (Statement stmt = hs2Conn.createStatement()) { + String table = "pageviews"; + stmt.execute(" CREATE TABLE " + table + " (userid VARCHAR(64), link STRING, source STRING) " + + "PARTITIONED BY (datestamp STRING, i int) CLUSTERED BY (userid) INTO 4 BUCKETS STORED AS PARQUET"); + stmt.execute("INSERT INTO TABLE " + table + " PARTITION (datestamp = '2014-09-23', i = 1)" + + "(userid,link) VALUES ('jsmith', 'mail.com')"); + stmt.execute("INSERT INTO TABLE " + table + " PARTITION (datestamp = '2014-09-24', i = 1)" + + "(userid,link) VALUES ('dpatel', 'gmail.com')"); + String description = getDetailedTableDescription(stmt, table); + assertMatchAndCount(description, "numFiles=8", 1); + assertMatchAndCount(description, "numFilesErasureCoded=8", 1); + assertMatchAndCount(description, "numPartitions=2", 1); + } + } + + /** + * Add a Erasure Coding Policy to a Path. + */ + private static void addErasurePolicy(MiniDFSShim dfs, String pathString, String policyName) throws IOException { + HadoopShims hadoopShims = ShimLoader.getHadoopShims(); + HdfsErasureCodingShim erasureCodingShim = hadoopShims.createHdfsErasureCodingShim(dfs.getFileSystem(), conf); + erasureCodingShim.enableErasureCodingPolicy(policyName); + Path fsRoot = new Path(pathString); + erasureCodingShim.setErasureCodingPolicy(fsRoot, policyName); + HadoopShims.HdfsFileErasureCodingPolicy erasureCodingPolicy = + erasureCodingShim.getErasureCodingPolicy(fsRoot); + assertEquals(policyName, erasureCodingPolicy.getName()); + } + + /** + * Get Extended Explain output via jdbc. + */ + private static String getExtendedExplain(Statement stmt, String query) throws SQLException { + StringBuilder sb = new StringBuilder(2048); + try (ResultSet rs = stmt.executeQuery("explain extended " + query)) { + while (rs.next()) { + sb.append(rs.getString(1)).append('\n'); + } + } + return sb.toString(); + } + + /** + * Check that the expected string occurs correctly in the output string. + * @param output string to probe + * @param expectedString string to find in output + * @param expectedCount the expected number of occurrences of the expected string + */ + private void assertMatchAndCount(String output, String expectedString, int expectedCount) { + assertTrue("Did not find expected '" + expectedString + "' in text " + + output, output.contains(expectedString)); + assertEquals("wrong count of matches of '" + expectedString + "' in text " + + output, expectedCount, StringUtils.countMatches(output, expectedString)); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 978a806..fedb77b 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -1714,4 +1714,5 @@ druid.kafka.query.files=druidkafkamini_basic.q erasurecoding.shared.query.files=erasure_commands.q # tests to be run only by TestErasureCodingHDFSCliDriver -erasurecoding.only.query.files=erasure_simple.q +erasurecoding.only.query.files=erasure_simple.q,\ + erasure_explain.q http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java ---------------------------------------------------------------------- diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java index 2dfd2aa..98aae5c 100644 --- a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java +++ b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java @@ -150,9 +150,9 @@ public class QTestUtil { public static final String TEST_HIVE_USER_PROPERTY = "test.hive.user"; /** - * The Erasure Coding Policy to use in TestErasureCodingHDFSCliDriver. + * The default Erasure Coding Policy to use in Erasure Coding tests. */ - private static final String DEFAULT_TEST_EC_POLICY = "RS-3-2-1024k"; + public static final String DEFAULT_TEST_EC_POLICY = "RS-3-2-1024k"; private String testWarehouse; @Deprecated http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/itests/util/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java ---------------------------------------------------------------------- diff --git a/itests/util/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java b/itests/util/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java index 1700c08..a78dd73 100644 --- a/itests/util/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java +++ b/itests/util/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java @@ -66,6 +66,7 @@ public class MiniHS2 extends AbstractHiveService { private static final FsPermission FULL_PERM = new FsPermission((short)00777); private static final FsPermission WRITE_ALL_PERM = new FsPermission((short)00733); private static final String tmpDir = System.getProperty("test.tmp.dir"); + private static final int DEFAULT_DATANODE_COUNT = 4; private HiveServer2 hiveServer2 = null; private final File baseDir; private final Path baseFsDir; @@ -104,6 +105,7 @@ public class MiniHS2 extends AbstractHiveService { private boolean isMetastoreSecure; private String metastoreServerPrincipal; private String metastoreServerKeyTab; + private int dataNodes = DEFAULT_DATANODE_COUNT; // default number of datanodes for miniHS2 public Builder() { } @@ -162,6 +164,16 @@ public class MiniHS2 extends AbstractHiveService { return this; } + /** + * Set the number of datanodes to be used by HS2. + * @param count the number of datanodes + * @return this Builder + */ + public Builder withDataNodes(int count) { + this.dataNodes = count; + return this; + } + public MiniHS2 build() throws Exception { if (miniClusterType == MiniClusterType.MR && useMiniKdc) { throw new IOException("Can't create secure miniMr ... yet"); @@ -173,7 +185,7 @@ public class MiniHS2 extends AbstractHiveService { } return new MiniHS2(hiveConf, miniClusterType, useMiniKdc, serverPrincipal, serverKeytab, isMetastoreRemote, usePortsFromConf, authType, isHA, cleanupLocalDirOnStartup, - isMetastoreSecure, metastoreServerPrincipal, metastoreServerKeyTab); + isMetastoreSecure, metastoreServerPrincipal, metastoreServerKeyTab, dataNodes); } } @@ -212,9 +224,8 @@ public class MiniHS2 extends AbstractHiveService { private MiniHS2(HiveConf hiveConf, MiniClusterType miniClusterType, boolean useMiniKdc, String serverPrincipal, String serverKeytab, boolean isMetastoreRemote, boolean usePortsFromConf, String authType, boolean isHA, boolean cleanupLocalDirOnStartup, - boolean isMetastoreSecure, - String metastoreServerPrincipal, - String metastoreKeyTab) throws Exception { + boolean isMetastoreSecure, String metastoreServerPrincipal, String metastoreKeyTab, + int dataNodes) throws Exception { // Always use localhost for hostname as some tests like SSL CN validation ones // are tied to localhost being present in the certificate name super( @@ -242,7 +253,7 @@ public class MiniHS2 extends AbstractHiveService { if (miniClusterType != MiniClusterType.LOCALFS_ONLY) { // Initialize dfs - dfs = ShimLoader.getHadoopShims().getMiniDfs(hiveConf, 4, true, null, isHA); + dfs = ShimLoader.getHadoopShims().getMiniDfs(hiveConf, dataNodes, true, null, isHA); fs = dfs.getFileSystem(); String uriString = fs.getUri().toString(); @@ -334,7 +345,7 @@ public class MiniHS2 extends AbstractHiveService { throws Exception { this(hiveConf, clusterType, false, null, null, false, usePortsFromConf, "KERBEROS", false, true, - false, null, null); + false, null, null, DEFAULT_DATANODE_COUNT); } public void start(Map<String, String> confOverlay) throws Exception { http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index b7babd6..ba0070d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -60,16 +60,12 @@ import com.google.common.util.concurrent.ListenableFuture; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FsShell; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.FileUtils; -import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.common.StatsSetupConst; -import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.common.ValidTxnWriteIdList; -import org.apache.hadoop.hive.common.ValidWriteIdList; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.Constants; import org.apache.hadoop.hive.conf.HiveConf; @@ -293,39 +289,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.stringtemplate.v4.ST; -import java.io.BufferedWriter; -import java.io.DataOutputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.Serializable; -import java.io.Writer; -import java.net.URI; -import java.net.URISyntaxException; -import java.nio.charset.StandardCharsets; -import java.sql.SQLException; -import java.util.AbstractList; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; -import java.util.SortedSet; -import java.util.TreeMap; -import java.util.TreeSet; -import java.util.concurrent.ExecutionException; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - import static org.apache.commons.lang.StringUtils.join; -import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE; /** * DDLTask implementation. @@ -2713,7 +2677,7 @@ public class DDLTask extends Task<DDLWork> implements Serializable { String tbl_location = " '" + HiveStringUtils.escapeHiveCommand(sd.getLocation()) + "'"; // Table properties - duplicateProps.addAll(Arrays.asList(StatsSetupConst.TABLE_PARAMS_STATS_KEYS)); + duplicateProps.addAll(StatsSetupConst.TABLE_PARAMS_STATS_KEYS); String tbl_properties = propertiesToString(tbl.getParameters(), duplicateProps); createTab_stmt.add(TEMPORARY, tbl_temp); @@ -3679,7 +3643,7 @@ public class DDLTask extends Task<DDLWork> implements Serializable { Map<String,String> tblProps = tbl.getParameters() == null ? new HashMap<String,String>() : tbl.getParameters(); Map<String, Long> valueMap = new HashMap<>(); Map<String, Boolean> stateMap = new HashMap<>(); - for (String stat : StatsSetupConst.supportedStats) { + for (String stat : StatsSetupConst.SUPPORTED_STATS) { valueMap.put(stat, 0L); stateMap.put(stat, true); } @@ -3688,7 +3652,7 @@ public class DDLTask extends Task<DDLWork> implements Serializable { for (Partition partition : parts) { Map<String, String> props = partition.getParameters(); Boolean state = StatsSetupConst.areBasicStatsUptoDate(props); - for (String stat : StatsSetupConst.supportedStats) { + for (String stat : StatsSetupConst.SUPPORTED_STATS) { stateMap.put(stat, stateMap.get(stat) && state); if (props != null && props.get(stat) != null) { valueMap.put(stat, valueMap.get(stat) + Long.parseLong(props.get(stat))); @@ -3696,7 +3660,7 @@ public class DDLTask extends Task<DDLWork> implements Serializable { } numParts++; } - for (String stat : StatsSetupConst.supportedStats) { + for (String stat : StatsSetupConst.SUPPORTED_STATS) { StatsSetupConst.setBasicStatsState(tblProps, Boolean.toString(stateMap.get(stat))); tblProps.put(stat, valueMap.get(stat).toString()); } http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java index 58c8960..5d382ae 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java @@ -593,7 +593,7 @@ public class SessionHiveMetaStoreClient extends HiveMetaStoreClient implements I return false; } boolean statsPresent = false; - for (String stat : StatsSetupConst.supportedStats) { + for (String stat : StatsSetupConst.SUPPORTED_STATS) { String statVal = props.get(stat); if (statVal != null && Long.parseLong(statVal) > 0) { statsPresent = true; http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java index 7af6dab..36cd46a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.metadata.formatting; import org.apache.commons.lang.StringEscapeUtils; +import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; @@ -474,10 +475,16 @@ public final class MetaDataFormatUtils { List<String> keys = new ArrayList<String>(params.keySet()); Collections.sort(keys); for (String key : keys) { + String value = params.get(key); + if (key.equals(StatsSetupConst.NUM_ERASURE_CODED_FILES)) { + if ("0".equals(value)) { + continue; + } + } tableInfo.append(FIELD_DELIM); // Ensures all params are indented. formatOutput(key, - escapeUnicode ? StringEscapeUtils.escapeJava(params.get(key)) - : HiveStringUtils.escapeJava(params.get(key)), + escapeUnicode ? StringEscapeUtils.escapeJava(value) + : HiveStringUtils.escapeJava(value), tableInfo, isOutputPadded); } } http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java index 326cbed..705365b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java @@ -366,6 +366,7 @@ class TextMetaDataFormatter implements MetaDataFormatter { public long lastAccessTime = 0; public long lastUpdateTime = 0; public int numOfFiles = 0; + int numOfErasureCodedFiles = 0; } // TODO: why is this in text formatter?!! @@ -416,6 +417,12 @@ class TextMetaDataFormatter implements MetaDataFormatter { outStream.write((unknown ? unknownString : "" + fd.numOfFiles).getBytes("UTF-8")); outStream.write(terminator); + if (fd.numOfErasureCodedFiles > 0) { + outStream.write("totalNumberErasureCodedFiles:".getBytes("UTF-8")); + outStream.write((unknown ? unknownString : "" + fd.numOfErasureCodedFiles).getBytes("UTF-8")); + outStream.write(terminator); + } + for (int k = 0; k < indent; k++) { outStream.write(Utilities.INDENT.getBytes("UTF-8")); } @@ -473,6 +480,9 @@ class TextMetaDataFormatter implements MetaDataFormatter { continue; } fd.numOfFiles++; + if (currentStatus.isErasureCoded()) { + fd.numOfErasureCodedFiles++; + } long fileLen = currentStatus.getLen(); fd.totalFileSize += fileLen; if (fileLen > fd.maxFileSize) { http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java index 8e75db9..689c888 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java @@ -216,7 +216,7 @@ public class SparkMapJoinOptimizer implements NodeProcessor { LOG.debug("Found a big table branch with parent operator {} and position {}", parentOp, pos); bigTablePosition = pos; bigTableFound = true; - bigInputStat = new Statistics(0, Long.MAX_VALUE); + bigInputStat = new Statistics(0, Long.MAX_VALUE, 0); } else { // Either we've found multiple big table branches, or the current branch cannot // be a big table branch. Disable mapjoin for these cases. http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index d0be33b..3c2b085 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -1818,7 +1818,7 @@ public class StatsRulesProcFactory { } } - Statistics wcStats = new Statistics(newNumRows, newDataSize); + Statistics wcStats = new Statistics(newNumRows, newDataSize, 0); wcStats.setBasicStatsState(statsState); // evaluate filter expression and update statistics http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java index 61458b4..821e428 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java @@ -45,10 +45,8 @@ import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.OutputFormat; import org.apache.hive.common.util.ReflectionUtil; -import org.apache.hive.common.util.HiveStringUtils; import org.apache.hadoop.hive.ql.plan.Explain.Level; - /** * PartitionDesc. * @@ -221,7 +219,7 @@ public class PartitionDesc implements Serializable, Cloneable { @Explain(displayName = "properties", explainLevels = { Level.EXTENDED }) public Map getPropertiesExplain() { - return HiveStringUtils.getPropertiesExplain(getProperties()); + return PlanUtils.getPropertiesExplain(getProperties()); } public void setProperties(final Properties properties) { http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java index 2c5b655..250a085 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java @@ -23,6 +23,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Comparator; +import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -31,6 +32,7 @@ import java.util.Set; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.JavaUtils; +import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.llap.LlapOutputFormat; @@ -78,6 +80,7 @@ import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.hadoop.mapred.TextInputFormat; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.hive.common.util.HiveStringUtils.quoteComments; /** * PlanUtils. @@ -1203,4 +1206,32 @@ public final class PlanUtils { public static Class<? extends AbstractSerDe> getDefaultSerDe() { return LazySimpleSerDe.class; } + + /** + * Get a Map of table or partition properties to be used in explain extended output. + * Do some filtering to make output readable and/or concise. + */ + static Map getPropertiesExplain(Properties properties) { + if (properties != null) { + Map<Object, Object> clone = null; + String value = properties.getProperty("columns.comments"); + if (value != null) { + // should copy properties first + clone = new HashMap<>(properties); + clone.put("columns.comments", quoteComments(value)); + } + value = properties.getProperty(StatsSetupConst.NUM_ERASURE_CODED_FILES); + if ("0".equals(value)) { + // should copy properties first + if (clone == null) { + clone = new HashMap<>(properties); + } + clone.remove(StatsSetupConst.NUM_ERASURE_CODED_FILES); + } + if (clone != null) { + return clone; + } + } + return properties; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java index 6babe49..bc5f9d9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java @@ -53,18 +53,20 @@ public class Statistics implements Serializable { private long numRows; private long runTimeNumRows; private long dataSize; + private long numErasureCodedFiles; private State basicStatsState; private Map<String, ColStatistics> columnStats; private State columnStatsState; private boolean runtimeStats; public Statistics() { - this(0, 0); + this(0, 0, 0); } - public Statistics(long nr, long ds) { + public Statistics(long nr, long ds, long numEcFiles) { numRows = nr; dataSize = ds; + numErasureCodedFiles = numEcFiles; runTimeNumRows = -1; columnStats = null; columnStatsState = State.NONE; @@ -137,6 +139,10 @@ public class Statistics implements Serializable { } sb.append(" Data size: "); sb.append(dataSize); + if (numErasureCodedFiles > 0) { + sb.append(" Erasure files: "); + sb.append(numErasureCodedFiles); + } sb.append(" Basic stats: "); sb.append(basicStatsState); sb.append(" Column stats: "); @@ -185,7 +191,7 @@ public class Statistics implements Serializable { @Override public Statistics clone() { - Statistics clone = new Statistics(numRows, dataSize); + Statistics clone = new Statistics(numRows, dataSize, numErasureCodedFiles); clone.setRunTimeNumRows(runTimeNumRows); clone.setBasicStatsState(basicStatsState); clone.setColumnStatsState(columnStatsState); http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java index 4068e56..bbce940 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java @@ -29,7 +29,6 @@ import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.OutputFormat; -import org.apache.hive.common.util.HiveStringUtils; import org.apache.hive.common.util.ReflectionUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -127,7 +126,7 @@ public class TableDesc implements Serializable, Cloneable { @Explain(displayName = "properties", explainLevels = { Level.EXTENDED }) public Map getPropertiesExplain() { - return HiveStringUtils.getPropertiesExplain(getProperties()); + return PlanUtils.getPropertiesExplain(getProperties()); } public void setProperties(final Properties properties) { http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsNoJobTask.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsNoJobTask.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsNoJobTask.java index d4d46a3..3128ee8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsNoJobTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsNoJobTask.java @@ -163,6 +163,7 @@ public class BasicStatsNoJobTask implements IStatsProcessor { long rawDataSize = 0; long fileSize = 0; long numFiles = 0; + long numErasureCodedFiles = 0; // Note: this code would be invalid for transactional tables of any kind. Utilities.FILE_OP_LOGGER.debug("Aggregating stats for {}", dir); List<FileStatus> fileList = null; @@ -190,6 +191,9 @@ public class BasicStatsNoJobTask implements IStatsProcessor { numRows += statsRR.getStats().getRowCount(); fileSize += file.getLen(); numFiles += 1; + if (file.isErasureCoded()) { + numErasureCodedFiles++; + } } else { throw new HiveException(String.format("Unexpected file found during reading footers for: %s ", file)); } @@ -206,6 +210,7 @@ public class BasicStatsNoJobTask implements IStatsProcessor { parameters.put(StatsSetupConst.RAW_DATA_SIZE, String.valueOf(rawDataSize)); parameters.put(StatsSetupConst.TOTAL_SIZE, String.valueOf(fileSize)); parameters.put(StatsSetupConst.NUM_FILES, String.valueOf(numFiles)); + parameters.put(StatsSetupConst.NUM_ERASURE_CODED_FILES, String.valueOf(numErasureCodedFiles)); if (partish.getPartition() != null) { result = new Partition(partish.getTable(), partish.getPartition().getTPartition()); @@ -224,7 +229,7 @@ public class BasicStatsNoJobTask implements IStatsProcessor { private String toString(Map<String, String> parameters) { StringBuilder builder = new StringBuilder(); - for (String statType : StatsSetupConst.supportedStats) { + for (String statType : StatsSetupConst.SUPPORTED_STATS) { String value = parameters.get(statType); if (value != null) { if (builder.length() > 0) { http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java index f31c170..0db90b0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java @@ -207,7 +207,7 @@ public class BasicStatsTask implements Serializable, IStatsProcessor { private void updateStats(StatsAggregator statsAggregator, Map<String, String> parameters, String aggKey) throws HiveException { - for (String statType : StatsSetupConst.statsRequireCompute) { + for (String statType : StatsSetupConst.STATS_REQUIRE_COMPUTE) { String value = statsAggregator.aggregateStats(aggKey, statType); if (value != null && !value.isEmpty()) { long longValue = Long.parseLong(value); @@ -411,7 +411,7 @@ public class BasicStatsTask implements Serializable, IStatsProcessor { private String toString(Map<String, String> parameters) { StringBuilder builder = new StringBuilder(); - for (String statType : StatsSetupConst.supportedStats) { + for (String statType : StatsSetupConst.SUPPORTED_STATS) { String value = parameters.get(statType); if (value != null) { if (builder.length() > 0) { http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index 494939a..95a4440 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -261,6 +261,8 @@ public class StatsUtils { long nr = basicStats.getNumRows(); List<ColStatistics> colStats = Lists.newArrayList(); + long numErasureCodedFiles = getErasureCodedFiles(table); + if (fetchColStats) { colStats = getTableColumnStats(table, schema, neededColumns, colStatsCache); if(colStats == null) { @@ -273,7 +275,7 @@ public class StatsUtils { long betterDS = getDataSizeFromColumnStats(nr, colStats); ds = (betterDS < 1 || colStats.isEmpty()) ? ds : betterDS; } - stats = new Statistics(nr, ds); + stats = new Statistics(nr, ds, numErasureCodedFiles); // infer if any column can be primary key based on column statistics inferAndSetPrimaryKey(stats.getNumRows(), colStats); @@ -308,10 +310,14 @@ public class StatsUtils { long nr = bbs.getNumRows(); long ds = bbs.getDataSize(); + List<Long> erasureCodedFiles = getBasicStatForPartitions(table, partList.getNotDeniedPartns(), + StatsSetupConst.NUM_ERASURE_CODED_FILES); + long numErasureCodedFiles = getSumIgnoreNegatives(erasureCodedFiles); + if (nr == 0) { nr=1; } - stats = new Statistics(nr, ds); + stats = new Statistics(nr, ds, numErasureCodedFiles); stats.setBasicStatsState(bbs.getState()); if (nr > 0) { // FIXME: this promotion process should be removed later @@ -1656,6 +1662,14 @@ public class StatsUtils { } /** + * Get number of Erasure Coded files for a table + * @return count of EC files + */ + public static long getErasureCodedFiles(Table table) { + return getBasicStatForTable(table, StatsSetupConst.NUM_ERASURE_CODED_FILES); + } + + /** * Get basic stats of table * @param table * - table @@ -1782,7 +1796,7 @@ public class StatsUtils { } /** - * Get qualified column name from output key column names + * Get qualified column name from output key column names. * @param keyExprs * - output key names * @return list of qualified names http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java index 6044719..611f85a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java @@ -21,8 +21,6 @@ import java.lang.reflect.Field; import java.lang.reflect.Modifier; import java.util.HashSet; -import com.google.common.collect.Lists; - import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; @@ -469,7 +467,7 @@ public class CompactorMR { HiveStringUtils.escapeHiveCommand(location)).append("' TBLPROPERTIES ("); // Exclude all standard table properties. Set<String> excludes = getHiveMetastoreConstants(); - excludes.addAll(Lists.newArrayList(StatsSetupConst.TABLE_PARAMS_STATS_KEYS)); + excludes.addAll(StatsSetupConst.TABLE_PARAMS_STATS_KEYS); isFirst = true; for (Map.Entry<String, String> e : t.getParameters().entrySet()) { if (e.getValue() == null) continue; http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/test/queries/clientpositive/erasure_explain.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/erasure_explain.q b/ql/src/test/queries/clientpositive/erasure_explain.q new file mode 100644 index 0000000..e2954d4 --- /dev/null +++ b/ql/src/test/queries/clientpositive/erasure_explain.q @@ -0,0 +1,24 @@ +--! qt:dataset:src +--! qt:dataset:srcpart +-- Test explain diagnostics with Erasure Coding + +ERASURE echo listPolicies originally was; +ERASURE listPolicies; + +show table extended like srcpart; + +desc formatted srcpart; + +explain select key, value from srcpart; + +explain extended select key, value from srcpart; + +show table extended like src; + +desc formatted src; + +explain select key, value from src; + +explain extended select key, value from src; + + http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/test/queries/clientpositive/erasure_simple.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/erasure_simple.q b/ql/src/test/queries/clientpositive/erasure_simple.q index c08409c..cc886c2 100644 --- a/ql/src/test/queries/clientpositive/erasure_simple.q +++ b/ql/src/test/queries/clientpositive/erasure_simple.q @@ -5,6 +5,7 @@ ERASURE echo listPolicies originally was; ERASURE listPolicies; ERASURE enablePolicy --policy RS-10-4-1024k; +ERASURE enablePolicy --policy XOR-2-1-1024k; ERASURE echo listPolicies after enablePolicy; ERASURE listPolicies; @@ -25,8 +26,20 @@ ERASURE getPolicy --path hdfs:///tmp/erasure_coding1; create table erasure_table (a int) location 'hdfs:///tmp/erasure_coding1/location1'; +-- insert some data with the default policy (RS-3-2-1024k) from the fs root insert into erasure_table values(4); + +-- set a new policy on the directory and insert some data +ERASURE setPolicy --path hdfs:///tmp/erasure_coding1 --policy XOR-2-1-1024k; +insert into erasure_table values(5); + +ERASURE echo policy on older file is; +ERASURE getPolicy --path hdfs:///tmp/erasure_coding1/location1/000000_0; +ERASURE echo policy on newer file is; +ERASURE getPolicy --path hdfs:///tmp/erasure_coding1/location1/000000_0_copy_1; + +-- show that data is present select * from erasure_table; drop table if exists erasure_table2; http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/test/results/clientnegative/unset_table_property.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientnegative/unset_table_property.q.out b/ql/src/test/results/clientnegative/unset_table_property.q.out index eb308eb..20378a1 100644 --- a/ql/src/test/results/clientnegative/unset_table_property.q.out +++ b/ql/src/test/results/clientnegative/unset_table_property.q.out @@ -24,6 +24,7 @@ bucketing_version 2 c 3 #### A masked pattern was here #### numFiles 0 +numFilesErasureCoded 0 numRows 0 rawDataSize 0 totalSize 0 http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/test/results/clientpositive/beeline/materialized_view_create_rewrite.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/beeline/materialized_view_create_rewrite.q.out b/ql/src/test/results/clientpositive/beeline/materialized_view_create_rewrite.q.out index ff02643..4db5d70 100644 --- a/ql/src/test/results/clientpositive/beeline/materialized_view_create_rewrite.q.out +++ b/ql/src/test/results/clientpositive/beeline/materialized_view_create_rewrite.q.out @@ -67,6 +67,7 @@ POSTHOOK: type: SHOW_TBLPROPERTIES COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} bucketing_version 2 numFiles 1 +numFilesErasureCoded 0 numRows 2 rawDataSize 408 totalSize 457 @@ -100,6 +101,7 @@ POSTHOOK: type: SHOW_TBLPROPERTIES COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} bucketing_version 2 numFiles 1 +numFilesErasureCoded 0 numRows 2 rawDataSize 232 totalSize 326 http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out b/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out index 2e44e14..383f2dc 100644 --- a/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out +++ b/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out @@ -89,6 +89,7 @@ bucketing_version 2 druid.datasource default.cmv_mat_view_n2 druid.segment.granularity HOUR numFiles 0 +numFilesErasureCoded 0 numRows 2 rawDataSize 0 storage_handler org.apache.hadoop.hive.druid.DruidStorageHandler @@ -136,6 +137,7 @@ bucketing_version 2 druid.datasource default.cmv_mat_view2_n0 druid.segment.granularity HOUR numFiles 0 +numFilesErasureCoded 0 numRows 3 rawDataSize 0 storage_handler org.apache.hadoop.hive.druid.DruidStorageHandler @@ -515,6 +517,7 @@ druid.datasource default.cmv_mat_view2_n0 druid.segment.granularity HOUR #### A masked pattern was here #### numFiles 0 +numFilesErasureCoded 0 numRows 3 rawDataSize 0 storage_handler org.apache.hadoop.hive.druid.DruidStorageHandler http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/test/results/clientpositive/erasurecoding/erasure_explain.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/erasurecoding/erasure_explain.q.out b/ql/src/test/results/clientpositive/erasurecoding/erasure_explain.q.out new file mode 100644 index 0000000..8ada9b6 --- /dev/null +++ b/ql/src/test/results/clientpositive/erasurecoding/erasure_explain.q.out @@ -0,0 +1,409 @@ +ECHO listPolicies originally was +Policy: RS-10-4-1024k DISABLED +Policy: RS-3-2-1024k ENABLED +Policy: RS-6-3-1024k ENABLED +Policy: RS-LEGACY-6-3-1024k DISABLED +Policy: XOR-2-1-1024k DISABLED +PREHOOK: query: show table extended like srcpart +PREHOOK: type: SHOW_TABLESTATUS +POSTHOOK: query: show table extended like srcpart +POSTHOOK: type: SHOW_TABLESTATUS +tableName:srcpart +#### A masked pattern was here #### +location:hdfs://### HDFS PATH ### +inputformat:org.apache.hadoop.mapred.TextInputFormat +outputformat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +columns:struct columns { string key, string value} +partitioned:true +partitionColumns:struct partition_columns { string ds, string hr} +totalNumberFiles:4 +totalNumberErasureCodedFiles:4 +totalFileSize:23248 +maxFileSize:5812 +minFileSize:5812 +#### A masked pattern was here #### + +PREHOOK: query: desc formatted srcpart +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@srcpart +POSTHOOK: query: desc formatted srcpart +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@srcpart +# col_name data_type comment +key string default +value string default + +# Partition Information +# col_name data_type comment +ds string +hr string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + bucketing_version 2 + numFiles 4 + numFilesErasureCoded 4 + numPartitions 4 + numRows 2000 + rawDataSize 21248 + totalSize 23248 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain select key, value from srcpart +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, value from srcpart +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 21248 Erasure files: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2000 Data size: 21248 Erasure files: 4 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: explain extended select key, value from srcpart +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select key, value from srcpart +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + location hdfs://### HDFS PATH ### + name default.srcpart + numFiles 1 + numFilesErasureCoded 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + location hdfs://### HDFS PATH ### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + location hdfs://### HDFS PATH ### + name default.srcpart + numFiles 1 + numFilesErasureCoded 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + location hdfs://### HDFS PATH ### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + location hdfs://### HDFS PATH ### + name default.srcpart + numFiles 1 + numFilesErasureCoded 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + location hdfs://### HDFS PATH ### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + location hdfs://### HDFS PATH ### + name default.srcpart + numFiles 1 + numFilesErasureCoded 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + location hdfs://### HDFS PATH ### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Processor Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 21248 Erasure files: 4 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2000 Data size: 21248 Erasure files: 4 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: show table extended like src +PREHOOK: type: SHOW_TABLESTATUS +POSTHOOK: query: show table extended like src +POSTHOOK: type: SHOW_TABLESTATUS +tableName:src +#### A masked pattern was here #### +location:hdfs://### HDFS PATH ### +inputformat:org.apache.hadoop.mapred.TextInputFormat +outputformat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +columns:struct columns { string key, string value} +partitioned:false +partitionColumns: +totalNumberFiles:1 +totalNumberErasureCodedFiles:1 +totalFileSize:5812 +maxFileSize:5812 +minFileSize:5812 +#### A masked pattern was here #### + +PREHOOK: query: desc formatted src +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src +POSTHOOK: query: desc formatted src +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src +# col_name data_type comment +key string default +value string default + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} + bucketing_version 2 + numFiles 1 + numFilesErasureCoded 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain select key, value from src +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, value from src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Erasure files: 1 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Erasure files: 1 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: explain extended select key, value from src +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select key, value from src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Erasure files: 1 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Erasure files: 1 Basic stats: COMPLETE Column stats: NONE + ListSink + http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/test/results/clientpositive/erasurecoding/erasure_simple.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/erasurecoding/erasure_simple.q.out b/ql/src/test/results/clientpositive/erasurecoding/erasure_simple.q.out index 01f6015..b44cb7d 100644 --- a/ql/src/test/results/clientpositive/erasurecoding/erasure_simple.q.out +++ b/ql/src/test/results/clientpositive/erasurecoding/erasure_simple.q.out @@ -5,12 +5,13 @@ Policy: RS-6-3-1024k ENABLED Policy: RS-LEGACY-6-3-1024k DISABLED Policy: XOR-2-1-1024k DISABLED Enabled EC policy 'RS-10-4-1024k' +Enabled EC policy 'XOR-2-1-1024k' ECHO listPolicies after enablePolicy Policy: RS-10-4-1024k ENABLED Policy: RS-3-2-1024k ENABLED Policy: RS-6-3-1024k ENABLED Policy: RS-LEGACY-6-3-1024k DISABLED -Policy: XOR-2-1-1024k DISABLED +Policy: XOR-2-1-1024k ENABLED ECHO original policy on erasure_coding1 EC policy is 'RS-3-2-1024k' ECHO set the default policy on erasure_coding1 @@ -39,6 +40,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@erasure_table POSTHOOK: Lineage: erasure_table.a SCRIPT [] +Set EC policy' XOR-2-1-1024k +PREHOOK: query: insert into erasure_table values(5) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@erasure_table +POSTHOOK: query: insert into erasure_table values(5) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@erasure_table +POSTHOOK: Lineage: erasure_table.a SCRIPT [] +ECHO policy on older file is +EC policy is 'RS-3-2-1024k' +ECHO policy on newer file is +EC policy is 'XOR-2-1-1024k' PREHOOK: query: select * from erasure_table PREHOOK: type: QUERY PREHOOK: Input: default@erasure_table @@ -48,6 +63,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@erasure_table POSTHOOK: Output: hdfs://### HDFS PATH ### 4 +5 PREHOOK: query: drop table if exists erasure_table2 PREHOOK: type: DROPTABLE POSTHOOK: query: drop table if exists erasure_table2 @@ -88,6 +104,7 @@ columns:struct columns { string key, string value} partitioned:false partitionColumns: totalNumberFiles:1 +totalNumberErasureCodedFiles:1 totalFileSize:5812 maxFileSize:5812 minFileSize:5812 @@ -100,6 +117,7 @@ POSTHOOK: query: SHOW TBLPROPERTIES erasure_table2 POSTHOOK: type: SHOW_TBLPROPERTIES COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} numFiles 1 +numFilesErasureCoded 1 numRows 500 rawDataSize 5312 totalSize 5812 http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/test/results/clientpositive/llap/materialized_view_create.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create.q.out index 9a70096..95f8966 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_create.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_create.q.out @@ -245,6 +245,7 @@ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} bucketing_version 2 key value numFiles 1 +numFilesErasureCoded 0 numRows 5 rawDataSize 1605 totalSize 703 http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite.q.out index c3cd893..71adebb 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite.q.out @@ -67,6 +67,7 @@ POSTHOOK: type: SHOW_TBLPROPERTIES COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} bucketing_version 2 numFiles 1 +numFilesErasureCoded 0 numRows 2 rawDataSize 408 totalSize 457 @@ -100,6 +101,7 @@ POSTHOOK: type: SHOW_TBLPROPERTIES COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} bucketing_version 2 numFiles 1 +numFilesErasureCoded 0 numRows 2 rawDataSize 232 totalSize 326 http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_dummy.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_dummy.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_dummy.q.out index e2972f3..ce1c281 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_dummy.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_dummy.q.out @@ -67,6 +67,7 @@ POSTHOOK: type: SHOW_TBLPROPERTIES COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} bucketing_version 2 numFiles 1 +numFilesErasureCoded 0 numRows 2 rawDataSize 408 totalSize 457 @@ -100,6 +101,7 @@ POSTHOOK: type: SHOW_TBLPROPERTIES COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} bucketing_version 2 numFiles 1 +numFilesErasureCoded 0 numRows 2 rawDataSize 232 totalSize 326 http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_multi_db.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_multi_db.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_multi_db.q.out index e5fb23d..98f7437 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_multi_db.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_multi_db.q.out @@ -91,6 +91,7 @@ POSTHOOK: type: SHOW_TBLPROPERTIES COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} bucketing_version 2 numFiles 1 +numFilesErasureCoded 0 numRows 2 rawDataSize 408 totalSize 457 @@ -124,6 +125,7 @@ POSTHOOK: type: SHOW_TBLPROPERTIES COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} bucketing_version 2 numFiles 1 +numFilesErasureCoded 0 numRows 2 rawDataSize 232 totalSize 326 http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/test/results/clientpositive/llap/materialized_view_describe.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_describe.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_describe.q.out index 85092a0..c68c127 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_describe.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_describe.q.out @@ -98,6 +98,7 @@ bucketing_version 2 comment this is the first view key foo numFiles 1 +numFilesErasureCoded 0 numRows 5 rawDataSize 580 totalSize 345 http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/test/results/clientpositive/show_tblproperties.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/show_tblproperties.q.out b/ql/src/test/results/clientpositive/show_tblproperties.q.out index e4bda1d..83e1ebd 100644 --- a/ql/src/test/results/clientpositive/show_tblproperties.q.out +++ b/ql/src/test/results/clientpositive/show_tblproperties.q.out @@ -41,6 +41,7 @@ bar bar value bucketing_version 2 #### A masked pattern was here #### numFiles 0 +numFilesErasureCoded 0 numRows 0 rawDataSize 0 tmp true @@ -60,6 +61,7 @@ bar bar value bucketing_version 2 #### A masked pattern was here #### numFiles 0 +numFilesErasureCoded 0 numRows 0 rawDataSize 0 tmp true @@ -115,6 +117,7 @@ bar bar value bucketing_version 2 #### A masked pattern was here #### numFiles 0 +numFilesErasureCoded 0 numRows 0 rawDataSize 0 tmp true @@ -134,6 +137,7 @@ bar bar value1 bucketing_version 2 #### A masked pattern was here #### numFiles 0 +numFilesErasureCoded 0 numRows 0 rawDataSize 0 tmp true1 @@ -159,6 +163,7 @@ bar bar value1 bucketing_version 2 #### A masked pattern was here #### numFiles 0 +numFilesErasureCoded 0 numRows 0 rawDataSize 0 tmp true1 http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/test/results/clientpositive/unset_table_view_property.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/unset_table_view_property.q.out b/ql/src/test/results/clientpositive/unset_table_view_property.q.out index 5887971..5d140d6 100644 --- a/ql/src/test/results/clientpositive/unset_table_view_property.q.out +++ b/ql/src/test/results/clientpositive/unset_table_view_property.q.out @@ -19,6 +19,7 @@ POSTHOOK: type: SHOW_TBLPROPERTIES COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"col1":"true","col2":"true"}} bucketing_version 2 numFiles 0 +numFilesErasureCoded 0 numRows 0 rawDataSize 0 totalSize 0 @@ -41,6 +42,7 @@ bucketing_version 2 c 3 #### A masked pattern was here #### numFiles 0 +numFilesErasureCoded 0 numRows 0 rawDataSize 0 totalSize 0 @@ -61,6 +63,7 @@ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"col1":"true","col2" bucketing_version 2 #### A masked pattern was here #### numFiles 0 +numFilesErasureCoded 0 numRows 0 rawDataSize 0 totalSize 0 @@ -84,6 +87,7 @@ c 3 d 4 #### A masked pattern was here #### numFiles 0 +numFilesErasureCoded 0 numRows 0 rawDataSize 0 totalSize 0 @@ -105,6 +109,7 @@ bucketing_version 2 c 3 #### A masked pattern was here #### numFiles 0 +numFilesErasureCoded 0 numRows 0 rawDataSize 0 totalSize 0 @@ -125,6 +130,7 @@ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"col1":"true","col2" bucketing_version 2 #### A masked pattern was here #### numFiles 0 +numFilesErasureCoded 0 numRows 0 rawDataSize 0 totalSize 0 @@ -149,6 +155,7 @@ c 3 d 4 #### A masked pattern was here #### numFiles 0 +numFilesErasureCoded 0 numRows 0 rawDataSize 0 totalSize 0 @@ -171,6 +178,7 @@ bucketing_version 2 c 3 #### A masked pattern was here #### numFiles 0 +numFilesErasureCoded 0 numRows 0 rawDataSize 0 totalSize 0 @@ -192,6 +200,7 @@ a 1 bucketing_version 2 #### A masked pattern was here #### numFiles 0 +numFilesErasureCoded 0 numRows 0 rawDataSize 0 totalSize 0 http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java index 78ea01d..a7ca05a 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java @@ -22,6 +22,7 @@ import java.util.List; import java.util.Map; import java.util.TreeMap; +import com.google.common.collect.ImmutableList; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars; @@ -105,6 +106,11 @@ public class StatsSetupConst { public static final String RAW_DATA_SIZE = "rawDataSize"; /** + * The name of the statistic for Number of Erasure Coded Files - to be published or gathered. + */ + public static final String NUM_ERASURE_CODED_FILES = "numFilesErasureCoded"; + + /** * Temp dir for writing stats from tasks. */ public static final String STATS_TMP_LOC = "hive.stats.tmp.loc"; @@ -113,18 +119,20 @@ public class StatsSetupConst { /** * List of all supported statistics */ - public static final String[] supportedStats = {NUM_FILES,ROW_COUNT,TOTAL_SIZE,RAW_DATA_SIZE}; + public static final List<String> SUPPORTED_STATS = ImmutableList.of( + NUM_FILES, ROW_COUNT, TOTAL_SIZE, RAW_DATA_SIZE, NUM_ERASURE_CODED_FILES); /** * List of all statistics that need to be collected during query execution. These are * statistics that inherently require a scan of the data. */ - public static final String[] statsRequireCompute = new String[] {ROW_COUNT,RAW_DATA_SIZE}; + public static final List<String> STATS_REQUIRE_COMPUTE = ImmutableList.of(ROW_COUNT, RAW_DATA_SIZE); /** * List of statistics that can be collected quickly without requiring a scan of the data. */ - public static final String[] fastStats = new String[] {NUM_FILES,TOTAL_SIZE}; + public static final List<String> FAST_STATS = ImmutableList.of( + NUM_FILES, TOTAL_SIZE, NUM_ERASURE_CODED_FILES); // This string constant is used to indicate to AlterHandler that // alterPartition/alterTable is happening via statsTask or via user. @@ -154,8 +162,9 @@ public class StatsSetupConst { public static final String FALSE = "false"; // The parameter keys for the table statistics. Those keys are excluded from 'show create table' command output. - public static final String[] TABLE_PARAMS_STATS_KEYS = new String[] { - COLUMN_STATS_ACCURATE, NUM_FILES, TOTAL_SIZE,ROW_COUNT, RAW_DATA_SIZE, NUM_PARTITIONS}; + public static final List<String> TABLE_PARAMS_STATS_KEYS = ImmutableList.of( + COLUMN_STATS_ACCURATE, NUM_FILES, TOTAL_SIZE, ROW_COUNT, RAW_DATA_SIZE, NUM_PARTITIONS, + NUM_ERASURE_CODED_FILES); private static class ColumnStatsAccurate { private static ObjectReader objectReader; @@ -299,7 +308,7 @@ public class StatsSetupConst { public static void setStatsStateForCreateTable(Map<String, String> params, List<String> cols, String setting) { if (TRUE.equals(setting)) { - for (String stat : StatsSetupConst.supportedStats) { + for (String stat : StatsSetupConst.SUPPORTED_STATS) { params.put(stat, "0"); } } http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index c6c04b7..31bf615 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.metastore; import static org.apache.commons.lang.StringUtils.join; -import static org.apache.hadoop.hive.metastore.ReplChangeManager.SOURCE_OF_REPLICATION; import static org.apache.hadoop.hive.metastore.Warehouse.DEFAULT_DATABASE_COMMENT; import static org.apache.hadoop.hive.metastore.Warehouse.DEFAULT_DATABASE_NAME; import static org.apache.hadoop.hive.metastore.Warehouse.DEFAULT_CATALOG_NAME; @@ -2674,7 +2673,7 @@ public class HiveMetaStore extends ThriftHiveMetastore { if (null == props) { return; } - for (String stat : StatsSetupConst.supportedStats) { + for (String stat : StatsSetupConst.SUPPORTED_STATS) { String statVal = props.get(stat); if (statVal != null) { //In the case of truncate table, we set the stats to be 0. http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java index cbe89b6..73924ee 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java @@ -95,7 +95,6 @@ import java.util.Properties; import java.util.Map.Entry; import java.util.SortedMap; import java.util.SortedSet; -import java.util.StringJoiner; import java.util.TimeZone; import java.util.TreeMap; import java.util.TreeSet; @@ -628,7 +627,7 @@ public class MetaStoreUtils { * @return True if the passed Parameters Map contains values for all "Fast Stats". */ private static boolean containsAllFastStats(Map<String, String> partParams) { - for (String stat : StatsSetupConst.fastStats) { + for (String stat : StatsSetupConst.FAST_STATS) { if (!partParams.containsKey(stat)) { return false; } @@ -639,7 +638,7 @@ public class MetaStoreUtils { public static boolean isFastStatsSame(Partition oldPart, Partition newPart) { // requires to calculate stats if new and old have different fast stats if ((oldPart != null) && (oldPart.getParameters() != null)) { - for (String stat : StatsSetupConst.fastStats) { + for (String stat : StatsSetupConst.FAST_STATS) { if (oldPart.getParameters().containsKey(stat)) { Long oldStat = Long.parseLong(oldPart.getParameters().get(stat)); Long newStat = Long.parseLong(newPart.getParameters().get(stat)); @@ -720,20 +719,26 @@ public class MetaStoreUtils { LOG.trace("Populating quick stats based on {} files", fileStatus.size()); int numFiles = 0; long tableSize = 0L; + int numErasureCodedFiles = 0; for (FileStatus status : fileStatus) { // don't take directories into account for quick stats TODO: wtf? if (!status.isDir()) { tableSize += status.getLen(); numFiles += 1; + if (status.isErasureCoded()) { + numErasureCodedFiles++; + } } } params.put(StatsSetupConst.NUM_FILES, Integer.toString(numFiles)); params.put(StatsSetupConst.TOTAL_SIZE, Long.toString(tableSize)); + params.put(StatsSetupConst.NUM_ERASURE_CODED_FILES, Integer.toString(numErasureCodedFiles)); } public static void clearQuickStats(Map<String, String> params) { params.remove(StatsSetupConst.NUM_FILES); params.remove(StatsSetupConst.TOTAL_SIZE); + params.remove(StatsSetupConst.NUM_ERASURE_CODED_FILES); }