Repository: hive Updated Branches: refs/heads/branch-1 411049167 -> e2607471e
HIVE-11786: Deprecate the use of redundant column in column stats related tables (Chaoyu Tang, reviewed by Szehon Ho, Sergey Shelukhin) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e2607471 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e2607471 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e2607471 Branch: refs/heads/branch-1 Commit: e2607471e4b62eda9c742e079c5c5a715b069d84 Parents: 4110491 Author: ctang <[email protected]> Authored: Tue Oct 6 10:32:04 2015 -0400 Committer: ctang <[email protected]> Committed: Tue Oct 6 10:34:36 2015 -0400 ---------------------------------------------------------------------- .../hive/metastore/MetaStoreDirectSql.java | 62 ++++++++++++++------ .../hadoop/hive/metastore/ObjectStore.java | 33 ++++++----- .../hive/metastore/StatObjectConverter.java | 20 +++---- .../metastore/txn/CompactionTxnHandler.java | 50 ++++++++++++++-- .../model/MPartitionColumnStatistics.java | 12 ---- .../metastore/model/MTableColumnStatistics.java | 8 --- .../hive/metastore/VerifyingObjectStore.java | 28 ++++++++- 7 files changed, 142 insertions(+), 71 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/e2607471/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java index 522fcc2..82d940d 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java @@ -1097,9 +1097,9 @@ class MetaStoreDirectSql { } boolean doTrace = LOG.isDebugEnabled(); long start = doTrace ? 
System.nanoTime() : 0; - String queryText = "select " + STATS_COLLIST + " from \"TAB_COL_STATS\" " - + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? and \"COLUMN_NAME\" in (" - + makeParams(colNames.size()) + ")"; + String queryText = "select " + STATS_COLLIST + " from " + STATS_TABLE_JOINED_TBLS + + "where " + STATS_DB_NAME + " = ? and " + STATS_TABLE_NAME + " = ? " + + "and \"COLUMN_NAME\" in (" + makeParams(colNames.size()) + ")"; Query query = pm.newQuery("javax.jdo.query.SQL", queryText); Object[] params = new Object[colNames.size() + 2]; params[0] = dbName; @@ -1189,11 +1189,11 @@ class MetaStoreDirectSql { assert !colNames.isEmpty() && !partNames.isEmpty(); long partsFound = 0; boolean doTrace = LOG.isDebugEnabled(); - String queryText = "select count(\"COLUMN_NAME\") from \"PART_COL_STATS\"" - + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? " - + " and \"COLUMN_NAME\" in (" + makeParams(colNames.size()) + ")" - + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" - + " group by \"PARTITION_NAME\""; + String queryText = "select count(\"COLUMN_NAME\") from " + STATS_PART_JOINED_TBLS + + "where " + STATS_DB_NAME + " = ? and " + STATS_TABLE_NAME + " = ? " + + "and \"PART_COL_STATS\".\"COLUMN_NAME\" in (" + makeParams(colNames.size()) + ") " + + "and " + STATS_PARTITION_NAME + " in (" + makeParams(partNames.size()) + ") " + + "group by " + STATS_PARTITION_NAME; long start = doTrace ? 
System.nanoTime() : 0; Query query = pm.newQuery("javax.jdo.query.SQL", queryText); Object qResult = executeWithArray(query, prepareParams( @@ -1237,7 +1237,7 @@ class MetaStoreDirectSql { + "avg((\"LONG_HIGH_VALUE\"-\"LONG_LOW_VALUE\")/cast(\"NUM_DISTINCTS\" as decimal))," + "avg((\"DOUBLE_HIGH_VALUE\"-\"DOUBLE_LOW_VALUE\")/\"NUM_DISTINCTS\")," + "avg((cast(\"BIG_DECIMAL_HIGH_VALUE\" as decimal)-cast(\"BIG_DECIMAL_LOW_VALUE\" as decimal))/\"NUM_DISTINCTS\")," - + "sum(\"NUM_DISTINCTS\")" + " from \"PART_COL_STATS\"" + + "sum(\"NUM_DISTINCTS\")" + " from " + PART_COL_STATS_VW + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? "; String queryText = null; long start = 0; @@ -1276,7 +1276,7 @@ class MetaStoreDirectSql { // We need to extrapolate this partition based on the other partitions List<ColumnStatisticsObj> colStats = new ArrayList<ColumnStatisticsObj>(colNames.size()); queryText = "select \"COLUMN_NAME\", \"COLUMN_TYPE\", count(\"PARTITION_NAME\") " - + " from \"PART_COL_STATS\"" + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? " + + " from " + PART_COL_STATS_VW + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? " + " and \"COLUMN_NAME\" in (" + makeParams(colNames.size()) + ")" + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + " group by \"COLUMN_NAME\", \"COLUMN_TYPE\""; @@ -1341,7 +1341,7 @@ class MetaStoreDirectSql { // get sum for all columns to reduce the number of queries Map<String, Map<Integer, Object>> sumMap = new HashMap<String, Map<Integer, Object>>(); queryText = "select \"COLUMN_NAME\", sum(\"NUM_NULLS\"), sum(\"NUM_TRUES\"), sum(\"NUM_FALSES\"), sum(\"NUM_DISTINCTS\")" - + " from \"PART_COL_STATS\"" + + " from " + PART_COL_STATS_VW + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? 
" + " and \"COLUMN_NAME\" in (" + makeParams(extraColumnNameTypeParts.size()) @@ -1418,13 +1418,13 @@ class MetaStoreDirectSql { // left/right borders if (!decimal) { queryText = "select \"" + colStatName - + "\",\"PARTITION_NAME\" from \"PART_COL_STATS\"" + + "\",\"PARTITION_NAME\" from " + PART_COL_STATS_VW + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?" + " and \"COLUMN_NAME\" = ?" + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + " order by \"" + colStatName + "\""; } else { queryText = "select \"" + colStatName - + "\",\"PARTITION_NAME\" from \"PART_COL_STATS\"" + + "\",\"PARTITION_NAME\" from " + PART_COL_STATS_VW + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?" + " and \"COLUMN_NAME\" = ?" + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + " order by cast(\"" + colStatName + "\" as decimal)"; @@ -1456,7 +1456,7 @@ class MetaStoreDirectSql { + "avg((\"LONG_HIGH_VALUE\"-\"LONG_LOW_VALUE\")/cast(\"NUM_DISTINCTS\" as decimal))," + "avg((\"DOUBLE_HIGH_VALUE\"-\"DOUBLE_LOW_VALUE\")/\"NUM_DISTINCTS\")," + "avg((cast(\"BIG_DECIMAL_HIGH_VALUE\" as decimal)-cast(\"BIG_DECIMAL_LOW_VALUE\" as decimal))/\"NUM_DISTINCTS\")" - + " from \"PART_COL_STATS\"" + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?" + + " from " + PART_COL_STATS_VW + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?" + " and \"COLUMN_NAME\" = ?" + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + " group by \"COLUMN_NAME\""; start = doTrace ? System.nanoTime() : 0; @@ -1531,10 +1531,11 @@ class MetaStoreDirectSql { } boolean doTrace = LOG.isDebugEnabled(); long start = doTrace ? System.nanoTime() : 0; - String queryText = "select \"PARTITION_NAME\", " + STATS_COLLIST + " from \"PART_COL_STATS\"" - + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? 
and \"COLUMN_NAME\" in (" - + makeParams(colNames.size()) + ") AND \"PARTITION_NAME\" in (" - + makeParams(partNames.size()) + ") order by \"PARTITION_NAME\""; + String queryText = "select " + STATS_PARTITION_NAME + ", " + STATS_COLLIST + " from " + + STATS_PART_JOINED_TBLS + " where " + STATS_DB_NAME + " = ? and " + STATS_TABLE_NAME + " = ? " + + "and \"COLUMN_NAME\" in (" + makeParams(colNames.size()) + ") " + + "and " + STATS_PARTITION_NAME + " in (" + makeParams(partNames.size()) + ") " + + "order by " + STATS_PARTITION_NAME + " asc"; Query query = pm.newQuery("javax.jdo.query.SQL", queryText); Object qResult = executeWithArray(query, prepareParams( @@ -1576,6 +1577,31 @@ class MetaStoreDirectSql { + "\"BIG_DECIMAL_HIGH_VALUE\", \"NUM_NULLS\", \"NUM_DISTINCTS\", \"AVG_COL_LEN\", " + "\"MAX_COL_LEN\", \"NUM_TRUES\", \"NUM_FALSES\", \"LAST_ANALYZED\" "; + private static final String STATS_PART_JOINED_TBLS = "\"PART_COL_STATS\" " + + "JOIN \"PARTITIONS\" ON \"PART_COL_STATS\".\"PART_ID\" = \"PARTITIONS\".\"PART_ID\" " + + "JOIN \"TBLS\" ON \"PARTITIONS\".\"TBL_ID\" = \"TBLS\".\"TBL_ID\" " + + "JOIN \"DBS\" ON \"TBLS\".\"DB_ID\" = \"DBS\".\"DB_ID\" "; + + private static final String STATS_TABLE_JOINED_TBLS = "\"TAB_COL_STATS\" " + + "JOIN \"TBLS\" ON \"TAB_COL_STATS\".\"TBL_ID\" = \"TBLS\".\"TBL_ID\" " + + "JOIN \"DBS\" ON \"TBLS\".\"DB_ID\" = \"DBS\".\"DB_ID\" "; + + private static final String PART_COL_STATS_VW = "(SELECT \"DBS\".\"NAME\" \"DB_NAME\", " + + "\"TBLS\".\"TBL_NAME\" \"TABLE_NAME\", \"PARTITIONS\".\"PART_NAME\" \"PARTITION_NAME\", " + + "\"PCS\".\"COLUMN_NAME\", \"PCS\".\"COLUMN_TYPE\", \"PCS\".\"LONG_LOW_VALUE\", " + + "\"PCS\".\"LONG_HIGH_VALUE\", \"PCS\".\"DOUBLE_HIGH_VALUE\", \"PCS\".\"DOUBLE_LOW_VALUE\", " + + "\"PCS\".\"BIG_DECIMAL_LOW_VALUE\", \"PCS\".\"BIG_DECIMAL_HIGH_VALUE\", \"PCS\".\"NUM_NULLS\", " + + "\"PCS\".\"NUM_DISTINCTS\", \"PCS\".\"AVG_COL_LEN\",\"PCS\".\"MAX_COL_LEN\", " + + "\"PCS\".\"NUM_TRUES\", 
\"PCS\".\"NUM_FALSES\",\"PCS\".\"LAST_ANALYZED\" " + + "FROM \"PART_COL_STATS\" \"PCS\" JOIN \"PARTITIONS\" " + + "ON (\"PCS\".\"PART_ID\" = \"PARTITIONS\".\"PART_ID\") " + + "JOIN \"TBLS\" ON (\"PARTITIONS\".\"TBL_ID\" = \"TBLS\".\"TBL_ID\") " + + "JOIN \"DBS\" ON (\"TBLS\".\"DB_ID\" = \"DBS\".\"DB_ID\")) VW "; + + private static final String STATS_DB_NAME = "\"DBS\".\"NAME\" "; + private static final String STATS_TABLE_NAME = "\"TBLS\".\"TBL_NAME\" "; + private static final String STATS_PARTITION_NAME = "\"PARTITIONS\".\"PART_NAME\" "; + private ColumnStatistics makeColumnStats( List<Object[]> list, ColumnStatisticsDesc csd, int offset) throws MetaException { ColumnStatistics result = new ColumnStatistics(); http://git-wip-us.apache.org/repos/asf/hive/blob/e2607471/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java index 4d6bfcc..420f753 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java @@ -6107,8 +6107,8 @@ public class ObjectStore implements RawStore, Configurable { private void writeMTableColumnStatistics(Table table, MTableColumnStatistics mStatsObj) throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException { - String dbName = mStatsObj.getDbName(); - String tableName = mStatsObj.getTableName(); + String tableName = mStatsObj.getTable().getTableName(); + String dbName = mStatsObj.getTable().getDatabase().getName(); String colName = mStatsObj.getColName(); QueryWrapper queryWrapper = new QueryWrapper(); @@ -6134,9 +6134,9 @@ public class ObjectStore implements RawStore, Configurable { private void writeMPartitionColumnStatistics(Table table, Partition partition, 
MPartitionColumnStatistics mStatsObj) throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException { - String dbName = mStatsObj.getDbName(); - String tableName = mStatsObj.getTableName(); - String partName = mStatsObj.getPartitionName(); + String partName = mStatsObj.getPartition().getPartitionName(); + String tableName = mStatsObj.getPartition().getTable().getTableName(); + String dbName = mStatsObj.getPartition().getTable().getDatabase().getName(); String colName = mStatsObj.getColName(); LOG.info("Updating partition level column statistics for db=" + dbName + " tableName=" + @@ -6242,7 +6242,7 @@ public class ObjectStore implements RawStore, Configurable { List<MTableColumnStatistics> result = null; validateTableCols(table, colNames); Query query = queryWrapper.query = pm.newQuery(MTableColumnStatistics.class); - String filter = "tableName == t1 && dbName == t2 && ("; + String filter = "table.tableName == t1 && table.database.name == t2 && ("; String paramStr = "java.lang.String t1, java.lang.String t2"; Object[] params = new Object[colNames.size() + 2]; params[0] = table.getTableName(); @@ -6368,7 +6368,7 @@ public class ObjectStore implements RawStore, Configurable { for (int i = 0; i <= mStats.size(); ++i) { boolean isLast = i == mStats.size(); MPartitionColumnStatistics mStatsObj = isLast ? null : mStats.get(i); - String partName = isLast ? null : (String)mStatsObj.getPartitionName(); + String partName = isLast ? 
null : (String)mStatsObj.getPartition().getPartitionName(); if (isLast || !partName.equals(lastPartName)) { if (i != 0) { result.add(new ColumnStatistics(csd, curList)); @@ -6429,14 +6429,14 @@ public class ObjectStore implements RawStore, Configurable { validateTableCols(table, colNames); Query query = queryWrapper.query = pm.newQuery(MPartitionColumnStatistics.class); String paramStr = "java.lang.String t1, java.lang.String t2"; - String filter = "tableName == t1 && dbName == t2 && ("; + String filter = "partition.table.tableName == t1 && partition.table.database.name == t2 && ("; Object[] params = new Object[colNames.size() + partNames.size() + 2]; int i = 0; params[i++] = table.getTableName(); params[i++] = table.getDbName(); int firstI = i; for (String s : partNames) { - filter += ((i == firstI) ? "" : " || ") + "partitionName == p" + i; + filter += ((i == firstI) ? "" : " || ") + "partition.partitionName == p" + i; paramStr += ", java.lang.String p" + i; params[i++] = s; } @@ -6450,7 +6450,7 @@ public class ObjectStore implements RawStore, Configurable { filter += ")"; query.setFilter(filter); query.declareParameters(paramStr); - query.setOrdering("partitionName ascending"); + query.setOrdering("partition.partitionName ascending"); @SuppressWarnings("unchecked") List<MPartitionColumnStatistics> result = (List<MPartitionColumnStatistics>) query.executeWithArray(params); @@ -6475,7 +6475,7 @@ public class ObjectStore implements RawStore, Configurable { String dbName, String tableName, List<String> partNames) throws MetaException { ObjectPair<Query, Object[]> queryWithParams = makeQueryByPartitionNames( dbName, tableName, partNames, MPartitionColumnStatistics.class, - "tableName", "dbName", "partition.partitionName"); + "partition.table.tableName", "partition.table.database.name", "partition.partitionName"); queryWithParams.getFirst().deletePersistentAll(queryWithParams.getSecond()); } @@ -6510,13 +6510,14 @@ public class ObjectStore implements RawStore, 
Configurable { String parameters; if (colName != null) { filter = - "partition.partitionName == t1 && dbName == t2 && tableName == t3 && " - + "colName == t4"; + "partition.partitionName == t1 && partition.table.database.name == t2 && " + + "partition.table.tableName == t3 && colName == t4"; parameters = "java.lang.String t1, java.lang.String t2, " + "java.lang.String t3, java.lang.String t4"; } else { - filter = "partition.partitionName == t1 && dbName == t2 && tableName == t3"; + filter = "partition.partitionName == t1 && partition.table.database.name == t2 && " + + " partition.table.tableName == t3"; parameters = "java.lang.String t1, java.lang.String t2, java.lang.String t3"; } query.setFilter(filter); @@ -6587,10 +6588,10 @@ public class ObjectStore implements RawStore, Configurable { String filter; String parameters; if (colName != null) { - filter = "table.tableName == t1 && dbName == t2 && colName == t3"; + filter = "table.tableName == t1 && table.database.name == t2 && colName == t3"; parameters = "java.lang.String t1, java.lang.String t2, java.lang.String t3"; } else { - filter = "table.tableName == t1 && dbName == t2"; + filter = "table.tableName == t1 && table.database.name == t2"; parameters = "java.lang.String t1, java.lang.String t2"; } http://git-wip-us.apache.org/repos/asf/hive/blob/e2607471/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java b/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java index b3ceff1..dc56a8f 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java @@ -58,8 +58,8 @@ public class StatObjectConverter { MTableColumnStatistics mColStats = new MTableColumnStatistics(); mColStats.setTable(table); - 
mColStats.setDbName(statsDesc.getDbName()); - mColStats.setTableName(statsDesc.getTableName()); + mColStats.setDbName("Deprecated"); + mColStats.setTableName("Deprecated"); mColStats.setLastAnalyzed(statsDesc.getLastAnalyzed()); mColStats.setColName(statsObj.getColName()); mColStats.setColType(statsObj.getColType()); @@ -289,8 +289,8 @@ public class StatObjectConverter { MTableColumnStatistics mStatsObj) { ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(); statsDesc.setIsTblLevel(true); - statsDesc.setDbName(mStatsObj.getDbName()); - statsDesc.setTableName(mStatsObj.getTableName()); + statsDesc.setTableName(mStatsObj.getTable().getTableName()); + statsDesc.setDbName(mStatsObj.getTable().getDatabase().getName()); statsDesc.setLastAnalyzed(mStatsObj.getLastAnalyzed()); return statsDesc; } @@ -304,9 +304,9 @@ public class StatObjectConverter { MPartitionColumnStatistics mColStats = new MPartitionColumnStatistics(); mColStats.setPartition(partition); - mColStats.setDbName(statsDesc.getDbName()); - mColStats.setTableName(statsDesc.getTableName()); - mColStats.setPartitionName(statsDesc.getPartName()); + mColStats.setDbName("Deprecated"); + mColStats.setTableName("Deprecated"); + mColStats.setPartitionName("Deprecated"); mColStats.setLastAnalyzed(statsDesc.getLastAnalyzed()); mColStats.setColName(statsObj.getColName()); mColStats.setColType(statsObj.getColType()); @@ -442,9 +442,9 @@ public class StatObjectConverter { MPartitionColumnStatistics mStatsObj) { ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(); statsDesc.setIsTblLevel(false); - statsDesc.setDbName(mStatsObj.getDbName()); - statsDesc.setTableName(mStatsObj.getTableName()); - statsDesc.setPartName(mStatsObj.getPartitionName()); + statsDesc.setPartName(mStatsObj.getPartition().getPartitionName()); + statsDesc.setTableName(mStatsObj.getPartition().getTable().getTableName()); + statsDesc.setDbName(mStatsObj.getPartition().getTable().getDatabase().getName()); 
statsDesc.setLastAnalyzed(mStatsObj.getLastAnalyzed()); return statsDesc; } http://git-wip-us.apache.org/repos/asf/hive/blob/e2607471/metastore/src/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java index 44ee5c6..7d0a76a 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java @@ -554,15 +554,18 @@ public class CompactionTxnHandler extends TxnHandler { StringBuilder bldr = new StringBuilder(); bldr.append("SELECT ").append(quote).append("COLUMN_NAME").append(quote) .append(" FROM ") - .append(quote).append((ci.partName == null ? "TAB_COL_STATS" : "PART_COL_STATS")) - .append(quote) + .append((ci.partName == null ? 
+ getTableColStatsJoinedTables(quote) : getPartitionColStatsJoinedTables(quote))) .append(" WHERE ") - .append(quote).append("DB_NAME").append(quote).append(" = '").append(ci.dbname) - .append("' AND ").append(quote).append("TABLE_NAME").append(quote) + .append(quote).append("DBS").append(quote).append(".").append(quote).append("NAME").append(quote) + .append(" = '").append(ci.dbname) + .append("' AND ") + .append(quote).append("TBLS").append(quote).append(".").append(quote).append("TBL_NAME").append(quote) .append(" = '").append(ci.tableName).append("'"); if (ci.partName != null) { - bldr.append(" AND ").append(quote).append("PARTITION_NAME").append(quote).append(" = '") - .append(ci.partName).append("'"); + bldr.append(" AND ") + .append(quote).append("PARTITIONS").append(quote).append(".").append(quote).append("PART_NAME").append(quote) + .append(" = '").append(ci.partName).append("'"); } String s = bldr.toString(); @@ -612,6 +615,41 @@ public class CompactionTxnHandler extends TxnHandler { } return new ValidCompactorTxnList(exceptions, minOpenTxn, highWater); } + + private String getTableColStatsJoinedTables(String quote) { + return (new StringBuffer(quote)).append("TAB_COL_STATS").append(quote) + .append(" JOIN ").append(quote).append("TBLS").append(quote) + .append(" ON ").append(quote).append("TAB_COL_STATS").append(quote) + .append(".").append(quote).append("TBL_ID").append(quote) + .append(" = ").append(quote).append("TBLS").append(quote) + .append(".").append(quote).append("TBL_ID").append(quote) + .append(" JOIN ").append(quote).append("DBS").append(quote) + .append(" ON ").append(quote).append("TBLS").append(quote) + .append(".").append(quote).append("DB_ID").append(quote) + .append(" = ").append(quote).append("DBS").append(quote) + .append(".").append(quote).append("DB_ID").append(quote).toString(); + } + + private String getPartitionColStatsJoinedTables(String quote) { + //actually we do not have to get the quote from database since double quoted 
identifier + //should work on all flavors of db so far Hive supports. + return (new StringBuffer(quote)).append("PART_COL_STATS").append(quote) + .append(" JOIN ").append(quote).append("PARTITIONS").append(quote) + .append(" ON ").append(quote).append("PART_COL_STATS").append(quote) + .append(".").append(quote).append("PART_ID").append(quote) + .append(" = ").append(quote).append("PARTITIONS").append(quote) + .append(".").append(quote).append("PART_ID").append(quote) + .append(" JOIN ").append(quote).append("TBLS").append(quote) + .append(" ON ").append(quote).append("PARTITIONS").append(quote) + .append(".").append(quote).append("TBL_ID").append(quote) + .append(" = ").append(quote).append("TBLS").append(quote) + .append(".").append(quote).append("TBL_ID").append(quote) + .append(" JOIN ").append(quote).append("DBS").append(quote) + .append(" ON ").append(quote).append("TBLS").append(quote) + .append(".").append(quote).append("DB_ID").append(quote) + .append(" = ").append(quote).append("DBS").append(quote) + .append(".").append(quote).append("DB_ID").append(quote).toString(); + } } http://git-wip-us.apache.org/repos/asf/hive/blob/e2607471/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java ---------------------------------------------------------------------- diff --git a/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java b/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java index 2967a60..70608a9 100644 --- a/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java +++ b/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java @@ -56,10 +56,6 @@ public class MPartitionColumnStatistics { public MPartitionColumnStatistics() {} - public String getTableName() { - return tableName; - } - public void setTableName(String tableName) { this.tableName = tableName; } @@ -128,10 +124,6 @@ 
public class MPartitionColumnStatistics { this.lastAnalyzed = lastAnalyzed; } - public String getDbName() { - return dbName; - } - public void setDbName(String dbName) { this.dbName = dbName; } @@ -144,10 +136,6 @@ public class MPartitionColumnStatistics { this.partition = partition; } - public String getPartitionName() { - return partitionName; - } - public void setPartitionName(String partitionName) { this.partitionName = partitionName; } http://git-wip-us.apache.org/repos/asf/hive/blob/e2607471/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java ---------------------------------------------------------------------- diff --git a/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java b/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java index 132f7a1..d8dcf5b 100644 --- a/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java +++ b/metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java @@ -62,10 +62,6 @@ public class MTableColumnStatistics { this.table = table; } - public String getTableName() { - return tableName; - } - public void setTableName(String tableName) { this.tableName = tableName; } @@ -142,10 +138,6 @@ public class MTableColumnStatistics { this.lastAnalyzed = lastAnalyzed; } - public String getDbName() { - return dbName; - } - public void setDbName(String dbName) { this.dbName = dbName; } http://git-wip-us.apache.org/repos/asf/hive/blob/e2607471/metastore/src/test/org/apache/hadoop/hive/metastore/VerifyingObjectStore.java ---------------------------------------------------------------------- diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/VerifyingObjectStore.java b/metastore/src/test/org/apache/hadoop/hive/metastore/VerifyingObjectStore.java index 7e46523..8d3819a 100644 --- a/metastore/src/test/org/apache/hadoop/hive/metastore/VerifyingObjectStore.java +++ 
b/metastore/src/test/org/apache/hadoop/hive/metastore/VerifyingObjectStore.java @@ -25,6 +25,8 @@ import java.lang.reflect.Array; import java.lang.reflect.Field; import java.lang.reflect.Modifier; import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; import java.util.Iterator; import java.util.LinkedList; import java.util.List; @@ -36,6 +38,7 @@ import org.apache.commons.lang.builder.EqualsBuilder; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; import org.apache.hadoop.hive.metastore.api.Partition; @@ -104,10 +107,21 @@ class VerifyingObjectStore extends ObjectStore { dbName, tableName, colNames, true, false); ColumnStatistics jdoResult = getTableColumnStatisticsInternal( dbName, tableName, colNames, false, true); + if (sqlResult != null && jdoResult != null) { + Collections.sort(sqlResult.getStatsObj(), new ColumnStatsComparator()); + Collections.sort(jdoResult.getStatsObj(), new ColumnStatsComparator()); + } verifyObjects(sqlResult, jdoResult, ColumnStatistics.class); return sqlResult; } + private static class ColumnStatsComparator implements Comparator<ColumnStatisticsObj> { + @Override + public int compare(ColumnStatisticsObj obj1, ColumnStatisticsObj obj2) { + return obj1.getColName().compareTo(obj2.getColName()); + } + } + @Override public List<ColumnStatistics> getPartitionColumnStatistics(String dbName, String tableName, List<String> partNames, List<String> colNames) @@ -116,7 +130,19 @@ class VerifyingObjectStore extends ObjectStore { dbName, tableName, partNames, colNames, true, false); List<ColumnStatistics> jdoResult = getPartitionColumnStatisticsInternal( dbName, tableName, partNames, colNames, false, true); - 
verifyLists(sqlResult, jdoResult, ColumnStatistics.class); + + if (sqlResult.size() != jdoResult.size()) { + String msg = "Lists are not the same size: SQL " + sqlResult.size() + + ", ORM " + jdoResult.size(); + LOG.error(msg); + throw new MetaException(msg); + } + + for (int i = 0; i < jdoResult.size(); i++) { + Collections.sort(sqlResult.get(i).getStatsObj(), new ColumnStatsComparator()); + Collections.sort(jdoResult.get(i).getStatsObj(), new ColumnStatsComparator()); + verifyObjects(sqlResult.get(i), jdoResult.get(i), ColumnStatistics.class); + } return sqlResult; }
