Author: hashutosh
Date: Fri Jun 6 22:58:59 2014
New Revision: 1601032
URL: http://svn.apache.org/r1601032
Log:
HIVE-7168 : Don't require naming all columns in analyze statements if stats
collection is for all columns (Ashutosh Chauhan via Prasanth J)
Modified:
hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
hive/trunk/ql/src/test/queries/clientpositive/columnstats_partlvl.q
hive/trunk/ql/src/test/queries/clientpositive/columnstats_tbllvl.q
hive/trunk/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
hive/trunk/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out
hive/trunk/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out
Modified:
hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java
URL:
http://svn.apache.org/viewvc/hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java?rev=1601032&r1=1601031&r2=1601032&view=diff
==============================================================================
---
hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java
(original)
+++
hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java
Fri Jun 6 22:58:59 2014
@@ -204,7 +204,7 @@ public class MPartitionColumnStatistics
return longLowValue;
}
- public void setLongLowValue(long longLowValue) {
+ public void setLongLowValue(Long longLowValue) {
this.longLowValue = longLowValue;
}
@@ -212,7 +212,7 @@ public class MPartitionColumnStatistics
return longHighValue;
}
- public void setLongHighValue(long longHighValue) {
+ public void setLongHighValue(Long longHighValue) {
this.longHighValue = longHighValue;
}
@@ -220,7 +220,7 @@ public class MPartitionColumnStatistics
return doubleLowValue;
}
- public void setDoubleLowValue(double doubleLowValue) {
+ public void setDoubleLowValue(Double doubleLowValue) {
this.doubleLowValue = doubleLowValue;
}
@@ -228,7 +228,7 @@ public class MPartitionColumnStatistics
return doubleHighValue;
}
- public void setDoubleHighValue(double doubleHighValue) {
+ public void setDoubleHighValue(Double doubleHighValue) {
this.doubleHighValue = doubleHighValue;
}
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java?rev=1601032&r1=1601031&r2=1601032&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
Fri Jun 6 22:58:59 2014
@@ -54,16 +54,15 @@ public class ColumnStatsSemanticAnalyzer
private boolean isRewritten;
private boolean isTableLevel;
- private String tableName;
private List<String> colNames;
private List<String> colType;
private String partName;
+ private Table tbl;
private class PartitionList {
private final String[] partKeys;
- private String[] partKeyTypes;
private final String[] partValues;
- private int numPartitions;
+ private final int numPartitions;
private int numPartitionValues;
PartitionList(int numPartitions) {
@@ -76,10 +75,6 @@ public class ColumnStatsSemanticAnalyzer
return numPartitions;
}
- public void setNumPartitions(int numPartitions) {
- this.numPartitions = numPartitions;
- }
-
public String[] getPartValues() {
return partValues;
}
@@ -103,18 +98,6 @@ public class ColumnStatsSemanticAnalyzer
public void setNumPartValues(int numPartValues) {
numPartitionValues = numPartValues;
}
-
- public String[] getPartKeyTypes() {
- return partKeyTypes;
- }
-
- public void setPartKeyTypes(String[] partKeyTypes) {
- this.partKeyTypes = partKeyTypes;
- }
-
- public void setPartKeyType(String partKeyType, int index) {
- partKeyTypes[index] = partKeyType;
- }
}
public ColumnStatsSemanticAnalyzer(HiveConf conf) throws SemanticException {
@@ -130,7 +113,7 @@ public class ColumnStatsSemanticAnalyzer
child0 = (ASTNode) child0.getChild(0);
if (child0.getToken().getType() == HiveParser.TOK_TABNAME) {
child1 = (ASTNode) tree.getChild(1);
- if (child1.getToken().getType() == HiveParser.TOK_TABCOLNAME) {
+ if (child1.getToken().getType() == HiveParser.KW_COLUMNS) {
rwt = true;
}
}
@@ -151,8 +134,13 @@ public class ColumnStatsSemanticAnalyzer
return isPartitioned;
}
- private String getTableName(ASTNode tree) {
- return getUnescapedName((ASTNode) tree.getChild(0).getChild(0));
+ private Table getTable(ASTNode tree) throws SemanticException {
+ String tableName = getUnescapedName((ASTNode)
tree.getChild(0).getChild(0));
+ try {
+ return db.getTable(tableName);
+ } catch (HiveException e) {
+ throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tableName));
+ }
}
private PartitionList getPartKeyValuePairsFromAST(ASTNode tree) {
@@ -180,27 +168,26 @@ public class ColumnStatsSemanticAnalyzer
return partList;
}
- private List<String> getColumnName(ASTNode tree) {
- int numCols = tree.getChild(1).getChildCount();
- List<String> colName = new LinkedList<String>();
- for (int i = 0; i < numCols; i++) {
- colName.add(i, new String(getUnescapedName((ASTNode)
tree.getChild(1).getChild(i))));
- }
- return colName;
- }
+ private List<String> getColumnName(ASTNode tree) throws SemanticException{
- private int getNumColumns(ASTNode tree) {
- return tree.getChild(1).getChildCount();
+ switch (tree.getChildCount()) {
+ case 2:
+ return Utilities.getColumnNamesFromFieldSchema(tbl.getCols());
+ case 3:
+ int numCols = tree.getChild(2).getChildCount();
+ List<String> colName = new LinkedList<String>();
+ for (int i = 0; i < numCols; i++) {
+ colName.add(i, new String(getUnescapedName((ASTNode)
tree.getChild(2).getChild(i))));
+ }
+ return colName;
+ default:
+ throw new SemanticException("Internal error. Expected number of
children of ASTNode to be"
+ + " either 2 or 3. Found : " + tree.getChildCount());
+ }
}
- private void validatePartitionKeys(String tableName, PartitionList partList)
throws
+ private void validatePartitionKeys(PartitionList partList) throws
SemanticException {
- Table tbl;
- try {
- tbl = db.getTable(tableName);
- } catch (HiveException e) {
- throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tableName));
- }
List<FieldSchema> partKeys = tbl.getPartitionKeys();
String[] inputPartKeys = partList.getPartKeys();
@@ -221,15 +208,8 @@ public class ColumnStatsSemanticAnalyzer
}
}
- private String[] getPartitionKeysType(String tableName, PartitionList
partList) throws
+ private String[] getPartitionKeysType(PartitionList partList) throws
SemanticException {
- Table tbl;
- try {
- tbl = db.getTable(tableName);
- } catch (HiveException e) {
- throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tableName));
- }
-
List<FieldSchema> partKeys = tbl.getPartitionKeys();
String[] inputPartKeys = partList.getPartKeys();
String[] inputPartKeyTypes = new String[inputPartKeys.length];
@@ -245,20 +225,13 @@ public class ColumnStatsSemanticAnalyzer
return inputPartKeyTypes;
}
- private String constructPartitionName(String tableName, PartitionList
partList)
+ private String constructPartitionName(PartitionList partList)
throws SemanticException {
- Table tbl;
Partition part;
String[] partKeys = partList.getPartKeys();
String[] partValues = partList.getPartValues();
-
- try {
- tbl = db.getTable(tableName);
- } catch (HiveException e) {
- throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tableName));
- }
-
Map<String, String> partSpec = new LinkedHashMap<String, String>();
+
for (int i=0; i<partKeys.length; i++) {
partSpec.put(partKeys[i].toLowerCase(), partValues[i]);
}
@@ -275,7 +248,7 @@ public class ColumnStatsSemanticAnalyzer
return part.getName();
}
- private void validatePartitionClause(String tableName, PartitionList
partList) throws
+ private void validatePartitionClause(PartitionList partList) throws
SemanticException {
int numPartKeys = partList.getNumPartitions();
int numPartValues = partList.getNumPartValues();
@@ -284,7 +257,7 @@ public class ColumnStatsSemanticAnalyzer
throw new
SemanticException(ErrorMsg.COLUMNSTATSCOLLECTOR_INVALID_SYNTAX.getMsg());
}
// Validate the user specified partition keys match the partition keys in
the table
- validatePartitionKeys(tableName, partList);
+ validatePartitionKeys(partList);
}
private StringBuilder genPartitionClause(PartitionList partList) throws
SemanticException {
@@ -295,7 +268,7 @@ public class ColumnStatsSemanticAnalyzer
StringBuilder retClause = null;
String[] partKeys = partList.getPartKeys();
String[] partValues = partList.getPartValues();
- String[] partKeysType = getPartitionKeysType(tableName, partList);
+ String[] partKeysType = getPartitionKeysType(partList);
for (int i = 0; i < partList.getNumPartitions(); i++) {
if (partValues[i] != null) {
@@ -396,41 +369,24 @@ public class ColumnStatsSemanticAnalyzer
return numBitVectors;
}
- private List<String> getTableColumnType(String tableName, List<String>
colNames, int numCols)
+ private List<String> getTableColumnType(List<String> colNames)
throws SemanticException{
List<String> colTypes = new LinkedList<String>();
- String colName;
- Table tbl;
- try {
- tbl = db.getTable(tableName);
- } catch (HiveException e) {
- throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tableName));
- }
-
List<FieldSchema> cols = tbl.getCols();
- for (int i=0; i <numCols; i++) {
- colName = colNames.get(i);
+ for (String colName : colNames) {
for (FieldSchema col: cols) {
if (colName.equalsIgnoreCase(col.getName())) {
- colTypes.add(i, new String(col.getType()));
+ colTypes.add(new String(col.getType()));
}
}
}
return colTypes;
}
- private List<String> getPartitionColumnType(String tableName, String
partName,
- List<String> colNames, int numCols) throws SemanticException {
+ private List<String> getPartitionColumnType(String partName,
+ List<String> colNames) throws SemanticException {
List<String> colTypes = new LinkedList<String>();
- String colName;
- Table tbl;
- try {
- tbl = db.getTable(tableName);
- } catch (HiveException e) {
- throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tableName));
- }
-
List<String> partNames = new ArrayList<String>();
partNames.add(partName);
List<Partition> partitionList;
@@ -443,11 +399,10 @@ public class ColumnStatsSemanticAnalyzer
Partition part = partitionList.get(0);
List<FieldSchema> cols = part.getCols();
- for (int i=0; i <numCols; i++) {
- colName = colNames.get(i);
+ for (String colName : colNames) {
for (FieldSchema col: cols) {
if (colName.equalsIgnoreCase(col.getName())) {
- colTypes.add(i, new String(col.getType()));
+ colTypes.add(new String(col.getType()));
}
}
}
@@ -469,7 +424,7 @@ public class ColumnStatsSemanticAnalyzer
rewrittenQueryBuilder.append(" )");
}
rewrittenQueryBuilder.append(" from ");
- rewrittenQueryBuilder.append(tableName);
+ rewrittenQueryBuilder.append(tbl.getTableName());
isRewritten = true;
// If partition level statistics is requested, add predicate and group by
as needed to rewritten
@@ -514,25 +469,24 @@ public class ColumnStatsSemanticAnalyzer
* an aggregation.
*/
if (shouldRewrite(tree)) {
- tableName = new String(getTableName(tree));
+ tbl = getTable(tree);
colNames = getColumnName(tree);
- int numCols = getNumColumns(tree);
// Save away the original AST
originalTree = tree;
boolean isPartitionStats = isPartitionLevelStats(tree);
PartitionList partList = null;
- checkForPartitionColumns(colNames, getPartitionKeys(tableName));
- validateSpecifiedColumnNames(tableName, colNames);
+ checkForPartitionColumns(colNames,
Utilities.getColumnNamesFromFieldSchema(tbl.getPartitionKeys()));
+ validateSpecifiedColumnNames(colNames);
if (isPartitionStats) {
isTableLevel = false;
partList = getPartKeyValuePairsFromAST(tree);
- validatePartitionClause(tableName, partList);
- partName = constructPartitionName(tableName, partList);
- colType = getPartitionColumnType(tableName, partName, colNames,
numCols);
+ validatePartitionClause(partList);
+ partName = constructPartitionName(partList);
+ colType = getPartitionColumnType(partName, colNames);
} else {
isTableLevel = true;
- colType = getTableColumnType(tableName, colNames, numCols);
+ colType = getTableColumnType(colNames);
}
int numBitVectors = getNumBitVectorsForNDVEstimation(conf);
@@ -547,16 +501,9 @@ public class ColumnStatsSemanticAnalyzer
}
// fail early if the columns specified for column statistics are not valid
- private void validateSpecifiedColumnNames(String tableName, List<String>
specifiedCols)
+ private void validateSpecifiedColumnNames(List<String> specifiedCols)
throws SemanticException {
- List<FieldSchema> fields = null;
- try {
- fields = db.getTable(tableName).getAllCols();
- } catch (HiveException e) {
- throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tableName));
- }
- List<String> tableCols = Utilities.getColumnNamesFromFieldSchema(fields);
-
+ List<String> tableCols =
Utilities.getColumnNamesFromFieldSchema(tbl.getCols());
for(String sc : specifiedCols) {
if (!tableCols.contains(sc.toLowerCase())) {
String msg = "'" + sc + "' (possible columns are " +
tableCols.toString() + ")";
@@ -565,17 +512,6 @@ public class ColumnStatsSemanticAnalyzer
}
}
- private List<String> getPartitionKeys(String tableName) throws
SemanticException {
- List<FieldSchema> fields;
- try {
- fields = db.getTable(tableName).getPartitionKeys();
- } catch (HiveException e) {
- throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tableName));
- }
-
- return Utilities.getColumnNamesFromFieldSchema(fields);
- }
-
private void checkForPartitionColumns(List<String> specifiedCols,
List<String> partCols)
throws SemanticException {
// Raise error if user has specified partition column for stats
@@ -602,7 +538,7 @@ public class ColumnStatsSemanticAnalyzer
qb = getQB();
qb.setAnalyzeRewrite(true);
qbp = qb.getParseInfo();
- qbp.setTableName(tableName);
+ qbp.setTableName(tbl.getTableName());
qbp.setTblLvl(isTableLevel);
if (!isTableLevel) {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g?rev=1601032&r1=1601031&r2=1601032&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
(original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g Fri Jun
6 22:58:59 2014
@@ -1312,7 +1312,9 @@ descStatement
analyzeStatement
@init { pushMsg("analyze statement", state); }
@after { popMsg(state); }
- : KW_ANALYZE KW_TABLE (parttype=tableOrPartition) KW_COMPUTE KW_STATISTICS
((noscan=KW_NOSCAN) | (partialscan=KW_PARTIALSCAN) | (KW_FOR KW_COLUMNS
statsColumnName=columnNameList))? -> ^(TOK_ANALYZE $parttype $noscan?
$partialscan? $statsColumnName?)
+ : KW_ANALYZE KW_TABLE (parttype=tableOrPartition) KW_COMPUTE KW_STATISTICS
((noscan=KW_NOSCAN) | (partialscan=KW_PARTIALSCAN)
+ | (KW_FOR KW_COLUMNS
(statsColumnName=columnNameList)?))?
+ -> ^(TOK_ANALYZE $parttype $noscan? $partialscan? KW_COLUMNS?
$statsColumnName?)
;
showStatement
Modified: hive/trunk/ql/src/test/queries/clientpositive/columnstats_partlvl.q
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/columnstats_partlvl.q?rev=1601032&r1=1601031&r2=1601032&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/columnstats_partlvl.q
(original)
+++ hive/trunk/ql/src/test/queries/clientpositive/columnstats_partlvl.q Fri Jun
6 22:58:59 2014
@@ -18,3 +18,8 @@ analyze table Employee_Part partition (e
explain extended
analyze table Employee_Part partition (employeeSalary=4000.0) compute
statistics for columns employeeID;
analyze table Employee_Part partition (employeeSalary=4000.0) compute
statistics for columns employeeID;
+
+explain
+analyze table Employee_Part partition (employeeSalary=2000.0) compute
statistics for columns;
+analyze table Employee_Part partition (employeeSalary=2000.0) compute
statistics for columns;
+
Modified: hive/trunk/ql/src/test/queries/clientpositive/columnstats_tbllvl.q
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/columnstats_tbllvl.q?rev=1601032&r1=1601031&r2=1601032&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/columnstats_tbllvl.q
(original)
+++ hive/trunk/ql/src/test/queries/clientpositive/columnstats_tbllvl.q Fri Jun
6 22:58:59 2014
@@ -23,6 +23,15 @@ analyze table UserVisits_web_text_none c
analyze table UserVisits_web_text_none compute statistics for columns
sourceIP, avgTimeOnSite, adRevenue;
+explain
+analyze table UserVisits_web_text_none compute statistics for columns;
+
+analyze table UserVisits_web_text_none compute statistics for columns;
+
+describe formatted UserVisits_web_text_none destURL;
+describe formatted UserVisits_web_text_none adRevenue;
+describe formatted UserVisits_web_text_none avgTimeOnSite;
+
CREATE TABLE empty_tab(
a int,
b double,
Modified:
hive/trunk/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/columnstats_partlvl.q.out?rev=1601032&r1=1601031&r2=1601032&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
(original) and
hive/trunk/ql/src/test/results/clientpositive/columnstats_partlvl.q.out Fri Jun
6 22:58:59 2014 differ
Modified: hive/trunk/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out?rev=1601032&r1=1601031&r2=1601032&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out
(original) and
hive/trunk/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out Fri Jun
6 22:58:59 2014 differ
Modified:
hive/trunk/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out?rev=1601032&r1=1601031&r2=1601032&view=diff
==============================================================================
Files
hive/trunk/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out
(original) and
hive/trunk/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out Fri
Jun 6 22:58:59 2014 differ