Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java?rev=1622396&r1=1622395&r2=1622396&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java Thu Sep 4 02:49:46 2014 @@ -38,7 +38,7 @@ import org.apache.hadoop.hive.serde2.typ import org.apache.hadoop.hive.ql.parse.SubQueryDiagnostic.QBSubQueryRewrite; public class QBSubQuery implements ISubQueryJoinInfo { - + public static enum SubQueryType { EXISTS, NOT_EXISTS, @@ -149,16 +149,16 @@ public class QBSubQuery implements ISubQ } /* - * This class captures the information about a + * This class captures the information about a * conjunct in the where clause of the SubQuery. * For an equality predicate it captures for each side: * - the AST * - the type of Expression (basically what columns are referenced) - * - for Expressions that refer to the parent it captures the + * - for Expressions that refer to the parent it captures the * parent's ColumnInfo. In case of outer Aggregation expressions * we need this to introduce a new mapping in the OuterQuery * RowResolver. A join condition must use qualified column references, - * so we generate a new name for the aggr expression and use it in the + * so we generate a new name for the aggr expression and use it in the * joining condition. * For e.g. * having exists ( select x from R2 where y = min(R1.z) ) @@ -174,8 +174,8 @@ public class QBSubQuery implements ISubQ private final ColumnInfo leftOuterColInfo; private final ColumnInfo rightOuterColInfo; - Conjunct(ASTNode leftExpr, - ASTNode rightExpr, + Conjunct(ASTNode leftExpr, + ASTNode rightExpr, ExprType leftExprType, ExprType rightExprType, ColumnInfo leftOuterColInfo, @@ -239,8 +239,8 @@ public class QBSubQuery implements ISubQ Stack<Node> stack; ConjunctAnalyzer(RowResolver parentQueryRR, - boolean forHavingClause, - String parentQueryNewAlias) { + boolean forHavingClause, + String parentQueryNewAlias) { this.parentQueryRR = parentQueryRR; defaultExprProcessor = new DefaultExprProcessor(); this.forHavingClause = forHavingClause; @@ -260,13 +260,13 @@ public class QBSubQuery implements ISubQ private ObjectPair<ExprType,ColumnInfo> analyzeExpr(ASTNode expr) { ColumnInfo cInfo = null; if ( forHavingClause ) { - try { - cInfo = parentQueryRR.getExpression(expr); - if ( cInfo != null) { - return ObjectPair.create(ExprType.REFERS_PARENT, cInfo); - } - } catch(SemanticException se) { - } + try { + cInfo = parentQueryRR.getExpression(expr); + if ( cInfo != null) { + return ObjectPair.create(ExprType.REFERS_PARENT, cInfo); + } + } catch(SemanticException se) { + } } if ( expr.getType() == HiveParser.DOT) { ASTNode dot = firstDot(expr); @@ -308,12 +308,12 @@ public class QBSubQuery implements ISubQ ObjectPair<ExprType,ColumnInfo> leftInfo = analyzeExpr(left); ObjectPair<ExprType,ColumnInfo> rightInfo = analyzeExpr(right); - return new Conjunct(left, right, + return new Conjunct(left, right, leftInfo.getFirst(), rightInfo.getFirst(), leftInfo.getSecond(), rightInfo.getSecond()); } else { ObjectPair<ExprType,ColumnInfo> sqExprInfo = analyzeExpr(conjunct); - return new Conjunct(conjunct, null, + return new Conjunct(conjunct, null, sqExprInfo.getFirst(), null, sqExprInfo.getSecond(), sqExprInfo.getSecond()); } @@ 
-354,86 +354,86 @@ public class QBSubQuery implements ISubQ } /* - * When transforming a Not In SubQuery we need to check for nulls in the + * When transforming a Not In SubQuery we need to check for nulls in the * Joining expressions of the SubQuery. If there are nulls then the SubQuery always - * returns false. For more details see + * returns false. For more details see * https://issues.apache.org/jira/secure/attachment/12614003/SubQuerySpec.pdf - * + * * Basically, SQL semantics say that: * - R1.A not in (null, 1, 2, ...) - * is always false. - * A 'not in' operator is equivalent to a '<> all'. Since a not equal check with null + * is always false. + * A 'not in' operator is equivalent to a '<> all'. Since a not equal check with null * returns false, a not in predicate against a set with a 'null' value always returns false. - * + * * So for not in SubQuery predicates: * - we join in a null count predicate. * - And the joining condition is that the 'Null Count' query has a count of 0. - * + * */ class NotInCheck implements ISubQueryJoinInfo { - + private static final String CNT_ALIAS = "c1"; - + /* * expressions in SubQ that are joined to the Outer Query. */ List<ASTNode> subQryCorrExprs; - + /* * row resolver of the SubQuery. * Set by the SemanticAnalyzer after the Plan for the SubQuery is genned. * This is needed in case the SubQuery select list contains a TOK_ALLCOLREF */ RowResolver sqRR; - + NotInCheck() { subQryCorrExprs = new ArrayList<ASTNode>(); } - + void addCorrExpr(ASTNode corrExpr) { subQryCorrExprs.add(corrExpr); } - + public ASTNode getSubQueryAST() { ASTNode ast = SubQueryUtils.buildNotInNullCheckQuery( - QBSubQuery.this.getSubQueryAST(), - QBSubQuery.this.getAlias(), - CNT_ALIAS, + QBSubQuery.this.getSubQueryAST(), + QBSubQuery.this.getAlias(), + CNT_ALIAS, subQryCorrExprs, sqRR); SubQueryUtils.setOriginDeep(ast, QBSubQuery.this.originalSQASTOrigin); return ast; } - + public String getAlias() { return QBSubQuery.this.getAlias() + "_notin_nullcheck"; } - + public JoinType getJoinType() { return JoinType.LEFTSEMI; } - + public ASTNode getJoinConditionAST() { - ASTNode ast = + ASTNode ast = SubQueryUtils.buildNotInNullJoinCond(getAlias(), CNT_ALIAS); SubQueryUtils.setOriginDeep(ast, QBSubQuery.this.originalSQASTOrigin); return ast; } - + public QBSubQuery getSubQuery() { return QBSubQuery.this; } - + public String getOuterQueryId() { return QBSubQuery.this.getOuterQueryId(); } - + void setSQRR(RowResolver sqRR) { this.sqRR = sqRR; } - + } - + private final String outerQueryId; private final int sqIdx; private final String alias; @@ -455,11 +455,11 @@ public class QBSubQuery implements ISubQ private int numOfCorrelationExprsAddedToSQSelect; private boolean groupbyAddedToSQ; - + private int numOuterCorrExprsForHaving; - + private NotInCheck notInCheck; - + private QBSubQueryRewrite subQueryDiagnostic; public QBSubQuery(String outerQueryId, @@ -483,11 +483,11 @@ public class QBSubQuery implements ISubQ originalSQASTOrigin = new ASTNodeOrigin("SubQuery", alias, s, alias, originalSQAST); numOfCorrelationExprsAddedToSQSelect = 0; groupbyAddedToSQ = false; - + if ( operator.getType() == SubQueryType.NOT_IN ) { notInCheck = new NotInCheck(); } - + subQueryDiagnostic = SubQueryDiagnostic.getRewrite(this, ctx.getTokenRewriteStream(), ctx); } @@ -500,18 +500,18 @@ public class QBSubQuery implements ISubQ public SubQueryTypeDef getOperator() { return operator; } - + public ASTNode getOriginalSubQueryASTForRewrite() { return (operator.getType() == SubQueryType.NOT_EXISTS - || 
operator.getType() == SubQueryType.NOT_IN ? - (ASTNode) originalSQASTOrigin.getUsageNode().getParent() : + || operator.getType() == SubQueryType.NOT_IN ? + (ASTNode) originalSQASTOrigin.getUsageNode().getParent() : originalSQASTOrigin.getUsageNode()); } void validateAndRewriteAST(RowResolver outerQueryRR, - boolean forHavingClause, - String outerQueryAlias, - Set<String> outerQryAliases) throws SemanticException { + boolean forHavingClause, + String outerQueryAlias, + Set<String> outerQryAliases) throws SemanticException { ASTNode selectClause = (ASTNode) subQueryAST.getChild(1).getChild(1); @@ -519,12 +519,12 @@ public class QBSubQuery implements ISubQ if ( selectClause.getChild(0).getType() == HiveParser.TOK_HINTLIST ) { selectExprStart = 1; } - + /* * Restriction.16.s :: Correlated Expression in Outer Query must not contain * unqualified column references. */ - if ( parentQueryExpression != null && !forHavingClause ) { + if ( parentQueryExpression != null && !forHavingClause ) { ASTNode u = SubQueryUtils.hasUnQualifiedColumnReferences(parentQueryExpression); if ( u != null ) { subQueryAST.setOrigin(originalSQASTOrigin); @@ -532,7 +532,7 @@ public class QBSubQuery implements ISubQ u, "Correlating expression cannot contain unqualified column references.")); } } - + /* * Restriction 17.s :: SubQuery cannot use the same table alias as one used in * the Outer Query. @@ -546,14 +546,14 @@ public class QBSubQuery implements ISubQ } if ( sharedAlias != null) { ASTNode whereClause = SubQueryUtils.subQueryWhere(subQueryAST); - + if ( whereClause != null ) { ASTNode u = SubQueryUtils.hasUnQualifiedColumnReferences(whereClause); if ( u != null ) { subQueryAST.setOrigin(originalSQASTOrigin); throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( u, "SubQuery cannot use the table alias: " + sharedAlias + "; " + - "this is also an alias in the Outer Query and SubQuery contains a unqualified column reference")); + "this is also an alias in the Outer Query and SubQuery contains a unqualified column reference")); } } } @@ -641,25 +641,25 @@ public class QBSubQuery implements ISubQ } void buildJoinCondition(RowResolver outerQueryRR, RowResolver sqRR, - boolean forHavingClause, - String outerQueryAlias) throws SemanticException { + boolean forHavingClause, + String outerQueryAlias) throws SemanticException { ASTNode parentQueryJoinCond = null; if ( parentQueryExpression != null ) { - + ColumnInfo outerQueryCol = null; try { outerQueryCol = outerQueryRR.getExpression(parentQueryExpression); } catch(SemanticException se) { } - + parentQueryJoinCond = SubQueryUtils.buildOuterQryToSQJoinCond( getOuterQueryExpression(), alias, sqRR); - + if ( outerQueryCol != null ) { - rewriteCorrConjunctForHaving(parentQueryJoinCond, true, + rewriteCorrConjunctForHaving(parentQueryJoinCond, true, outerQueryAlias, outerQueryRR, outerQueryCol); } subQueryDiagnostic.addJoinCondition(parentQueryJoinCond, outerQueryCol != null, true); @@ -682,10 +682,10 @@ public class QBSubQuery implements ISubQ ASTNode updateOuterQueryFilter(ASTNode outerQryFilter) { if (postJoinConditionAST == null ) { return outerQryFilter; - } - + } + subQueryDiagnostic.addPostJoinCondition(postJoinConditionAST); - + if ( outerQryFilter == null ) { return postJoinConditionAST; } @@ -738,7 +738,7 @@ public class QBSubQuery implements ISubQ * Additional things for Having clause: * - A correlation predicate may refer to an aggregation expression. * - This introduces 2 twists to the rewrite: - * a. 
When analyzing equality predicates we need to analyze each side + * a. When analyzing equality predicates we need to analyze each side * to see if it is an aggregation expression from the Outer Query. * So for e.g. this is a valid correlation predicate: * R2.x = min(R1.y) @@ -748,12 +748,12 @@ * to contain qualified column references. * We handle this by generating a new name for the aggregation expression, * like R1._gby_sq_col_1 and adding this mapping to the Outer Query's - * Row Resolver. Then we construct a joining predicate using this new + * Row Resolver. Then we construct a joining predicate using this new * name; so in our e.g. the condition would be: R2.x = R1._gby_sq_col_1 */ private void rewrite(RowResolver parentQueryRR, - boolean forHavingClause, - String outerQueryAlias) throws SemanticException { + boolean forHavingClause, + String outerQueryAlias) throws SemanticException { ASTNode selectClause = (ASTNode) subQueryAST.getChild(1).getChild(1); ASTNode whereClause = SubQueryUtils.subQueryWhere(subQueryAST); @@ -766,7 +766,7 @@ public class QBSubQuery implements ISubQ SubQueryUtils.extractConjuncts(searchCond, conjuncts); ConjunctAnalyzer conjunctAnalyzer = new ConjunctAnalyzer(parentQueryRR, - forHavingClause, outerQueryAlias); + forHavingClause, outerQueryAlias); ASTNode sqNewSearchCond = null; for(ASTNode conjunctAST : conjuncts) { @@ -805,7 +805,7 @@ public class QBSubQuery implements ISubQ corrCondLeftIsRewritten = true; if ( forHavingClause && conjunct.getRightOuterColInfo() != null ) { corrCondRightIsRewritten = true; - rewriteCorrConjunctForHaving(conjunctAST, false, outerQueryAlias, + rewriteCorrConjunctForHaving(conjunctAST, false, outerQueryAlias, parentQueryRR, conjunct.getRightOuterColInfo()); } ASTNode joinPredciate = SubQueryUtils.alterCorrelatedPredicate( @@ -829,7 +829,7 @@ public class QBSubQuery implements ISubQ corrCondRightIsRewritten = true; if ( forHavingClause && conjunct.getLeftOuterColInfo() != null ) { corrCondLeftIsRewritten = true; - rewriteCorrConjunctForHaving(conjunctAST, true, outerQueryAlias, + rewriteCorrConjunctForHaving(conjunctAST, true, outerQueryAlias, parentQueryRR, conjunct.getLeftOuterColInfo()); } ASTNode joinPredciate = SubQueryUtils.alterCorrelatedPredicate( @@ -901,7 +901,7 @@ public class QBSubQuery implements ISubQ for(ASTNode child : newChildren ) { subQueryAST.addChild(child); } - + subQueryDiagnostic.setAddGroupByClause(); return groupBy; @@ -927,26 +927,26 @@ public class QBSubQuery implements ISubQ public int getNumOfCorrelationExprsAddedToSQSelect() { return numOfCorrelationExprsAddedToSQSelect; } - - + + public QBSubQueryRewrite getDiagnostic() { return subQueryDiagnostic; } - + public QBSubQuery getSubQuery() { return this; } - + NotInCheck getNotInCheck() { return notInCheck; } - + private void rewriteCorrConjunctForHaving(ASTNode conjunctASTNode, boolean refersLeft, String outerQueryAlias, RowResolver outerQueryRR, ColumnInfo outerQueryCol) { - + String newColAlias = "_gby_sq_col_" + numOuterCorrExprsForHaving++; ASTNode outerExprForCorr = SubQueryUtils.createColRefAST(outerQueryAlias, newColAlias); if ( refersLeft ) { @@ -956,5 +956,5 @@ public class QBSubQuery implements ISubQ } outerQueryRR.put(outerQueryAlias, newColAlias, outerQueryCol); } - + }
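A note on the NotInCheck class in the QBSubQuery changes above: the extra null-count branch exists because of SQL's three-valued logic. "x NOT IN (S)" means "x <> ALL members of S", and comparing x against a null member yields UNKNOWN rather than TRUE, so the predicate can never pass once S contains a null. The following self-contained sketch (illustrative Java only, not Hive code; the NotInNullDemo and notIn names are made up) mimics that evaluation:

import java.util.Arrays;
import java.util.List;

public class NotInNullDemo {
    // Three-valued result: TRUE, FALSE, or null standing in for UNKNOWN.
    static Boolean notIn(Integer x, List<Integer> s) {
        boolean sawUnknown = false;
        for (Integer v : s) {
            if (x == null || v == null) {
                sawUnknown = true;                 // comparison with null is UNKNOWN
            } else if (x.intValue() == v.intValue()) {
                return Boolean.FALSE;              // x equals a member, so "x <> ALL" fails
            }
        }
        return sawUnknown ? null : Boolean.TRUE;   // UNKNOWN if any member was null
    }

    public static void main(String[] args) {
        System.out.println(notIn(3, Arrays.asList(1, 2)));        // TRUE: row would pass
        System.out.println(notIn(1, Arrays.asList(1, 2)));        // FALSE: row filtered
        System.out.println(notIn(3, Arrays.asList(null, 1, 2)));  // null (UNKNOWN): row filtered
    }
}

This is what the generated null-check query guards against: the LEFTSEMI join condition built by buildNotInNullJoinCond only lets rows through when the subquery's null count (the CNT_ALIAS column, "c1") is 0.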
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java?rev=1622396&r1=1622395&r2=1622396&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java Thu Sep 4 02:49:46 2014 @@ -98,7 +98,7 @@ public class RowResolver implements Seri public void put(String tab_alias, String col_alias, ColumnInfo colInfo) { if (!addMappingOnly(tab_alias, col_alias, colInfo)) { - rowSchema.getSignature().add(colInfo); + rowSchema.getSignature().add(colInfo); } } @@ -289,7 +289,7 @@ public class RowResolver implements Seri public boolean getIsExprResolver() { return isExprResolver; } - + public String[] getAlternateMappings(String internalName) { return altInvRslvMap.get(internalName); } Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1622396&r1=1622395&r2=1622396&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Thu Sep 4 02:49:46 2014 @@ -2374,8 +2374,8 @@ public class SemanticAnalyzer extends Ba String havingInputAlias = null; if ( forHavingClause ) { - havingInputAlias = "gby_sq" + sqIdx; - aliasToOpInfo.put(havingInputAlias, input); + havingInputAlias = "gby_sq" + sqIdx; + aliasToOpInfo.put(havingInputAlias, input); } subQuery.validateAndRewriteAST(inputRR, forHavingClause, havingInputAlias, aliasToOpInfo.keySet()); @@ -2486,7 +2486,10 @@ public class SemanticAnalyzer extends Ba ExprNodeDesc filterPred = null; List<Boolean> nullSafes = joinTree.getNullSafes(); for (int i = 0; i < joinKeys.length; i++) { - if ( nullSafes.get(i)) { + if (nullSafes.get(i) || (joinKeys[i] instanceof ExprNodeColumnDesc && + ((ExprNodeColumnDesc)joinKeys[i]).getIsPartitionColOrVirtualCol())) { + // no need to generate is not null predicate for partitioning or + // virtual column, since those columns can never be null. 
continue; } List<ExprNodeDesc> args = new ArrayList<ExprNodeDesc>(); @@ -9721,6 +9724,11 @@ public class SemanticAnalyzer extends Ba LOG.info("Completed plan generation"); + // put accessed columns to readEntity + if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) { + putAccessedColumnsToReadEntity(inputs, columnAccessInfo); + } + if (!ctx.getExplain()) { // if desired check we're not going over partition scan limits enforceScanLimits(pCtx, origFetchTask); @@ -9729,6 +9737,26 @@ public class SemanticAnalyzer extends Ba return; } + private void putAccessedColumnsToReadEntity(HashSet<ReadEntity> inputs, ColumnAccessInfo columnAccessInfo) { + Map<String, List<String>> tableToColumnAccessMap = columnAccessInfo.getTableToColumnAccessMap(); + if (tableToColumnAccessMap != null && !tableToColumnAccessMap.isEmpty()) { + for(ReadEntity entity: inputs) { + switch (entity.getType()) { + case TABLE: + entity.getAccessedColumns().addAll( + tableToColumnAccessMap.get(entity.getTable().getCompleteName())); + break; + case PARTITION: + entity.getAccessedColumns().addAll( + tableToColumnAccessMap.get(entity.getPartition().getTable().getCompleteName())); + break; + default: + // no-op + } + } + } + } + private void enforceScanLimits(ParseContext pCtx, FetchTask fTask) throws SemanticException { int scanLimit = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVELIMITTABLESCANPARTITION); @@ -11765,40 +11793,40 @@ public class SemanticAnalyzer extends Ba } private void addAlternateGByKeyMappings(ASTNode gByExpr, ColumnInfo colInfo, - Operator<? extends OperatorDesc> reduceSinkOp, RowResolver gByRR) { - if ( gByExpr.getType() == HiveParser.DOT + Operator<? extends OperatorDesc> reduceSinkOp, RowResolver gByRR) { + if ( gByExpr.getType() == HiveParser.DOT && gByExpr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL ) { - String tab_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr - .getChild(0).getChild(0).getText()); - String col_alias = BaseSemanticAnalyzer.unescapeIdentifier( - gByExpr.getChild(1).getText()); - gByRR.put(tab_alias, col_alias, colInfo); - } else if ( gByExpr.getType() == HiveParser.TOK_TABLE_OR_COL ) { - String col_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr - .getChild(0).getText()); - String tab_alias = null; - /* - * If the input to the GBy has a tab alias for the column, then add an entry - * based on that tab_alias. - * For e.g. this query: - * select b.x, count(*) from t1 b group by x - * needs (tab_alias=b, col_alias=x) in the GBy RR. - * tab_alias=b comes from looking at the RowResolver that is the ancestor - * before any GBy/ReduceSinks added for the GBY operation. - */ - Operator<? extends OperatorDesc> parent = reduceSinkOp; - while ( parent instanceof ReduceSinkOperator || - parent instanceof GroupByOperator ) { - parent = parent.getParentOperators().get(0); - } - RowResolver parentRR = opParseCtx.get(parent).getRowResolver(); - try { - ColumnInfo pColInfo = parentRR.get(tab_alias, col_alias); - tab_alias = pColInfo == null ? 
null : pColInfo.getTabAlias(); - } catch(SemanticException se) { - } - gByRR.put(tab_alias, col_alias, colInfo); - } + String tab_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr + .getChild(0).getChild(0).getText()); + String col_alias = BaseSemanticAnalyzer.unescapeIdentifier( + gByExpr.getChild(1).getText()); + gByRR.put(tab_alias, col_alias, colInfo); + } else if ( gByExpr.getType() == HiveParser.TOK_TABLE_OR_COL ) { + String col_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr + .getChild(0).getText()); + String tab_alias = null; + /* + * If the input to the GBy has a tab alias for the column, then add an entry + * based on that tab_alias. + * For e.g. this query: + * select b.x, count(*) from t1 b group by x + * needs (tab_alias=b, col_alias=x) in the GBy RR. + * tab_alias=b comes from looking at the RowResolver that is the ancestor + * before any GBy/ReduceSinks added for the GBY operation. + */ + Operator<? extends OperatorDesc> parent = reduceSinkOp; + while ( parent instanceof ReduceSinkOperator || + parent instanceof GroupByOperator ) { + parent = parent.getParentOperators().get(0); + } + RowResolver parentRR = opParseCtx.get(parent).getRowResolver(); + try { + ColumnInfo pColInfo = parentRR.get(tab_alias, col_alias); + tab_alias = pColInfo == null ? null : pColInfo.getTabAlias(); + } catch(SemanticException se) { + } + gByRR.put(tab_alias, col_alias, colInfo); + } } private WriteEntity.WriteType determineWriteType(LoadTableDesc ltd, boolean isNonNativeTable) { Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/authorization/HiveAuthorizationTaskFactoryImpl.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/authorization/HiveAuthorizationTaskFactoryImpl.java?rev=1622396&r1=1622395&r2=1622396&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/authorization/HiveAuthorizationTaskFactoryImpl.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/authorization/HiveAuthorizationTaskFactoryImpl.java Thu Sep 4 02:49:46 2014 @@ -206,7 +206,7 @@ public class HiveAuthorizationTaskFactor List<String> roles = new ArrayList<String>(); for (int i = rolesStartPos; i < ast.getChildCount(); i++) { - roles.add(BaseSemanticAnalyzer.unescapeIdentifier(ast.getChild(i).getText()).toLowerCase()); + roles.add(BaseSemanticAnalyzer.unescapeIdentifier(ast.getChild(i).getText())); } String roleOwnerName = SessionState.getUserFromAuthenticator(); Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java?rev=1622396&r1=1622395&r2=1622396&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java Thu Sep 4 02:49:46 2014 @@ -32,6 +32,7 @@ public class ColStatistics { private double avgColLen; private long numTrues; private long numFalses; + private Range range; public ColStatistics(String tabAlias, String colName, String colType) { this.setTableAlias(tabAlias); @@ -118,6 +119,17 @@ public class ColStatistics { this.numFalses = numFalses; } + public Range getRange() { + return range; + } + + public void setRange(Number minVal, Number maxVal) { + 
this.range = new Range(minVal, maxVal); + } + + public void setRange(Range r) { + this.range = r; + } @Override public String toString() { @@ -150,7 +162,24 @@ public class ColStatistics { clone.setNumNulls(numNulls); clone.setNumTrues(numTrues); clone.setNumFalses(numFalses); + if (range != null ) { + clone.setRange(range.clone()); + } return clone; } + public static class Range { + public final Number minValue; + public final Number maxValue; + Range(Number minValue, Number maxValue) { + super(); + this.minValue = minValue; + this.maxValue = maxValue; + } + @Override + public Range clone() { + return new Range(minValue, maxValue); + } + } + } Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadFileDesc.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadFileDesc.java?rev=1622396&r1=1622395&r2=1622396&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadFileDesc.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadFileDesc.java Thu Sep 4 02:49:46 2014 @@ -37,7 +37,7 @@ public class LoadFileDesc extends LoadDe private String destinationCreateTable; static { - PTFUtils.makeTransient(LoadFileDesc.class, "targetDir"); + PTFUtils.makeTransient(LoadFileDesc.class, "targetDir"); } public LoadFileDesc() { } Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadMultiFilesDesc.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadMultiFilesDesc.java?rev=1622396&r1=1622395&r2=1622396&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadMultiFilesDesc.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadMultiFilesDesc.java Thu Sep 4 02:49:46 2014 @@ -38,7 +38,7 @@ public class LoadMultiFilesDesc implemen private transient List<Path> srcDirs; static { - PTFUtils.makeTransient(LoadMultiFilesDesc.class, "targetDirs"); + PTFUtils.makeTransient(LoadMultiFilesDesc.class, "targetDirs"); } public LoadMultiFilesDesc() { } Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java?rev=1622396&r1=1622395&r2=1622396&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java Thu Sep 4 02:49:46 2014 @@ -116,6 +116,8 @@ public class MapWork extends BaseWork { private boolean useOneNullRowInputFormat; + private boolean dummyTableScan = false; + public MapWork() {} public MapWork(String name) { @@ -525,4 +527,12 @@ public class MapWork extends BaseWork { } } } + + public void setDummyTableScan(boolean dummyTableScan) { + this.dummyTableScan = dummyTableScan; + } + + public boolean getDummyTableScan() { + return dummyTableScan; + } } Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/RoleDDLDesc.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/RoleDDLDesc.java?rev=1622396&r1=1622395&r2=1622396&view=diff ============================================================================== --- 
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/RoleDDLDesc.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/RoleDDLDesc.java Thu Sep 4 02:49:46 2014 @@ -102,8 +102,7 @@ public class RoleDDLDesc extends DDLDesc public RoleDDLDesc(String principalName, PrincipalType principalType, RoleOperation operation, String roleOwnerName) { - this.name = (principalName != null && principalType == PrincipalType.ROLE) ? - principalName.toLowerCase() : principalName; + this.name = principalName; this.principalType = principalType; this.operation = operation; this.roleOwnerName = roleOwnerName; Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/AuthorizationUtils.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/AuthorizationUtils.java?rev=1622396&r1=1622395&r2=1622396&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/AuthorizationUtils.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/AuthorizationUtils.java Thu Sep 4 02:49:46 2014 @@ -67,9 +67,6 @@ public class AuthorizationUtils { case ROLE: return HivePrincipalType.ROLE; case GROUP: - if (SessionState.get().getAuthorizationMode() == SessionState.AuthorizationMode.V2) { - throw new HiveException(ErrorMsg.UNSUPPORTED_AUTHORIZATION_PRINCIPAL_TYPE_GROUP); - } return HivePrincipalType.GROUP; default: //should not happen as we take care of all existing types Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/StorageBasedAuthorizationProvider.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/StorageBasedAuthorizationProvider.java?rev=1622396&r1=1622395&r2=1622396&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/StorageBasedAuthorizationProvider.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/StorageBasedAuthorizationProvider.java Thu Sep 4 02:49:46 2014 @@ -21,7 +21,7 @@ package org.apache.hadoop.hive.ql.securi import java.io.FileNotFoundException; import java.io.IOException; import java.security.AccessControlException; -import java.security.PrivilegedExceptionAction; +import java.util.ArrayList; import java.util.EnumSet; import java.util.List; @@ -34,12 +34,9 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsAction; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.hdfs.DFSConfigKeys; -import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.hive.common.FileUtils; -import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.HiveMetaStore.HMSHandler; +import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.MetaException; @@ -48,7 +45,6 @@ import org.apache.hadoop.hive.ql.metadat import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; -import 
org.apache.hadoop.hive.shims.ShimLoader; /** * StorageBasedAuthorizationProvider is an implementation of @@ -141,28 +137,77 @@ public class StorageBasedAuthorizationPr public void authorize(Database db, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) throws HiveException, AuthorizationException { Path path = getDbLocation(db); + + // extract drop privileges + DropPrivilegeExtractor privExtractor = new DropPrivilegeExtractor(readRequiredPriv, + writeRequiredPriv); + readRequiredPriv = privExtractor.getReadReqPriv(); + writeRequiredPriv = privExtractor.getWriteReqPriv(); + + // authorize drops if there was a drop privilege requirement + if(privExtractor.hasDropPrivilege()) { + checkDeletePermission(path, getConf(), authenticator.getUserName()); + } + authorize(path, readRequiredPriv, writeRequiredPriv); } @Override public void authorize(Table table, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) throws HiveException, AuthorizationException { - - // To create/drop/alter a table, the owner should have WRITE permission on the database directory - authorize(hive_db.getDatabase(table.getDbName()), readRequiredPriv, writeRequiredPriv); - - // If the user has specified a location - external or not, check if the user has the try { initWh(); - String location = table.getTTable().getSd().getLocation(); - if (location != null && !location.isEmpty()) { - authorize(new Path(location), readRequiredPriv, writeRequiredPriv); - } } catch (MetaException ex) { throw hiveException(ex); } + + // extract any drop privileges out of required privileges + DropPrivilegeExtractor privExtractor = new DropPrivilegeExtractor(readRequiredPriv, + writeRequiredPriv); + readRequiredPriv = privExtractor.getReadReqPriv(); + writeRequiredPriv = privExtractor.getWriteReqPriv(); + + // if CREATE or DROP priv requirement is there, the owner should have WRITE permission on + // the database directory + if (privExtractor.hasDropPrivilege || requireCreatePrivilege(readRequiredPriv) + || requireCreatePrivilege(writeRequiredPriv)) { + authorize(hive_db.getDatabase(table.getDbName()), new Privilege[] {}, + new Privilege[] { Privilege.ALTER_DATA }); + } + + Path path = table.getDataLocation(); + // authorize drops if there was a drop privilege requirement, and + // table is not external (external table data is not dropped) + if (privExtractor.hasDropPrivilege() && table.getTableType() != TableType.EXTERNAL_TABLE) { + checkDeletePermission(path, getConf(), authenticator.getUserName()); + } + + // If the user has specified a location - external or not, check if the user + // has the permissions on the table dir + if (path != null) { + authorize(path, readRequiredPriv, writeRequiredPriv); + } } + + /** + * + * @param privs + * @return true, if set of given privileges privs contain CREATE privilege + */ + private boolean requireCreatePrivilege(Privilege[] privs) { + if(privs == null) { + return false; + } + for (Privilege priv : privs) { + if (priv.equals(Privilege.CREATE)) { + return true; + } + } + return false; + } + + @Override public void authorize(Partition part, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) throws HiveException, AuthorizationException { @@ -173,17 +218,39 @@ public class StorageBasedAuthorizationPr Privilege[] writeRequiredPriv) throws HiveException, AuthorizationException { + // extract drop privileges + DropPrivilegeExtractor privExtractor = new DropPrivilegeExtractor(readRequiredPriv, + writeRequiredPriv); + readRequiredPriv = privExtractor.getReadReqPriv(); + 
writeRequiredPriv = privExtractor.getWriteReqPriv(); + + // authorize drops if there was a drop privilege requirement + if(privExtractor.hasDropPrivilege()) { + checkDeletePermission(part.getDataLocation(), getConf(), authenticator.getUserName()); + } + // Partition path can be null in the case of a new create partition - in this case, // we try to default to checking the permissions of the parent table. // Partition itself can also be null, in cases where this gets called as a generic // catch-all call in cases like those with CTAS onto an unpartitioned table (see HIVE-1887) if ((part == null) || (part.getLocation() == null)) { - authorize(table, readRequiredPriv, writeRequiredPriv); + // this should be the case only if this is a create partition. + // The privilege needed on the table should be ALTER_DATA, and not CREATE + authorize(table, new Privilege[]{}, new Privilege[]{Privilege.ALTER_DATA}); } else { authorize(part.getDataLocation(), readRequiredPriv, writeRequiredPriv); } } + private void checkDeletePermission(Path dataLocation, Configuration conf, String userName) + throws HiveException { + try { + FileUtils.checkDeletePermission(dataLocation, conf, userName); + } catch (Exception e) { + throw new HiveException(e); + } + } + @Override public void authorize(Table table, Partition part, List<String> columns, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) throws HiveException, @@ -191,11 +258,7 @@ public class StorageBasedAuthorizationPr // In a simple storage-based auth, we have no information about columns // living in different files, so we do simple partition-auth and ignore // the columns parameter. - if ((part != null) && (part.getTable() != null)) { - authorize(part.getTable(), part, readRequiredPriv, writeRequiredPriv); - } else { - authorize(table, part, readRequiredPriv, writeRequiredPriv); - } + authorize(table, part, readRequiredPriv, writeRequiredPriv); } @Override @@ -373,4 +436,48 @@ public class StorageBasedAuthorizationPr // no-op - SBA does not attempt to authorize auth api call. 
Allow it } + public class DropPrivilegeExtractor { + + private boolean hasDropPrivilege = false; + private final Privilege[] readReqPriv; + private final Privilege[] writeReqPriv; + + public DropPrivilegeExtractor(Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) { + this.readReqPriv = extractDropPriv(readRequiredPriv); + this.writeReqPriv = extractDropPriv(writeRequiredPriv); + } + + private Privilege[] extractDropPriv(Privilege[] requiredPrivs) { + if (requiredPrivs == null) { + return null; + } + List<Privilege> privList = new ArrayList<Privilege>(); + for (Privilege priv : requiredPrivs) { + if (priv.equals(Privilege.DROP)) { + hasDropPrivilege = true; + } else { + privList.add(priv); + } + } + return privList.toArray(new Privilege[0]); + } + + public boolean hasDropPrivilege() { + return hasDropPrivilege; + } + + public void setHasDropPrivilege(boolean hasDropPrivilege) { + this.hasDropPrivilege = hasDropPrivilege; + } + + public Privilege[] getReadReqPriv() { + return readReqPriv; + } + + public Privilege[] getWriteReqPriv() { + return writeReqPriv; + } + + } + } Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrincipal.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrincipal.java?rev=1622396&r1=1622395&r2=1622396&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrincipal.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrincipal.java Thu Sep 4 02:49:46 2014 @@ -50,16 +50,9 @@ public class HivePrincipal implements Co public HivePrincipal(String name, HivePrincipalType type){ this.type = type; - if (type == HivePrincipalType.ROLE) { - // lower case role to make operations on it case insensitive - // when the old default authorization gets deprecated, this can move - // to ObjectStore code base - this.name = name.toLowerCase(); - } else { - this.name = name; - } - + this.name = name; } + public String getName() { return name; } Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLAuthorizationUtils.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLAuthorizationUtils.java?rev=1622396&r1=1622395&r2=1622396&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLAuthorizationUtils.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLAuthorizationUtils.java Thu Sep 4 02:49:46 2014 @@ -17,7 +17,6 @@ */ package org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd; -import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -25,6 +24,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.ListIterator; import java.util.Locale; import java.util.Map; import java.util.Set; @@ -416,4 +416,43 @@ public class SQLAuthorizationUtils { return new HiveAuthzPluginException(prefix + ": " + e.getMessage(), e); } + /** + * Validate the principal type, and convert role name to lower case + * @param hPrincipal + * 
@return validated principal + * @throws HiveAuthzPluginException + */ + public static HivePrincipal getValidatedPrincipal(HivePrincipal hPrincipal) + throws HiveAuthzPluginException { + if (hPrincipal == null || hPrincipal.getType() == null) { + // null principal + return hPrincipal; + } + switch (hPrincipal.getType()) { + case USER: + return hPrincipal; + case ROLE: + // lower case role names, for case insensitive behavior + return new HivePrincipal(hPrincipal.getName().toLowerCase(), hPrincipal.getType()); + default: + throw new HiveAuthzPluginException("Invalid principal type in principal " + hPrincipal); + } + } + + /** + * Calls getValidatedPrincipal on each principal in list and updates the list + * @param hivePrincipals + * @return + * @throws HiveAuthzPluginException + */ + public static List<HivePrincipal> getValidatedPrincipals(List<HivePrincipal> hivePrincipals) + throws HiveAuthzPluginException { + ListIterator<HivePrincipal> it = hivePrincipals.listIterator(); + while(it.hasNext()){ + it.set(getValidatedPrincipal(it.next())); + } + return hivePrincipals; + } + } Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidator.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidator.java?rev=1622396&r1=1622395&r2=1622396&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidator.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidator.java Thu Sep 4 02:49:46 2014 @@ -43,17 +43,17 @@ public class SQLStdHiveAuthorizationVali private final HiveMetastoreClientFactory metastoreClientFactory; private final HiveConf conf; private final HiveAuthenticationProvider authenticator; - private final SQLStdHiveAccessController privController; + private final SQLStdHiveAccessControllerWrapper privController; public static final Log LOG = LogFactory.getLog(SQLStdHiveAuthorizationValidator.class); public SQLStdHiveAuthorizationValidator(HiveMetastoreClientFactory metastoreClientFactory, HiveConf conf, HiveAuthenticationProvider authenticator, - SQLStdHiveAccessController privController) { + SQLStdHiveAccessControllerWrapper privilegeManager) { this.metastoreClientFactory = metastoreClientFactory; this.conf = conf; this.authenticator = authenticator; - this.privController = privController; + this.privController = privilegeManager; } @Override Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizerFactory.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizerFactory.java?rev=1622396&r1=1622395&r2=1622396&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizerFactory.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizerFactory.java Thu Sep 4 02:49:46 2014 @@ -32,8 +32,8 @@ public class SQLStdHiveAuthorizerFactory @Override public HiveAuthorizer 
createHiveAuthorizer(HiveMetastoreClientFactory metastoreClientFactory, HiveConf conf, HiveAuthenticationProvider authenticator, HiveAuthzSessionContext ctx) throws HiveAuthzPluginException { - SQLStdHiveAccessController privilegeManager = - new SQLStdHiveAccessController(metastoreClientFactory, conf, authenticator, ctx); + SQLStdHiveAccessControllerWrapper privilegeManager = + new SQLStdHiveAccessControllerWrapper(metastoreClientFactory, conf, authenticator, ctx); return new HiveAuthorizerImpl( privilegeManager, new SQLStdHiveAuthorizationValidator(metastoreClientFactory, conf, authenticator, Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java?rev=1622396&r1=1622395&r2=1622396&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java Thu Sep 4 02:49:46 2014 @@ -45,6 +45,8 @@ import org.apache.hadoop.fs.permission.F import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.ql.MapRedStats; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.tez.TezSessionPoolManager; @@ -86,6 +88,8 @@ public class SessionState { private static final String HDFS_SESSION_PATH_KEY = "_hive.hdfs.session.path"; private static final String TMP_TABLE_SPACE_KEY = "_hive.tmp_table_space"; private final Map<String, Map<String, Table>> tempTables = new HashMap<String, Map<String, Table>>(); + private final Map<String, Map<String, ColumnStatisticsObj>> tempTableColStats = + new HashMap<String, Map<String, ColumnStatisticsObj>>(); protected ClassLoader parentLoader; @@ -1145,6 +1149,10 @@ public class SessionState { return tempTables; } + public Map<String, Map<String, ColumnStatisticsObj>> getTempTableColStats() { + return tempTableColStats; + } + /** * @return ip address for user running the query */ Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregator.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregator.java?rev=1622396&r1=1622395&r2=1622396&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregator.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregator.java Thu Sep 4 02:49:46 2014 @@ -60,8 +60,8 @@ public class CounterStatsAggregator impl @Override public String aggregateStats(String counterGrpName, String statType) { // In case of counters, aggregation is done by JobTracker / MR AM itself - // so no need to aggregate, simply return the counter value for requested stat. - return String.valueOf(counters.getGroup(counterGrpName).getCounter(statType)); + // so no need to aggregate, simply return the counter value for requested stat. 
+ return String.valueOf(counters.getGroup(counterGrpName).getCounter(statType)); } @Override Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java?rev=1622396&r1=1622395&r2=1622396&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java Thu Sep 4 02:49:46 2014 @@ -25,10 +25,12 @@ import org.apache.commons.logging.LogFac import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.StatsSetupConst; +import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.AggrStats; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.Decimal; import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.RowSchema; import org.apache.hadoop.hive.ql.exec.TableScanOperator; @@ -76,6 +78,8 @@ import org.apache.hadoop.hive.serde2.obj import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector; import org.apache.hadoop.io.BytesWritable; +import java.math.BigDecimal; +import java.math.BigInteger; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; @@ -420,18 +424,22 @@ public class StatsUtils { cs.setCountDistint(csd.getLongStats().getNumDVs()); cs.setNumNulls(csd.getLongStats().getNumNulls()); cs.setAvgColLen(JavaDataModel.get().primitive1()); + cs.setRange(csd.getLongStats().getLowValue(), csd.getLongStats().getHighValue()); } else if (colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)) { cs.setCountDistint(csd.getLongStats().getNumDVs()); cs.setNumNulls(csd.getLongStats().getNumNulls()); cs.setAvgColLen(JavaDataModel.get().primitive2()); + cs.setRange(csd.getLongStats().getLowValue(), csd.getLongStats().getHighValue()); } else if (colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)) { cs.setCountDistint(csd.getDoubleStats().getNumDVs()); cs.setNumNulls(csd.getDoubleStats().getNumNulls()); cs.setAvgColLen(JavaDataModel.get().primitive1()); + cs.setRange(csd.getDoubleStats().getLowValue(), csd.getDoubleStats().getHighValue()); } else if (colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) { cs.setCountDistint(csd.getDoubleStats().getNumDVs()); cs.setNumNulls(csd.getDoubleStats().getNumNulls()); cs.setAvgColLen(JavaDataModel.get().primitive2()); + cs.setRange(csd.getDoubleStats().getLowValue(), csd.getDoubleStats().getHighValue()); } else if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME) || colType.startsWith(serdeConstants.CHAR_TYPE_NAME) || colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) { @@ -457,6 +465,13 @@ public class StatsUtils { cs.setAvgColLen(JavaDataModel.get().lengthOfDecimal()); cs.setCountDistint(csd.getDecimalStats().getNumDVs()); cs.setNumNulls(csd.getDecimalStats().getNumNulls()); + Decimal val = csd.getDecimalStats().getHighValue(); + BigDecimal maxVal = HiveDecimal. + create(new BigInteger(val.getUnscaled()), val.getScale()).bigDecimalValue(); + val = csd.getDecimalStats().getLowValue(); + BigDecimal minVal = HiveDecimal. 
+ create(new BigInteger(val.getUnscaled()), val.getScale()).bigDecimalValue(); + cs.setRange(minVal, maxVal); } else if (colType.equalsIgnoreCase(serdeConstants.DATE_TYPE_NAME)) { cs.setAvgColLen(JavaDataModel.get().lengthOfDate()); } else { @@ -1182,6 +1197,10 @@ public class StatsUtils { return getFullyQualifiedName(dbName, tabName, partName, colName); } + public static String getFullyQualifiedTableName(String dbName, String tabName) { + return getFullyQualifiedName(dbName, tabName); + } + private static String getFullyQualifiedName(String... names) { List<String> nonNullAndEmptyNames = Lists.newArrayList(); for (String name : names) { Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsAggregator.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsAggregator.java?rev=1622396&r1=1622395&r2=1622396&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsAggregator.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsAggregator.java Thu Sep 4 02:49:46 2014 @@ -27,6 +27,7 @@ import java.sql.SQLRecoverableException; import java.util.HashMap; import java.util.Map; import java.util.Random; +import java.util.concurrent.TimeUnit; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -46,7 +47,8 @@ public class JDBCStatsAggregator impleme private final Log LOG = LogFactory.getLog(this.getClass().getName()); private int timeout = 30; private final String comment = "Hive stats aggregation: " + this.getClass().getName(); - private int maxRetries, waitWindow; + private int maxRetries; + private long waitWindow; private final Random r; public JDBCStatsAggregator() { @@ -57,11 +59,14 @@ public class JDBCStatsAggregator impleme @Override public boolean connect(Configuration hiveconf, Task sourceTask) { this.hiveconf = hiveconf; - timeout = HiveConf.getIntVar(hiveconf, HiveConf.ConfVars.HIVE_STATS_JDBC_TIMEOUT); + timeout = (int) HiveConf.getTimeVar( + hiveconf, HiveConf.ConfVars.HIVE_STATS_JDBC_TIMEOUT, TimeUnit.SECONDS); connectionString = HiveConf.getVar(hiveconf, HiveConf.ConfVars.HIVESTATSDBCONNECTIONSTRING); String driver = HiveConf.getVar(hiveconf, HiveConf.ConfVars.HIVESTATSJDBCDRIVER); maxRetries = HiveConf.getIntVar(hiveconf, HiveConf.ConfVars.HIVE_STATS_RETRIES_MAX); - waitWindow = HiveConf.getIntVar(hiveconf, HiveConf.ConfVars.HIVE_STATS_RETRIES_WAIT); + waitWindow = HiveConf.getTimeVar( + hiveconf, HiveConf.ConfVars.HIVE_STATS_RETRIES_WAIT, TimeUnit.MILLISECONDS); + this.sourceTask = sourceTask; try { Class.forName(driver).newInstance(); Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java?rev=1622396&r1=1622395&r2=1622396&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java Thu Sep 4 02:49:46 2014 @@ -30,6 +30,7 @@ import java.sql.Statement; import java.util.List; import java.util.Map; import java.util.Random; +import java.util.concurrent.TimeUnit; import org.apache.commons.logging.Log; import 
org.apache.commons.logging.LogFactory; @@ -48,7 +49,8 @@ public class JDBCStatsPublisher implemen private int timeout; // default timeout in sec. for JDBC connection and statements // SQL comment that identifies where the SQL statement comes from private final String comment = "Hive stats publishing: " + this.getClass().getName(); - private int maxRetries, waitWindow; + private int maxRetries; + private long waitWindow; private final Random r; public JDBCStatsPublisher() { @@ -59,9 +61,11 @@ public class JDBCStatsPublisher implemen public boolean connect(Configuration hiveconf) { this.hiveconf = hiveconf; maxRetries = HiveConf.getIntVar(hiveconf, HiveConf.ConfVars.HIVE_STATS_RETRIES_MAX); - waitWindow = HiveConf.getIntVar(hiveconf, HiveConf.ConfVars.HIVE_STATS_RETRIES_WAIT); + waitWindow = HiveConf.getTimeVar( + hiveconf, HiveConf.ConfVars.HIVE_STATS_RETRIES_WAIT, TimeUnit.MILLISECONDS); connectionString = HiveConf.getVar(hiveconf, HiveConf.ConfVars.HIVESTATSDBCONNECTIONSTRING); - timeout = HiveConf.getIntVar(hiveconf, HiveConf.ConfVars.HIVE_STATS_JDBC_TIMEOUT); + timeout = (int) HiveConf.getTimeVar( + hiveconf, HiveConf.ConfVars.HIVE_STATS_JDBC_TIMEOUT, TimeUnit.SECONDS); String driver = HiveConf.getVar(hiveconf, HiveConf.ConfVars.HIVESTATSJDBCDRIVER); try { Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java?rev=1622396&r1=1622395&r2=1622396&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java Thu Sep 4 02:49:46 2014 @@ -120,7 +120,7 @@ public class CompactorMR { job.setBoolean(IS_MAJOR, isMajor); job.setBoolean(IS_COMPRESSED, sd.isCompressed()); job.set(TABLE_PROPS, new StringableMap(t.getParameters()).toString()); - job.setInt(NUM_BUCKETS, sd.getBucketColsSize()); + job.setInt(NUM_BUCKETS, sd.getNumBuckets()); job.set(ValidTxnList.VALID_TXNS_KEY, txns.toString()); setColumnTypes(job, sd.getCols()); Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java?rev=1622396&r1=1622395&r2=1622396&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java Thu Sep 4 02:49:46 2014 @@ -43,6 +43,7 @@ import java.io.IOException; import java.security.PrivilegedExceptionAction; import java.util.List; import java.util.Set; +import java.util.concurrent.TimeUnit; /** * A class to initiate compactions. This will run in a separate thread. 
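A note on the time-related configuration changes in this commit: duration settings that used to be read with getIntVar/getLongVar and scaled by hand (the waitWindow and timeout fields in the JDBC stats classes above, and the "* 1000" removed from Initiator.init in the hunks below) are now read through HiveConf.getTimeVar with an explicit TimeUnit, with fields widened from int to long to hold millisecond values. A minimal standalone sketch of the pattern, using plain java.util.concurrent.TimeUnit (the getTimeVar helper here is illustrative, not Hive's HiveConf API):

import java.util.concurrent.TimeUnit;

public class TimeVarDemo {
    // Convert a duration declared in one unit into the unit the caller needs.
    static long getTimeVar(long value, TimeUnit declaredUnit, TimeUnit wantedUnit) {
        return wantedUnit.convert(value, declaredUnit);
    }

    public static void main(String[] args) {
        // A check interval configured as 300 seconds, consumed in milliseconds:
        long checkIntervalMs = getTimeVar(300, TimeUnit.SECONDS, TimeUnit.MILLISECONDS);
        System.out.println(checkIntervalMs);   // 300000 -- no hand-written "* 1000"
    }
}

Doing the conversion once at the read site keeps every consumer in a single canonical unit and removes the scattered, easy-to-miss multiplications.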
@@ -50,7 +51,6 @@ import java.util.Set; public class Initiator extends CompactorThread { static final private String CLASS_NAME = Initiator.class.getName(); static final private Log LOG = LogFactory.getLog(CLASS_NAME); - static final private int threadId = 10000; static final private String NO_COMPACTION = "NO_AUTO_COMPACTION"; @@ -63,7 +63,7 @@ public class Initiator extends Compactor try { recoverFailedCompactions(false); - int abortedThreashold = HiveConf.getIntVar(conf, + int abortedThreshold = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_COMPACTOR_ABORTEDTXN_THRESHOLD); // Make sure we run through the loop once before checking to stop as this makes testing @@ -77,7 +77,7 @@ public class Initiator extends Compactor try { ShowCompactResponse currentCompactions = txnHandler.showCompact(new ShowCompactRequest()); ValidTxnList txns = TxnHandler.createValidTxnList(txnHandler.getOpenTxns()); - Set<CompactionInfo> potentials = txnHandler.findPotentialCompactions(abortedThreashold); + Set<CompactionInfo> potentials = txnHandler.findPotentialCompactions(abortedThreshold); LOG.debug("Found " + potentials.size() + " potential compactions, " + "checking to see if we should compact any of them"); for (CompactionInfo ci : potentials) { @@ -140,13 +140,13 @@ public class Initiator extends Compactor public void init(BooleanPointer stop) throws MetaException { super.init(stop); checkInterval = - HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVE_COMPACTOR_CHECK_INTERVAL) * 1000; + conf.getTimeVar(HiveConf.ConfVars.HIVE_COMPACTOR_CHECK_INTERVAL, TimeUnit.MILLISECONDS) ; } private void recoverFailedCompactions(boolean remoteOnly) throws MetaException { if (!remoteOnly) txnHandler.revokeFromLocalWorkers(Worker.hostname()); - txnHandler.revokeTimedoutWorkers(HiveConf.getLongVar(conf, - HiveConf.ConfVars.HIVE_COMPACTOR_WORKER_TIMEOUT)); + txnHandler.revokeTimedoutWorkers(HiveConf.getTimeVar(conf, + HiveConf.ConfVars.HIVE_COMPACTOR_WORKER_TIMEOUT, TimeUnit.MILLISECONDS)); } // Figure out if there are any currently running compactions on the same table or partition. Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java?rev=1622396&r1=1622395&r2=1622396&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java Thu Sep 4 02:49:46 2014 @@ -34,43 +34,38 @@ import org.apache.hadoop.io.IntWritable; @WindowFunctionDescription ( - description = @Description( - name = "cume_dist", - value = "_FUNC_(x) - The CUME_DIST function (defined as the inverse of percentile in some " + - "statistical books) computes the position of a specified value relative to a set of values. " + - "To compute the CUME_DIST of a value x in a set S of size N, you use the formula: " + - "CUME_DIST(x) = number of values in S coming before " + - " and including x in the specified order/ N" - ), - supportsWindow = false, - pivotResult = true, - rankingFunction = true, - impliesOrder = true + description = @Description( + name = "cume_dist", + value = "_FUNC_(x) - The CUME_DIST function (defined as the inverse of percentile in some " + + "statistical books) computes the position of a specified value relative to a set of values. 
" + + "To compute the CUME_DIST of a value x in a set S of size N, you use the formula: " + + "CUME_DIST(x) = number of values in S coming before " + + " and including x in the specified order/ N" + ), + supportsWindow = false, + pivotResult = true, + rankingFunction = true, + impliesOrder = true ) -public class GenericUDAFCumeDist extends GenericUDAFRank -{ +public class GenericUDAFCumeDist extends GenericUDAFRank { - static final Log LOG = LogFactory.getLog(GenericUDAFCumeDist.class.getName()); + static final Log LOG = LogFactory.getLog(GenericUDAFCumeDist.class.getName()); - @Override - protected GenericUDAFAbstractRankEvaluator createEvaluator() - { - return new GenericUDAFCumeDistEvaluator(); - } + @Override + protected GenericUDAFAbstractRankEvaluator createEvaluator() { + return new GenericUDAFCumeDistEvaluator(); + } - public static class GenericUDAFCumeDistEvaluator extends GenericUDAFAbstractRankEvaluator - { + public static class GenericUDAFCumeDistEvaluator extends GenericUDAFAbstractRankEvaluator { @Override - public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException - { + public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { super.init(m, parameters); return ObjectInspectorFactory .getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); } @Override - public Object terminate(AggregationBuffer agg) throws HiveException - { + public Object terminate(AggregationBuffer agg) throws HiveException { List<IntWritable> ranks = ((RankBuffer) agg).rowNums; int ranksSize = ranks.size(); double ranksSizeDouble = ranksSize; Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java?rev=1622396&r1=1622395&r2=1622396&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java Thu Sep 4 02:49:46 2014 @@ -23,41 +23,38 @@ import org.apache.commons.logging.LogFac import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.WindowFunctionDescription; -@WindowFunctionDescription -( - description = @Description( - name = "dense_rank", - value = "_FUNC_(x) The difference between RANK and DENSE_RANK is that DENSE_RANK leaves no " + - "gaps in ranking sequence when there are ties. That is, if you were " + - "ranking a competition using DENSE_RANK and had three people tie for " + - "second place, you would say that all three were in second place and " + - "that the next person came in third." - ), - supportsWindow = false, - pivotResult = true, - rankingFunction = true, - impliesOrder = true +@WindowFunctionDescription( + description = @Description( + name = "dense_rank", + value = "_FUNC_(x) The difference between RANK and DENSE_RANK is that DENSE_RANK leaves no " + + "gaps in ranking sequence when there are ties. That is, if you were " + + "ranking a competition using DENSE_RANK and had three people tie for " + + "second place, you would say that all three were in second place and " + + "that the next person came in third." 
+ ), + supportsWindow = false, + pivotResult = true, + rankingFunction = true, + impliesOrder = true ) -public class GenericUDAFDenseRank extends GenericUDAFRank -{ - static final Log LOG = LogFactory.getLog(GenericUDAFDenseRank.class.getName()); - - @Override - protected GenericUDAFAbstractRankEvaluator createEvaluator() - { - return new GenericUDAFDenseRankEvaluator(); - } - - public static class GenericUDAFDenseRankEvaluator extends GenericUDAFRankEvaluator - { - /* - * Called when the value in the partition has changed. Update the currentRank - */ - @Override - protected void nextRank(RankBuffer rb) - { - rb.currentRank++; - } - } +public class GenericUDAFDenseRank extends GenericUDAFRank { + + static final Log LOG = LogFactory.getLog(GenericUDAFDenseRank.class.getName()); + + @Override + protected GenericUDAFAbstractRankEvaluator createEvaluator() { + return new GenericUDAFDenseRankEvaluator(); + } + + public static class GenericUDAFDenseRankEvaluator extends GenericUDAFRankEvaluator { + + /* + * Called when the value in the partition has changed. Update the currentRank + */ + @Override + protected void nextRank(RankBuffer rb) { + rb.currentRank++; + } + } } Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFFirstValue.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFFirstValue.java?rev=1622396&r1=1622395&r2=1622396&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFFirstValue.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFFirstValue.java Thu Sep 4 02:49:46 2014 @@ -41,147 +41,128 @@ import org.apache.hadoop.hive.serde2.obj import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -@WindowFunctionDescription -( - description = @Description( - name = "first_value", - value = "_FUNC_(x)" - ), - supportsWindow = true, - pivotResult = false, - impliesOrder = true +@WindowFunctionDescription( + description = @Description( + name = "first_value", + value = "_FUNC_(x)" + ), + supportsWindow = true, + pivotResult = false, + impliesOrder = true ) -public class GenericUDAFFirstValue extends AbstractGenericUDAFResolver -{ - static final Log LOG = LogFactory.getLog(GenericUDAFFirstValue.class.getName()); - - @Override - public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException - { - if (parameters.length > 2) - { - throw new UDFArgumentTypeException(2, "At most 2 arguments expected"); - } - if ( parameters.length > 1 && !parameters[1].equals(TypeInfoFactory.booleanTypeInfo) ) - { - throw new UDFArgumentTypeException(1, "second argument must be a boolean expression"); - } - return createEvaluator(); - } - - protected GenericUDAFFirstValueEvaluator createEvaluator() - { - return new GenericUDAFFirstValueEvaluator(); - } - - static class FirstValueBuffer implements AggregationBuffer - { - Object val; - boolean valSet; - boolean firstRow; - boolean skipNulls; - - FirstValueBuffer() - { - init(); - } - - void init() - { - val = null; - valSet = false; - firstRow = true; - skipNulls = false; - } - - } - - public static class GenericUDAFFirstValueEvaluator extends GenericUDAFEvaluator - { - ObjectInspector inputOI; - ObjectInspector outputOI; - - @Override - public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws 
HiveException - { - super.init(m, parameters); - if (m != Mode.COMPLETE) - { - throw new HiveException( - "Only COMPLETE mode supported for Rank function"); - } - inputOI = parameters[0]; - outputOI = ObjectInspectorUtils.getStandardObjectInspector(inputOI, ObjectInspectorCopyOption.WRITABLE); - return outputOI; - } - - @Override - public AggregationBuffer getNewAggregationBuffer() throws HiveException - { - return new FirstValueBuffer(); - } - - @Override - public void reset(AggregationBuffer agg) throws HiveException - { - ((FirstValueBuffer) agg).init(); - } - - @Override - public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException - { - FirstValueBuffer fb = (FirstValueBuffer) agg; - - if (fb.firstRow ) - { - fb.firstRow = false; - if ( parameters.length == 2 ) - { - fb.skipNulls = PrimitiveObjectInspectorUtils.getBoolean( - parameters[1], - PrimitiveObjectInspectorFactory.writableBooleanObjectInspector); - } - } - - if ( !fb.valSet ) - { - fb.val = ObjectInspectorUtils.copyToStandardObject(parameters[0], inputOI, ObjectInspectorCopyOption.WRITABLE); - if ( !fb.skipNulls || fb.val != null ) - { - fb.valSet = true; - } - } - } - - @Override - public Object terminatePartial(AggregationBuffer agg) throws HiveException - { - throw new HiveException("terminatePartial not supported"); - } - - @Override - public void merge(AggregationBuffer agg, Object partial) throws HiveException - { - throw new HiveException("merge not supported"); - } - - @Override - public Object terminate(AggregationBuffer agg) throws HiveException - { - return ((FirstValueBuffer) agg).val; - } - +public class GenericUDAFFirstValue extends AbstractGenericUDAFResolver { + + static final Log LOG = LogFactory.getLog(GenericUDAFFirstValue.class.getName()); + + @Override + public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException { + if (parameters.length > 2) { + throw new UDFArgumentTypeException(2, "At most 2 arguments expected"); + } + if (parameters.length > 1 && !parameters[1].equals(TypeInfoFactory.booleanTypeInfo)) { + throw new UDFArgumentTypeException(1, "second argument must be a boolean expression"); + } + return createEvaluator(); + } + + protected GenericUDAFFirstValueEvaluator createEvaluator() { + return new GenericUDAFFirstValueEvaluator(); + } + + static class FirstValueBuffer implements AggregationBuffer { + + Object val; + boolean valSet; + boolean firstRow; + boolean skipNulls; + + FirstValueBuffer() { + init(); + } + + void init() { + val = null; + valSet = false; + firstRow = true; + skipNulls = false; + } + + } + + public static class GenericUDAFFirstValueEvaluator extends GenericUDAFEvaluator { + + ObjectInspector inputOI; + ObjectInspector outputOI; + + @Override + public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { + super.init(m, parameters); + if (m != Mode.COMPLETE) { + throw new HiveException("Only COMPLETE mode supported for Rank function"); + } + inputOI = parameters[0]; + outputOI = ObjectInspectorUtils.getStandardObjectInspector(inputOI, + ObjectInspectorCopyOption.WRITABLE); + return outputOI; + } + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + return new FirstValueBuffer(); + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + ((FirstValueBuffer) agg).init(); + } + + @Override + public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { + FirstValueBuffer fb = (FirstValueBuffer) agg; + + if 
(fb.firstRow) { + fb.firstRow = false; + if (parameters.length == 2) { + fb.skipNulls = PrimitiveObjectInspectorUtils.getBoolean(parameters[1], + PrimitiveObjectInspectorFactory.writableBooleanObjectInspector); + } + } + + if (!fb.valSet) { + fb.val = ObjectInspectorUtils.copyToStandardObject(parameters[0], inputOI, + ObjectInspectorCopyOption.WRITABLE); + if (!fb.skipNulls || fb.val != null) { + fb.valSet = true; + } + } + } + + @Override + public Object terminatePartial(AggregationBuffer agg) throws HiveException { + throw new HiveException("terminatePartial not supported"); + } + + @Override + public void merge(AggregationBuffer agg, Object partial) throws HiveException { + throw new HiveException("merge not supported"); + } + + @Override + public Object terminate(AggregationBuffer agg) throws HiveException { + return ((FirstValueBuffer) agg).val; + } + @Override public GenericUDAFEvaluator getWindowingEvaluator(WindowFrameDef wFrmDef) { BoundaryDef start = wFrmDef.getStart(); BoundaryDef end = wFrmDef.getEnd(); - return new FirstValStreamingFixedWindow(this, start.getAmt(), - end.getAmt()); + return new FirstValStreamingFixedWindow(this, start.getAmt(), end.getAmt()); } - } - + } + static class ValIndexPair { + Object val; int idx; @@ -191,16 +172,15 @@ public class GenericUDAFFirstValue exten } } - static class FirstValStreamingFixedWindow extends - GenericUDAFStreamingEvaluator<Object> { + static class FirstValStreamingFixedWindow extends GenericUDAFStreamingEvaluator<Object> { class State extends GenericUDAFStreamingEvaluator<Object>.StreamingState { + private final Deque<ValIndexPair> valueChain; public State(int numPreceding, int numFollowing, AggregationBuffer buf) { super(numPreceding, numFollowing, buf); - valueChain = new ArrayDeque<ValIndexPair>(numPreceding + numFollowing - + 1); + valueChain = new ArrayDeque<ValIndexPair>(numPreceding + numFollowing + 1); } @Override @@ -222,8 +202,8 @@ public class GenericUDAFFirstValue exten */ int wdwSz = numPreceding + numFollowing + 1; - return underlying + (underlying * wdwSz) + (underlying * wdwSz) - + (3 * JavaDataModel.PRIMITIVES1); + return underlying + (underlying * wdwSz) + (underlying * wdwSz) + (3 + * JavaDataModel.PRIMITIVES1); } protected void reset() { @@ -232,8 +212,8 @@ public class GenericUDAFFirstValue exten } } - public FirstValStreamingFixedWindow(GenericUDAFEvaluator wrappedEval, - int numPreceding, int numFollowing) { + public FirstValStreamingFixedWindow(GenericUDAFEvaluator wrappedEval, int numPreceding, + int numFollowing) { super(wrappedEval, numPreceding, numFollowing); } @@ -253,8 +233,7 @@ public class GenericUDAFFirstValue exten } @Override - public void iterate(AggregationBuffer agg, Object[] parameters) - throws HiveException { + public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { State s = (State) agg; FirstValueBuffer fb = (FirstValueBuffer) s.wrappedBuf; @@ -266,15 +245,14 @@ public class GenericUDAFFirstValue exten wrappedEval.iterate(fb, parameters); } - Object o = ObjectInspectorUtils.copyToStandardObject(parameters[0], - inputOI(), ObjectInspectorCopyOption.WRITABLE); + Object o = ObjectInspectorUtils.copyToStandardObject(parameters[0], inputOI(), + ObjectInspectorCopyOption.WRITABLE); /* * add row to chain. except in case of UNB preceding: - only 1 firstVal * needs to be tracked. 
*/ - if (s.numPreceding != BoundarySpec.UNBOUNDED_AMOUNT - || s.valueChain.isEmpty()) { + if (s.numPreceding != BoundarySpec.UNBOUNDED_AMOUNT || s.valueChain.isEmpty()) { /* * add value to chain if it is not null or if skipNulls is false. */ @@ -309,8 +287,7 @@ public class GenericUDAFFirstValue exten public Object terminate(AggregationBuffer agg) throws HiveException { State s = (State) agg; FirstValueBuffer fb = (FirstValueBuffer) s.wrappedBuf; - ValIndexPair r = fb.skipNulls && s.valueChain.size() == 0 ? null - : s.valueChain.getFirst(); + ValIndexPair r = fb.skipNulls && s.valueChain.size() == 0 ? null : s.valueChain.getFirst(); for (int i = 0; i < s.numFollowing; i++) { s.results.add(r == null ? null : r.val); Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLastValue.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLastValue.java?rev=1622396&r1=1622395&r2=1622396&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLastValue.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFLastValue.java Thu Sep 4 02:49:46 2014 @@ -37,131 +37,107 @@ import org.apache.hadoop.hive.serde2.obj import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -@WindowFunctionDescription(description = @Description(name = "last_value", value = "_FUNC_(x)"), supportsWindow = true, pivotResult = false, impliesOrder = true) -public class GenericUDAFLastValue extends AbstractGenericUDAFResolver -{ - static final Log LOG = LogFactory.getLog(GenericUDAFLastValue.class - .getName()); - - @Override - public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) - throws SemanticException - { - if (parameters.length > 2) - { - throw new UDFArgumentTypeException(2, "At most 2 arguments expected"); - } - if ( parameters.length > 1 && !parameters[1].equals(TypeInfoFactory.booleanTypeInfo) ) - { - throw new UDFArgumentTypeException(1, "second argument must be a boolean expression"); - } - return createEvaluator(); - } - - protected GenericUDAFLastValueEvaluator createEvaluator() - { - return new GenericUDAFLastValueEvaluator(); - } - - static class LastValueBuffer implements AggregationBuffer - { - Object val; - boolean firstRow; - boolean skipNulls; - - LastValueBuffer() - { - init(); - } - - void init() - { - val = null; - firstRow = true; - skipNulls = false; - } - - } - - public static class GenericUDAFLastValueEvaluator extends - GenericUDAFEvaluator - { - ObjectInspector inputOI; - ObjectInspector outputOI; - - @Override - public ObjectInspector init(Mode m, ObjectInspector[] parameters) - throws HiveException - { - super.init(m, parameters); - if (m != Mode.COMPLETE) - { - throw new HiveException( - "Only COMPLETE mode supported for Rank function"); - } - inputOI = parameters[0]; - outputOI = ObjectInspectorUtils.getStandardObjectInspector(inputOI, - ObjectInspectorCopyOption.WRITABLE); - return outputOI; - } - - @Override - public AggregationBuffer getNewAggregationBuffer() throws HiveException - { - return new LastValueBuffer(); - } - - @Override - public void reset(AggregationBuffer agg) throws HiveException - { - ((LastValueBuffer) agg).init(); - } - - @Override - public void iterate(AggregationBuffer agg, Object[] parameters) - throws HiveException - { - LastValueBuffer lb = 
(LastValueBuffer) agg; - if (lb.firstRow ) - { - lb.firstRow = false; - if ( parameters.length == 2 ) - { - lb.skipNulls = PrimitiveObjectInspectorUtils.getBoolean( - parameters[1], - PrimitiveObjectInspectorFactory.writableBooleanObjectInspector); - } - } - - Object o = ObjectInspectorUtils.copyToStandardObject(parameters[0], - inputOI, ObjectInspectorCopyOption.WRITABLE); +@WindowFunctionDescription(description = @Description(name = "last_value", value = "_FUNC_(x)"), + supportsWindow = true, pivotResult = false, impliesOrder = true) +public class GenericUDAFLastValue extends AbstractGenericUDAFResolver { + + static final Log LOG = LogFactory.getLog(GenericUDAFLastValue.class.getName()); + + @Override + public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException { + if (parameters.length > 2) { + throw new UDFArgumentTypeException(2, "At most 2 arguments expected"); + } + if (parameters.length > 1 && !parameters[1].equals(TypeInfoFactory.booleanTypeInfo)) { + throw new UDFArgumentTypeException(1, "second argument must be a boolean expression"); + } + return createEvaluator(); + } + + protected GenericUDAFLastValueEvaluator createEvaluator() { + return new GenericUDAFLastValueEvaluator(); + } + + static class LastValueBuffer implements AggregationBuffer { + + Object val; + boolean firstRow; + boolean skipNulls; + + LastValueBuffer() { + init(); + } + + void init() { + val = null; + firstRow = true; + skipNulls = false; + } + + } + + public static class GenericUDAFLastValueEvaluator extends GenericUDAFEvaluator { + + ObjectInspector inputOI; + ObjectInspector outputOI; + + @Override + public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { + super.init(m, parameters); + if (m != Mode.COMPLETE) { + throw new HiveException("Only COMPLETE mode supported for Rank function"); + } + inputOI = parameters[0]; + outputOI = ObjectInspectorUtils.getStandardObjectInspector(inputOI, + ObjectInspectorCopyOption.WRITABLE); + return outputOI; + } + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + return new LastValueBuffer(); + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + ((LastValueBuffer) agg).init(); + } + + @Override + public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { + LastValueBuffer lb = (LastValueBuffer) agg; + if (lb.firstRow) { + lb.firstRow = false; + if (parameters.length == 2) { + lb.skipNulls = PrimitiveObjectInspectorUtils.getBoolean(parameters[1], + PrimitiveObjectInspectorFactory.writableBooleanObjectInspector); + } + } + + Object o = ObjectInspectorUtils.copyToStandardObject(parameters[0], inputOI, + ObjectInspectorCopyOption.WRITABLE); if (!lb.skipNulls || o != null) { lb.val = o; } - } + } - @Override - public Object terminatePartial(AggregationBuffer agg) - throws HiveException - { - throw new HiveException("terminatePartial not supported"); - } - - @Override - public void merge(AggregationBuffer agg, Object partial) - throws HiveException - { - throw new HiveException("merge not supported"); - } - - @Override - public Object terminate(AggregationBuffer agg) throws HiveException - { - LastValueBuffer lb = (LastValueBuffer) agg; - return lb.val; + @Override + public Object terminatePartial(AggregationBuffer agg) throws HiveException { + throw new HiveException("terminatePartial not supported"); + } - } + @Override + public void merge(AggregationBuffer agg, Object partial) throws HiveException { + throw new 
HiveException("merge not supported"); + } + + @Override + public Object terminate(AggregationBuffer agg) throws HiveException { + LastValueBuffer lb = (LastValueBuffer) agg; + return lb.val; + + } @Override public GenericUDAFEvaluator getWindowingEvaluator(WindowFrameDef wFrmDef) { @@ -169,12 +145,12 @@ public class GenericUDAFLastValue extend BoundaryDef end = wFrmDef.getEnd(); return new LastValStreamingFixedWindow(this, start.getAmt(), end.getAmt()); } - } + } - static class LastValStreamingFixedWindow extends - GenericUDAFStreamingEvaluator<Object> { + static class LastValStreamingFixedWindow extends GenericUDAFStreamingEvaluator<Object> { class State extends GenericUDAFStreamingEvaluator<Object>.StreamingState { + private Object lastValue; private int lastIdx; @@ -203,8 +179,8 @@ public class GenericUDAFLastValue extend } } - public LastValStreamingFixedWindow(GenericUDAFEvaluator wrappedEval, - int numPreceding, int numFollowing) { + public LastValStreamingFixedWindow(GenericUDAFEvaluator wrappedEval, int numPreceding, + int numFollowing) { super(wrappedEval, numPreceding, numFollowing); } @@ -224,8 +200,7 @@ public class GenericUDAFLastValue extend } @Override - public void iterate(AggregationBuffer agg, Object[] parameters) - throws HiveException { + public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { State s = (State) agg; LastValueBuffer lb = (LastValueBuffer) s.wrappedBuf; @@ -237,8 +212,8 @@ public class GenericUDAFLastValue extend wrappedEval.iterate(lb, parameters); } - Object o = ObjectInspectorUtils.copyToStandardObject(parameters[0], - inputOI(), ObjectInspectorCopyOption.WRITABLE); + Object o = ObjectInspectorUtils.copyToStandardObject(parameters[0], inputOI(), + ObjectInspectorCopyOption.WRITABLE); if (!lb.skipNulls || o != null) { s.lastValue = o;
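
(The streaming last_value evaluator above never needs more than the most recent qualifying row: for a frame that extends to the current row, any older candidate can never become the answer again, so State keeps a single lastValue/lastIdx pair, with lastIdx used to expire the value when the frame's PRECEDING bound is finite. A toy outside Hive showing the running, UNBOUNDED PRECEDING case under that invariant; the class name, data, and skipNulls flag are invented for illustration:

  public class LastValueSketch {
    public static void main(String[] args) {
      Integer[] rows = {5, null, 3, null, null};
      boolean skipNulls = true;  // mirrors last_value(x, true)
      Integer lastValue = null;
      for (int i = 0; i < rows.length; i++) {
        // Keep only the newest qualifying row; nothing older can ever
        // be last_value of a frame ending at or after this row.
        if (!skipNulls || rows[i] != null) {
          lastValue = rows[i];
        }
        System.out.println("row " + i + " -> last_value so far = " + lastValue);
      }
    }
  }

This prints 5, 5, 3, 3, 3: nulls are skipped and the newest non-null survives.)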

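(first_value is the harder direction: with a finite PRECEDING bound the row currently at the head of the frame can expire, so FirstValStreamingFixedWindow keeps a Deque of in-window candidates, the valueChain of ValIndexPair objects in the GenericUDAFFirstValue diff above, and answers from its head. A self-contained toy of that deque technique; the data and names are invented, and it models only rows-based PRECEDING frames:

  import java.util.ArrayDeque;
  import java.util.Deque;

  public class FirstValueSketch {
    static final class Pair {
      final Integer val; final int idx;
      Pair(Integer v, int i) { val = v; idx = i; }
    }

    public static void main(String[] args) {
      Integer[] rows = {5, null, 3, 8, 1};
      int preceding = 2;           // frame: 2 PRECEDING .. CURRENT ROW
      boolean skipNulls = true;
      Deque<Pair> chain = new ArrayDeque<Pair>();
      for (int i = 0; i < rows.length; i++) {
        // Expire candidates that fell out of the frame [i - preceding, i].
        while (!chain.isEmpty() && chain.getFirst().idx < i - preceding) {
          chain.removeFirst();
        }
        if (!skipNulls || rows[i] != null) {
          chain.addLast(new Pair(rows[i], i));
        }
        // Only the head is ever emitted; later candidates are retained
        // solely so one of them can take over when the head expires.
        System.out.println("row " + i + " -> first_value = "
            + (chain.isEmpty() ? null : chain.getFirst().val));
      }
    }
  }

For UNBOUNDED PRECEDING nothing can ever expire, so a single tracked value suffices; that is the s.numPreceding != BoundarySpec.UNBOUNDED_AMOUNT || s.valueChain.isEmpty() guard in the patched iterate() above.)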