Added: hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/OpParseContext.java URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/OpParseContext.java?rev=712905&view=auto ============================================================================== --- hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/OpParseContext.java (added) +++ hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/OpParseContext.java Mon Nov 10 17:50:06 2008 @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.parse; + +import java.util.List; + +/** + * Implementation of the Operator Parse Context. It maintains the parse context + * that may be needed by an operator. 
Currently, it only maintains the row + * resolver and the list of columns used by the operator + **/ + +public class OpParseContext { + private RowResolver rr; // row resolver for the operator + + // list of internal column names used + private List<String> colNames; + + /** + * @param rr row resolver + */ + public OpParseContext(RowResolver rr) { + this.rr = rr; + } + + /** + * @return the row resolver + */ + public RowResolver getRR() { + return rr; + } + + /** + * @param rr the row resolver to set + */ + public void setRR(RowResolver rr) { + this.rr = rr; + } + + /** + * @return the column names desired + */ + public List<String> getColNames() { + return colNames; + } + + /** + * @param colNames the column names to set + */ + public void setColNames(List<String> colNames) { + this.colNames = colNames; + } +}
Added: hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java?rev=712905&view=auto ============================================================================== --- hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (added) +++ hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java Mon Nov 10 17:50:06 2008 @@ -0,0 +1,264 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.parse; + +import java.io.Serializable; +import java.util.HashMap; +import java.util.List; + +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.plan.loadFileDesc; +import org.apache.hadoop.hive.ql.plan.loadTableDesc; +import org.antlr.runtime.tree.CommonTree; +import org.apache.hadoop.hive.ql.Context; +import org.apache.hadoop.hive.conf.HiveConf; + +/** + * Parse Context: The current parse context. This is passed to the optimizer + * which then transforms the operator tree using the parse context. 
All the + * optimizations are performed sequentially and then the new parse context + * populated. Note that since the parse context contains the operator tree, it + * can be easily retrieved by the next optimization step or finally for task + * generation after the plan has been completely optimized. + * + **/ + +public class ParseContext { + private QB qb; + private CommonTree ast; + private HashMap<String, PartitionPruner> aliasToPruner; + private HashMap<String, SamplePruner> aliasToSamplePruner; + private HashMap<String, Operator<? extends Serializable>> topOps; + private HashMap<String, Operator<? extends Serializable>> topSelOps; + private HashMap<Operator<? extends Serializable>, OpParseContext> opParseCtx; + private List<loadTableDesc> loadTableWork; + private List<loadFileDesc> loadFileWork; + private Context ctx; + private HiveConf conf; + + /** + * @param qb + * current QB + * @param ast + * current parse tree + * @param aliasToPruner + * partition pruner list + * @param aliasToSamplePruner + * sample pruner list + * @param loadFileWork + * list of destination files being loaded + * @param loadTableWork + * list of destination tables being loaded + * @param opParseCtx + * operator parse context - contains a mapping from operator to + * operator parse state (row resolver etc.) + * @param topOps + * list of operators for the top query + * @param topSelOps + * list of operators for the selects introduced for column pruning + */ + public ParseContext(HiveConf conf, QB qb, CommonTree ast, + HashMap<String, PartitionPruner> aliasToPruner, + HashMap<String, SamplePruner> aliasToSamplePruner, + HashMap<String, Operator<? extends Serializable>> topOps, + HashMap<String, Operator<? extends Serializable>> topSelOps, + HashMap<Operator<? 
extends Serializable>, OpParseContext> opParseCtx, + List<loadTableDesc> loadTableWork, List<loadFileDesc> loadFileWork, + Context ctx) { + this.conf = conf; + this.qb = qb; + this.ast = ast; + this.aliasToPruner = aliasToPruner; + this.aliasToSamplePruner = aliasToSamplePruner; + this.loadFileWork = loadFileWork; + this.loadTableWork = loadTableWork; + this.opParseCtx = opParseCtx; + this.topOps = topOps; + this.topSelOps = topSelOps; + this.ctx = ctx; + } + + /** + * @return the qb + */ + public QB getQB() { + return qb; + } + + /** + * @param qb + * the qb to set + */ + public void setQB(QB qb) { + this.qb = qb; + } + + /** + * @return the context + */ + public Context getContext() { + return ctx; + } + + /** + * @param ctx + * the context to set + */ + public void setContext(Context ctx) { + this.ctx = ctx; + } + + /** + * @return the hive conf + */ + public HiveConf getConf() { + return conf; + } + + /** + * @param conf + * the conf to set + */ + public void setConf(HiveConf conf) { + this.conf = conf; + } + + /** + * @return the ast + */ + public CommonTree getParseTree() { + return ast; + } + + /** + * @param ast + * the parsetree to set + */ + public void setParseTree(CommonTree ast) { + this.ast = ast; + } + + /** + * @return the aliasToPruner + */ + public HashMap<String, PartitionPruner> getAliasToPruner() { + return aliasToPruner; + } + + /** + * @param aliasToPruner + * the aliasToPruner to set + */ + public void setAliasToPruner(HashMap<String, PartitionPruner> aliasToPruner) { + this.aliasToPruner = aliasToPruner; + } + + /** + * @return the aliasToSamplePruner + */ + public HashMap<String, SamplePruner> getAliasToSamplePruner() { + return aliasToSamplePruner; + } + + /** + * @param aliasToSamplePruner + * the aliasToSamplePruner to set + */ + public void setAliasToSamplePruner( + HashMap<String, SamplePruner> aliasToSamplePruner) { + this.aliasToSamplePruner = aliasToSamplePruner; + } + + /** + * @return the topOps + */ + public HashMap<String, 
Operator<? extends Serializable>> getTopOps() { + return topOps; + } + + /** + * @param topOps + * the topOps to set + */ + public void setTopOps(HashMap<String, Operator<? extends Serializable>> topOps) { + this.topOps = topOps; + } + + /** + * @return the topSelOps + */ + public HashMap<String, Operator<? extends Serializable>> getTopSelOps() { + return topSelOps; + } + + /** + * @param topSelOps + * the topSelOps to set + */ + public void setTopSelOps( + HashMap<String, Operator<? extends Serializable>> topSelOps) { + this.topSelOps = topSelOps; + } + + /** + * @return the opParseCtx + */ + public HashMap<Operator<? extends Serializable>, OpParseContext> getOpParseCtx() { + return opParseCtx; + } + + /** + * @param opParseCtx + * the opParseCtx to set + */ + public void setOpParseCtx( + HashMap<Operator<? extends Serializable>, OpParseContext> opParseCtx) { + this.opParseCtx = opParseCtx; + } + + /** + * @return the loadTableWork + */ + public List<loadTableDesc> getLoadTableWork() { + return loadTableWork; + } + + /** + * @param loadTableWork + * the loadTableWork to set + */ + public void setLoadTableWork(List<loadTableDesc> loadTableWork) { + this.loadTableWork = loadTableWork; + } + + /** + * @return the loadFileWork + */ + public List<loadFileDesc> getLoadFileWork() { + return loadFileWork; + } + + /** + * @param loadFileWork + * the loadFileWork to set + */ + public void setLoadFileWork(List<loadFileDesc> loadFileWork) { + this.loadFileWork = loadFileWork; + } +} Modified: hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionPruner.java URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionPruner.java?rev=712905&r1=712904&r2=712905&view=diff ============================================================================== --- hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionPruner.java (original) +++ 
hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionPruner.java Mon Nov 10 17:50:06 2008 @@ -59,6 +59,12 @@ private Table tab; private exprNodeDesc prunerExpr; + + // is set to true if the expression only contains partitioning columns and not any other column reference. + // This is used to optimize select * from table where ... scenario, when the where condition only references + // partitioning columns - the partitions are identified and streamed directly to the client without requiring + // a map-reduce job + private boolean containsPartCols; /** Creates a new instance of PartitionPruner */ public PartitionPruner(String tableAlias, QBMetaData metaData) { @@ -66,8 +72,13 @@ this.metaData = metaData; this.tab = metaData.getTableForAlias(tableAlias); this.prunerExpr = null; + containsPartCols = true; } + public boolean containsPartitionCols() { + return containsPartCols; + } + /** * We use exprNodeConstantDesc(class,null) to represent unknown values. * Except UDFOPAnd, UDFOPOr, and UDFOPNot, all UDFs are assumed to return unknown values @@ -97,12 +108,18 @@ switch (tokType) { case HiveParser.TOK_COLREF: { - assert(expr.getChildCount() == 2); - String tabAlias = expr.getChild(0).getText(); - String colName = expr.getChild(1).getText(); - if (tabAlias == null || colName == null) { - throw new SemanticException(ErrorMsg.INVALID_XPATH.getMsg(expr)); + String tabAlias = null; + String colName = null; + if (expr.getChildCount() != 1) { + assert(expr.getChildCount() == 2); + tabAlias = BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText()); + colName = BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(1).getText()); + } + else { + colName = BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText()); + tabAlias = SemanticAnalyzer.getTabAliasForCol(this.metaData, colName, (CommonTree)expr.getChild(0)); } + // Set value to null if it's not partition column if (tabAlias.equals(tableAlias) && 
tab.isPartitionKey(colName)) { desc = new exprNodeColumnDesc(String.class, colName); @@ -117,6 +134,7 @@ TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector( this.metaData.getTableForAlias(tabAlias).getDeserializer().getObjectInspector()); desc = new exprNodeConstantDesc(typeInfo.getStructFieldTypeInfo(colName), null); + containsPartCols = false; } } catch (SerDeException e){ throw new RuntimeException(e); @@ -195,8 +213,8 @@ case HiveParser.TOK_COLREF: { assert(expr.getChildCount() == 2); - String tabAlias = expr.getChild(0).getText(); - String colName = expr.getChild(1).getText(); + String tabAlias = BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText()); + String colName = BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(1).getText()); if (tabAlias.equals(tableAlias) && tab.isPartitionKey(colName)) { hasPPred = true; } @@ -227,11 +245,30 @@ if (!(desc instanceof exprNodeConstantDesc) || ((exprNodeConstantDesc)desc).getValue() != null ) { LOG.trace("adding pruning expr = " + desc); if (this.prunerExpr == null) - this.prunerExpr = desc; + this.prunerExpr = desc; else this.prunerExpr = SemanticAnalyzer.getFuncExprNodeDesc("OR", this.prunerExpr, desc); } } + + /** + * Add an expression from the JOIN condition. Since these expressions will be used for all the where clauses, they + * are always ANDed. Then we walk through the remaining filters (in the where clause) and OR them with the existing + * condition. 
+ */ + @SuppressWarnings("nls") + public void addJoinOnExpression(CommonTree expr) throws SemanticException { + LOG.trace("adding pruning Tree = " + expr.toStringTree()); + exprNodeDesc desc = genExprNodeDesc(expr); + // Ignore null constant expressions + if (!(desc instanceof exprNodeConstantDesc) || ((exprNodeConstantDesc)desc).getValue() != null ) { + LOG.trace("adding pruning expr = " + desc); + if (this.prunerExpr == null) + this.prunerExpr = desc; + else + this.prunerExpr = SemanticAnalyzer.getFuncExprNodeDesc("AND", this.prunerExpr, desc); + } + } /** From the table metadata prune the partitions to return the partitions **/ @SuppressWarnings("nls") @@ -282,7 +319,7 @@ } } else - ret_parts.add(part); + ret_parts.add(part); } } catch (Exception e) { Modified: hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java?rev=712905&r1=712904&r2=712905&view=diff ============================================================================== --- hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java (original) +++ hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java Mon Nov 10 17:50:06 2008 @@ -22,7 +22,6 @@ import org.apache.hadoop.hive.ql.parse.QBParseInfo; import org.apache.hadoop.hive.ql.parse.QBMetaData; -import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -158,14 +157,6 @@ } public boolean isSelectStarQuery() { - if (!qbp.isSelectStarQuery() || !aliasToSubq.isEmpty()) - return false; - - Iterator<Map.Entry<String, Table>> iter = qbm.getAliasToTable().entrySet().iterator(); - Table tab = ((Map.Entry<String, Table>)iter.next()).getValue(); - if (tab.isPartitioned()) - return false; - - return true; + return qbp.isSelectStarQuery() && aliasToSubq.isEmpty(); } } 
Modified: hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java?rev=712905&r1=712904&r2=712905&view=diff ============================================================================== --- hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java (original) +++ hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java Mon Nov 10 17:50:06 2008 @@ -22,6 +22,10 @@ import org.antlr.runtime.tree.CommonTree; +/** + * Internal representation of the join tree + * + */ public class QBJoinTree { private String leftAlias; @@ -33,15 +37,29 @@ private joinCond[] joinCond; private boolean noOuterJoin; - // conditions + // join conditions private Vector<Vector<CommonTree>> expressions; + // filters + private Vector<Vector<CommonTree>> filters; + + /** + * constructor + */ public QBJoinTree() { nextTag = 0;} + /** + * returns left alias if any - this is used for merging later on + * @return left alias if any + */ public String getLeftAlias() { return leftAlias; } + /** + * set left alias for the join expression + * @param leftAlias String + */ public void setLeftAlias(String leftAlias) { this.leftAlias = leftAlias; } @@ -109,6 +127,21 @@ public void setNoOuterJoin(boolean noOuterJoin) { this.noOuterJoin = noOuterJoin; } + + /** + * @return the filters + */ + public Vector<Vector<CommonTree>> getFilters() { + return filters; + } + + /** + * @param filters the filters to set + */ + public void setFilters(Vector<Vector<CommonTree>> filters) { + this.filters = filters; + } + } Modified: hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/QBMetaData.java URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/QBMetaData.java?rev=712905&r1=712904&r2=712905&view=diff 
============================================================================== --- hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/QBMetaData.java (original) +++ hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/QBMetaData.java Mon Nov 10 17:50:06 2008 @@ -115,5 +115,4 @@ public Table getSrcForAlias(String alias) { return this.aliasToTable.get(alias.toLowerCase()); } - } Modified: hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java?rev=712905&r1=712904&r2=712905&view=diff ============================================================================== --- hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java (original) +++ hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java Mon Nov 10 17:50:06 2008 @@ -32,7 +32,6 @@ public class QBParseInfo { private boolean isSubQ; - private boolean canOptTopQ; private String alias; private CommonTree joinExpr; private HashMap<String, CommonTree> aliasToSrc; @@ -67,7 +66,6 @@ this.alias = alias; this.isSubQ = isSubQ; - this.canOptTopQ = false; this.outerQueryLimit = -1; } @@ -127,6 +125,10 @@ return this.destToWhereExpr.get(clause); } + public HashMap<String, CommonTree> getDestToWhereExpr() { + return destToWhereExpr; + } + public CommonTree getGroupByForClause(String clause) { return this.destToGroupby.get(clause); } @@ -151,14 +153,6 @@ return this.isSubQ; } - public boolean getCanOptTopQ() { - return this.canOptTopQ; - } - - public void setCanOptTopQ(boolean canOptTopQ) { - this.canOptTopQ = canOptTopQ; - } - public CommonTree getJoinExpr() { return this.joinExpr; } @@ -201,7 +195,6 @@ if (isSubQ || (joinExpr != null) || (!nameToSample.isEmpty()) || - (!destToWhereExpr.isEmpty()) || (!destToGroupby.isEmpty()) || 
(!destToClusterby.isEmpty())) return false; Modified: hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java?rev=712905&r1=712904&r2=712905&view=diff ============================================================================== --- hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java (original) +++ hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java Mon Nov 10 17:50:06 2008 @@ -77,14 +77,49 @@ return rslvMap.get(tab_alias.toLowerCase()) != null; } - public ColumnInfo get(String tab_alias, String col_alias) { - tab_alias = tab_alias.toLowerCase(); + /** + * Gets the ColumnInfo for a tab_alias.col_alias type of column reference. If the tab_alias is not + * provided, as can be the case with a non-aliased column, this function looks up the column in all + * the table aliases in this row resolver and returns the match. It also throws an exception if + * the column is found in multiple table aliases. If no match is found, a null value is returned. + * + * This allows us to interpret both select t.c1 type of references and select c1 kind of references. + * The latter kind are what we call non-aliased column references in the query. 
+ * + * @param tab_alias The table alias to match (this is null if the column reference is non aliased) + * @param col_alias The column name that is being searched for + * @return ColumnInfo + * @throws SemanticException + */ + public ColumnInfo get(String tab_alias, String col_alias) + throws SemanticException { col_alias = col_alias.toLowerCase(); - HashMap<String, ColumnInfo> f_map = rslvMap.get(tab_alias); - if (f_map == null) { - return null; + ColumnInfo ret = null; + + if (tab_alias != null) { + tab_alias = tab_alias.toLowerCase(); + HashMap<String, ColumnInfo> f_map = rslvMap.get(tab_alias); + if (f_map == null) { + return null; + } + ret = f_map.get(col_alias); + } + else { + boolean found = false; + for(LinkedHashMap<String, ColumnInfo> cmap: rslvMap.values()) { + for(Map.Entry<String, ColumnInfo> cmapEnt: cmap.entrySet()) { + if (col_alias.equalsIgnoreCase((String)cmapEnt.getKey())) { + if (found) { + throw new SemanticException("Column " + col_alias + " Found in more than One Tables/Subqueries"); + } + found = true; + ret = (ColumnInfo)cmapEnt.getValue(); + } + } + } } - return f_map.get(col_alias); + + return ret; } public Vector<ColumnInfo> getColumnInfos() { Modified: hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/SamplePruner.java URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/SamplePruner.java?rev=712905&r1=712904&r2=712905&view=diff ============================================================================== --- hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/SamplePruner.java (original) +++ hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/parse/SamplePruner.java Mon Nov 10 17:50:06 2008 @@ -22,43 +22,96 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.fs.Path; -import java.util.*; - +/** + * + * This class stores the 
mapping from table alias to the parse tree information of the table + * sample clause (stored in the TableSample class). + * + */ public class SamplePruner { + + /** + * Table alias for the table e.g. in case of FROM t TABLESAMPLE(1 OUT OF 2 ON rand()) a + * "a" is the table alias + */ private String tabAlias; + + /** + * The parse tree corresponding to the TABLESAMPLE clause. e.g. in case of + * FROM t TABLESAMPLE(1 OUT OF 2 ON rand()) a the parse tree of + * "TABLESAMPLE(1 OUT OF 2 ON rand())" is parsed out and stored in tableSample + */ private TableSample tableSample; - // The log - @SuppressWarnings("nls") - private static final Log LOG = LogFactory.getLog("hive.ql.parse.SamplePruner"); + + /** + * The log handle for this class + */ + @SuppressWarnings("nls") + private static final Log LOG = LogFactory.getLog("hive.ql.parse.SamplePruner"); + /** + * Constructs the SamplePruner given the table alias and the table sample + * + * @param alias The alias of the table specified in the query + * @param tableSample The parse information of the TABLESAMPLE clause + */ public SamplePruner(String alias, TableSample tableSample) { this.tabAlias = alias; this.tableSample = tableSample; } + + /** + * Gets the table alias + * + * @return String + */ public String getTabAlias() { return this.tabAlias; } + + /** + * Sets the table alias + * + * @param tabAlias The table alias as specified in the query + */ public void setTabAlias(String tabAlias) { this.tabAlias = tabAlias; } + + /** + * Gets the parse information of the associated table sample clause + * + * @return TableSample + */ public TableSample getTableSample() { return this.tableSample; } + + /** + * Sets the parse information of the associated table sample clause + * + * @param tableSample Information related to the table sample clause + */ public void setTableSample(TableSample tableSample) { this.tableSample = tableSample; } + /** + * Prunes to get all the files in the partition that satisfy the TABLESAMPLE clause + 
* + * @param part The partition to prune + * @return Path[] + * @throws SemanticException + */ @SuppressWarnings("nls") public Path[] prune(Partition part) throws SemanticException { int num = this.tableSample.getNumerator(); int den = this.tableSample.getDenominator(); int bucketCount = part.getBucketCount(); - List<String> tabBucketCols = part.getBucketCols(); - ArrayList<String> sampleCols = this.tableSample.getCols(); String fullScanMsg = ""; // check if input pruning is possible - if (sampleCols == null || sampleCols.size() == 0 || tabBucketCols.equals(sampleCols)) { + if (this.tableSample.getInputPruning()) { LOG.trace("numerator = " + num); LOG.trace("denominator = " + den); LOG.trace("bucket count = " + bucketCount);
