Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java?rev=1613740&r1=1613739&r2=1613740&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java Sat Jul 26 23:45:46 2014 @@ -30,6 +30,7 @@ import java.util.Stack; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.CommonJoinOperator; import org.apache.hadoop.hive.ql.exec.FilterOperator; @@ -87,6 +88,7 @@ public final class ColumnPrunerProcFacto * Node Processor for Column Pruning on Filter Operators. */ public static class ColumnPrunerFilterProc implements NodeProcessor { + @Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException { FilterOperator op = (FilterOperator) nd; @@ -120,6 +122,7 @@ public final class ColumnPrunerProcFacto * Node Processor for Column Pruning on Group By Operators. */ public static class ColumnPrunerGroupByProc implements NodeProcessor { + @Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException { GroupByOperator op = (GroupByOperator) nd; @@ -154,6 +157,7 @@ public final class ColumnPrunerProcFacto } public static class ColumnPrunerScriptProc implements NodeProcessor { + @Override @SuppressWarnings("unchecked") public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException { @@ -224,6 +228,7 @@ public final class ColumnPrunerProcFacto * and update the RR & signature on the PTFOp. */ public static class ColumnPrunerPTFProc extends ColumnPrunerScriptProc { + @Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException { @@ -327,6 +332,7 @@ public final class ColumnPrunerProcFacto * The Default Node Processor for Column Pruning. */ public static class ColumnPrunerDefaultProc implements NodeProcessor { + @Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException { ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx; @@ -351,6 +357,7 @@ public final class ColumnPrunerProcFacto * store needed columns in tableScanDesc. */ public static class ColumnPrunerTableScanProc implements NodeProcessor { + @Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException { TableScanOperator scanOp = (TableScanOperator) nd; @@ -426,6 +433,7 @@ public final class ColumnPrunerProcFacto * The Node Processor for Column Pruning on Reduce Sink Operators. */ public static class ColumnPrunerReduceSinkProc implements NodeProcessor { + @Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... 
nodeOutputs) throws SemanticException { ReduceSinkOperator op = (ReduceSinkOperator) nd; @@ -435,6 +443,7 @@ public final class ColumnPrunerProcFacto List<String> colLists = new ArrayList<String>(); ArrayList<ExprNodeDesc> keys = conf.getKeyCols(); + LOG.debug("Reduce Sink Operator " + op.getIdentifier() + " key:" + keys); for (ExprNodeDesc key : keys) { colLists = Utilities.mergeUniqElems(colLists, key.getCols()); } @@ -456,7 +465,6 @@ public final class ColumnPrunerProcFacto if (childCols != null) { boolean[] flags = new boolean[valCols.size()]; - Map<String, ExprNodeDesc> exprMap = op.getColumnExprMap(); for (String childCol : childCols) { int index = valColNames.indexOf(Utilities.removeValueTag(childCol)); @@ -497,6 +505,7 @@ public final class ColumnPrunerProcFacto * The Node Processor for Column Pruning on Lateral View Join Operators. */ public static class ColumnPrunerLateralViewJoinProc implements NodeProcessor { + @Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException { LateralViewJoinOperator op = (LateralViewJoinOperator) nd; @@ -585,6 +594,7 @@ public final class ColumnPrunerProcFacto * The Node Processor for Column Pruning on Select Operators. */ public static class ColumnPrunerSelectProc implements NodeProcessor { + @Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException { SelectOperator op = (SelectOperator) nd; @@ -748,6 +758,12 @@ public final class ColumnPrunerProcFacto nm = oldRR.reverseLookup(outputCol); } + // In case there are multiple columns referenced to the same column name, we won't + // do row resolve once more because the ColumnInfo in row resolver is already removed + if (nm == null) { + continue; + } + // Only remove information of a column if it is not a key, // i.e. this column is not appearing in keyExprs of the RS if (ExprNodeDescUtils.indexOf(outputColExpr, keyExprs) == -1) { @@ -795,6 +811,7 @@ public final class ColumnPrunerProcFacto * The Node Processor for Column Pruning on Join Operators. */ public static class ColumnPrunerJoinProc implements NodeProcessor { + @Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException { JoinOperator op = (JoinOperator) nd; @@ -817,9 +834,10 @@ public final class ColumnPrunerProcFacto * The Node Processor for Column Pruning on Map Join Operators. */ public static class ColumnPrunerMapJoinProc implements NodeProcessor { + @Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException { - MapJoinOperator op = (MapJoinOperator) nd; + AbstractMapJoinOperator<MapJoinDesc> op = (AbstractMapJoinOperator<MapJoinDesc>) nd; pruneJoinOperator(ctx, op, op.getConf(), op.getColumnExprMap(), op .getConf().getRetainList(), true); return null; @@ -878,6 +896,7 @@ public final class ColumnPrunerProcFacto List<Operator<? extends OperatorDesc>> childOperators = op .getChildOperators(); + LOG.info("JOIN " + op.getIdentifier() + " oldExprs: " + conf.getExprs()); List<String> childColLists = cppCtx.genColLists(op); if (childColLists == null) { return; @@ -985,6 +1004,7 @@ public final class ColumnPrunerProcFacto rs.add(col); } + LOG.info("JOIN " + op.getIdentifier() + " newExprs: " + conf.getExprs()); op.setColumnExprMap(newColExprMap); conf.setOutputColumnNames(outputCols); op.getSchema().setSignature(rs);
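Two behavioral fixes in the ColumnPrunerProcFactory hunks above are worth calling out: map-join pruning is now typed against AbstractMapJoinOperator<MapJoinDesc>, so the same code path also covers SMB map joins, and the select-operator pruning loop now skips output columns whose ColumnInfo was already removed for an earlier duplicate reference (the new nm == null check). A toy sketch of that guard follows, assuming a plain map stands in for the row resolver; none of the names below come from the patch itself.

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    // Toy illustration (not Hive code): when the same output column name appears
    // twice, the first pruning pass removes it from the "row resolver", so the
    // second lookup returns null and must be skipped rather than dereferenced.
    public class DuplicateColumnGuard {
      public static void main(String[] args) {
        Map<String, String> rowResolver = new HashMap<String, String>();
        rowResolver.put("_col0", "src.key");
        List<String> prunedOutputCols = Arrays.asList("_col0", "_col0"); // duplicate reference

        for (String outputCol : prunedOutputCols) {
          String resolved = rowResolver.remove(outputCol); // prune as we go
          if (resolved == null) {
            continue; // already removed for the earlier duplicate; mirrors the new null check
          }
          System.out.println("pruned " + outputCol + " -> " + resolved);
        }
      }
    }
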
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java?rev=1613740&r1=1613739&r2=1613740&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java Sat Jul 26 23:45:46 2014 @@ -67,6 +67,9 @@ public class Optimizer { HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_MAP_GROUPBY_SORT)) { transformations.add(new GroupByOptimizer()); } + if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)) { + transformations.add(new ConstantPropagate()); + } transformations.add(new ColumnPruner()); if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_OPTIMIZE_SKEWJOIN_COMPILETIME)) { transformations.add(new SkewJoinOptimizer()); Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java?rev=1613740&r1=1613739&r2=1613740&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java Sat Jul 26 23:45:46 2014 @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.optimizer; +import java.io.FileNotFoundException; import java.io.IOException; import java.util.ArrayList; import java.util.HashSet; @@ -27,6 +28,7 @@ import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.ContentSummary; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.FetchTask; @@ -106,9 +108,9 @@ public class SimpleFetchOptimizer implem pctx.getConf(), HiveConf.ConfVars.HIVEFETCHTASKCONVERSION); boolean aggressive = "more".equals(mode); + final int limit = pctx.getQB().getParseInfo().getOuterQueryLimit(); FetchData fetch = checkTree(aggressive, pctx, alias, source); - if (fetch != null && checkThreshold(fetch, pctx)) { - int limit = pctx.getQB().getParseInfo().getOuterQueryLimit(); + if (fetch != null && checkThreshold(fetch, limit, pctx)) { FetchWork fetchWork = fetch.convertToWork(); FetchTask fetchTask = (FetchTask) TaskFactory.get(fetchWork, pctx.getConf()); fetchWork.setSink(fetch.completed(pctx, fetchWork)); @@ -119,7 +121,10 @@ public class SimpleFetchOptimizer implem return null; } - private boolean checkThreshold(FetchData data, ParseContext pctx) throws Exception { + private boolean checkThreshold(FetchData data, int limit, ParseContext pctx) throws Exception { + if (limit > 0 && data.hasOnlyPruningFilter()) { + return true; + } long threshold = HiveConf.getLongVar(pctx.getConf(), HiveConf.ConfVars.HIVEFETCHTASKCONVERSIONTHRESHOLD); if (threshold < 0) { @@ -169,7 +174,7 @@ public class SimpleFetchOptimizer implem PrunedPartitionList pruned = pctx.getPrunedPartitions(alias, ts); if (aggressive || !pruned.hasUnknownPartitions()) { bypassFilter &= !pruned.hasUnknownPartitions(); - return checkOperators(new FetchData(parent, table, pruned, splitSample), ts, + return 
checkOperators(new FetchData(parent, table, pruned, splitSample, bypassFilter), ts, aggressive, bypassFilter); } } @@ -211,6 +216,7 @@ public class SimpleFetchOptimizer implem private final SplitSample splitSample; private final PrunedPartitionList partsList; private final HashSet<ReadEntity> inputs = new HashSet<ReadEntity>(); + private final boolean onlyPruningFilter; // source table scan private TableScanOperator scanOp; @@ -223,14 +229,23 @@ public class SimpleFetchOptimizer implem this.table = table; this.partsList = null; this.splitSample = splitSample; + this.onlyPruningFilter = false; } private FetchData(ReadEntity parent, Table table, PrunedPartitionList partsList, - SplitSample splitSample) { + SplitSample splitSample, boolean bypassFilter) { this.parent = parent; this.table = table; this.partsList = partsList; this.splitSample = splitSample; + this.onlyPruningFilter = bypassFilter; + } + + /* + * all filters were executed during partition pruning + */ + public boolean hasOnlyPruningFilter() { + return this.onlyPruningFilter; } private FetchWork convertToWork() throws HiveException { @@ -317,7 +332,12 @@ public class SimpleFetchOptimizer implem InputFormat input = HiveInputFormat.getInputFormatFromCache(clazz, conf); summary = ((ContentSummaryInputFormat)input).getContentSummary(path, conf); } else { - summary = path.getFileSystem(conf).getContentSummary(path); + FileSystem fs = path.getFileSystem(conf); + try { + summary = fs.getContentSummary(path); + } catch (FileNotFoundException e) { + return 0; + } } return summary.getLength(); } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedMergeJoinProc.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedMergeJoinProc.java?rev=1613740&r1=1613739&r2=1613740&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedMergeJoinProc.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedMergeJoinProc.java Sat Jul 26 23:45:46 2014 @@ -18,13 +18,17 @@ package org.apache.hadoop.hive.ql.optimizer; +import java.util.HashMap; +import java.util.Map; import java.util.Stack; import org.apache.hadoop.hive.ql.exec.JoinOperator; +import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.parse.ParseContext; +import org.apache.hadoop.hive.ql.parse.QBJoinTree; import org.apache.hadoop.hive.ql.parse.SemanticException; public class SortedMergeJoinProc extends AbstractSMBJoinProc implements NodeProcessor { @@ -42,6 +46,11 @@ public class SortedMergeJoinProc extends JoinOperator joinOp = (JoinOperator) nd; SortBucketJoinProcCtx smbJoinContext = (SortBucketJoinProcCtx) procCtx; + Map<MapJoinOperator, QBJoinTree> mapJoinMap = pGraphContext.getMapJoinContext(); + if (mapJoinMap == null) { + mapJoinMap = new HashMap<MapJoinOperator, QBJoinTree>(); + pGraphContext.setMapJoinContext(mapJoinMap); + } boolean convert = canConvertJoinToSMBJoin( Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java?rev=1613740&r1=1613739&r2=1613740&view=diff 
============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java Sat Jul 26 23:45:46 2014 @@ -72,6 +72,7 @@ import org.apache.hadoop.hive.serde2.obj import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hive.common.util.AnnotationUtils; import org.apache.thrift.TException; import com.google.common.collect.Lists; @@ -229,7 +230,7 @@ public class StatsOptimizer implements T // our stats for NDV is approx, not accurate. return null; } - if (aggr.getGenericUDAFName().equals(GenericUDAFSum.class.getAnnotation( + if (aggr.getGenericUDAFName().equals(AnnotationUtils.getAnnotation(GenericUDAFSum.class, Description.class).name())) { if(!(aggr.getParameters().get(0) instanceof ExprNodeConstantDesc)){ return null; @@ -243,7 +244,7 @@ public class StatsOptimizer implements T ois.add(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector( PrimitiveCategory.DECIMAL)); } - else if (aggr.getGenericUDAFName().equals(GenericUDAFCount.class.getAnnotation( + else if (aggr.getGenericUDAFName().equals(AnnotationUtils.getAnnotation(GenericUDAFCount.class, Description.class).name())) { Long rowCnt = 0L; if ((aggr.getParameters().isEmpty() || aggr.getParameters().get(0) instanceof Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/IntraQueryCorrelation.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/IntraQueryCorrelation.java?rev=1613740&r1=1613739&r2=1613740&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/IntraQueryCorrelation.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/IntraQueryCorrelation.java Sat Jul 26 23:45:46 2014 @@ -52,7 +52,7 @@ public class IntraQueryCorrelation { private final Set<ReduceSinkOperator> allReduceSinkOperators; // Since we merge multiple operation paths, we assign new tags to bottom layer - // ReduceSinkOperatos. This mapping is used to map new tags to original tags associated + // ReduceSinkOperators. This mapping is used to map new tags to original tags associated // to these bottom layer ReduceSinkOperators. 
private final Map<Integer, Integer> newTagToOldTag; Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/QueryPlanTreeTransformation.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/QueryPlanTreeTransformation.java?rev=1613740&r1=1613739&r2=1613740&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/QueryPlanTreeTransformation.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/QueryPlanTreeTransformation.java Sat Jul 26 23:45:46 2014 @@ -37,6 +37,7 @@ import org.apache.hadoop.hive.ql.optimiz import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.DemuxDesc; +import org.apache.hadoop.hive.ql.plan.GroupByDesc; import org.apache.hadoop.hive.ql.plan.MuxDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; @@ -214,7 +215,7 @@ public class QueryPlanTreeTransformation } else if (op instanceof ReduceSinkOperator){ GroupByOperator pGBYm = CorrelationUtilities.getSingleParent(op, GroupByOperator.class); - if (pGBYm != null) { + if (pGBYm != null && pGBYm.getConf().getMode() == GroupByDesc.Mode.HASH) { // We get a semi join at here. // This map-side GroupByOperator needs to be removed CorrelationUtilities.removeOperator( Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java?rev=1613740&r1=1613739&r2=1613740&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java Sat Jul 26 23:45:46 2014 @@ -17,11 +17,8 @@ */ package org.apache.hadoop.hive.ql.optimizer.physical; -import java.io.Serializable; import java.util.ArrayList; -import java.util.Collection; import java.util.HashSet; -import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -29,33 +26,19 @@ import java.util.Stack; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.GroupByOperator; -import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; -import org.apache.hadoop.hive.ql.exec.Task; -import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat; -import org.apache.hadoop.hive.ql.io.OneNullRowInputFormat; import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; -import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; import org.apache.hadoop.hive.ql.lib.Dispatcher; import org.apache.hadoop.hive.ql.lib.GraphWalker; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; -import org.apache.hadoop.hive.ql.lib.PreOrderWalker; import org.apache.hadoop.hive.ql.lib.Rule; import org.apache.hadoop.hive.ql.lib.RuleRegExp; -import 
org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.MapWork; -import org.apache.hadoop.hive.ql.plan.MapredWork; -import org.apache.hadoop.hive.ql.plan.OperatorDesc; -import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.TableScanDesc; -import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.hive.serde2.NullStructSerDe; /** * @@ -72,9 +55,9 @@ import org.apache.hadoop.hive.serde2.Nul * */ public class MetadataOnlyOptimizer implements PhysicalPlanResolver { - private static final Log LOG = LogFactory.getLog(MetadataOnlyOptimizer.class.getName()); + static final Log LOG = LogFactory.getLog(MetadataOnlyOptimizer.class.getName()); - static private class WalkerCtx implements NodeProcessorCtx { + static class WalkerCtx implements NodeProcessorCtx { /* operators for which there is chance the optimization can be applied */ private final HashSet<TableScanOperator> possible = new HashSet<TableScanOperator>(); /* operators for which the optimization will be successful */ @@ -129,7 +112,6 @@ public class MetadataOnlyOptimizer imple @Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { - TableScanOperator node = (TableScanOperator) nd; TableScanOperator tsOp = (TableScanOperator) nd; WalkerCtx walkerCtx = (WalkerCtx) procCtx; List<Integer> colIDs = tsOp.getNeededColumnIDs(); @@ -174,145 +156,17 @@ public class MetadataOnlyOptimizer imple @Override public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException { - Dispatcher disp = new MetadataOnlyTaskDispatcher(pctx); + Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>(); + opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + "%"), + new TableScanProcessor()); + opRules.put(new RuleRegExp("R2", + GroupByOperator.getOperatorName() + "%.*" + FileSinkOperator.getOperatorName() + "%"), + new FileSinkProcessor()); + Dispatcher disp = new NullScanTaskDispatcher(pctx, opRules); GraphWalker ogw = new DefaultGraphWalker(disp); ArrayList<Node> topNodes = new ArrayList<Node>(); topNodes.addAll(pctx.getRootTasks()); ogw.startWalking(topNodes, null); return pctx; } - - /** - * Iterate over all tasks one-to-one and convert them to metadata only - */ - class MetadataOnlyTaskDispatcher implements Dispatcher { - - private final PhysicalContext physicalContext; - - public MetadataOnlyTaskDispatcher(PhysicalContext context) { - super(); - physicalContext = context; - } - - private String getAliasForTableScanOperator(MapWork work, - TableScanOperator tso) { - - for (Map.Entry<String, Operator<? 
extends OperatorDesc>> entry : - work.getAliasToWork().entrySet()) { - if (entry.getValue() == tso) { - return entry.getKey(); - } - } - - return null; - } - - private PartitionDesc changePartitionToMetadataOnly(PartitionDesc desc) { - if (desc != null) { - desc.setInputFileFormatClass(OneNullRowInputFormat.class); - desc.setOutputFileFormatClass(HiveIgnoreKeyTextOutputFormat.class); - desc.getProperties().setProperty(serdeConstants.SERIALIZATION_LIB, - NullStructSerDe.class.getName()); - } - return desc; - } - - private List<String> getPathsForAlias(MapWork work, String alias) { - List<String> paths = new ArrayList<String>(); - - for (Map.Entry<String, ArrayList<String>> entry : work.getPathToAliases().entrySet()) { - if (entry.getValue().contains(alias)) { - paths.add(entry.getKey()); - } - } - - return paths; - } - - private void processAlias(MapWork work, String alias) { - // Change the alias partition desc - PartitionDesc aliasPartn = work.getAliasToPartnInfo().get(alias); - changePartitionToMetadataOnly(aliasPartn); - - List<String> paths = getPathsForAlias(work, alias); - for (String path : paths) { - PartitionDesc partDesc = work.getPathToPartitionInfo().get(path); - PartitionDesc newPartition = changePartitionToMetadataOnly(partDesc); - Path fakePath = new Path(physicalContext.getContext().getMRTmpPath() - + newPartition.getTableName() - + encode(newPartition.getPartSpec())); - work.getPathToPartitionInfo().remove(path); - work.getPathToPartitionInfo().put(fakePath.getName(), newPartition); - ArrayList<String> aliases = work.getPathToAliases().remove(path); - work.getPathToAliases().put(fakePath.getName(), aliases); - } - } - - // considered using URLEncoder, but it seemed too much - private String encode(Map<String, String> partSpec) { - return partSpec.toString().replaceAll("[:/#\\?]", "_"); - } - - @Override - public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs) - throws SemanticException { - Task<? extends Serializable> task = (Task<? extends Serializable>) nd; - - Collection<Operator<? extends OperatorDesc>> topOperators - = task.getTopOperators(); - if (topOperators.size() == 0) { - return null; - } - - LOG.info("Looking for table scans where optimization is applicable"); - // create a the context for walking operators - ParseContext parseContext = physicalContext.getParseContext(); - WalkerCtx walkerCtx = new WalkerCtx(); - - Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>(); - opRules.put(new RuleRegExp("R1", - TableScanOperator.getOperatorName() + "%"), - new TableScanProcessor()); - opRules.put(new RuleRegExp("R2", - GroupByOperator.getOperatorName() + "%.*" + FileSinkOperator.getOperatorName() + "%"), - new FileSinkProcessor()); - - // The dispatcher fires the processor corresponding to the closest - // matching rule and passes the context along - Dispatcher disp = new DefaultRuleDispatcher(null, opRules, walkerCtx); - GraphWalker ogw = new PreOrderWalker(disp); - - // Create a list of topOp nodes - ArrayList<Node> topNodes = new ArrayList<Node>(); - // Get the top Nodes for this map-reduce task - for (Operator<? 
extends OperatorDesc> - workOperator : topOperators) { - if (parseContext.getTopOps().values().contains(workOperator)) { - topNodes.add(workOperator); - } - } - - if (task.getReducer() != null) { - topNodes.add(task.getReducer()); - } - - ogw.startWalking(topNodes, null); - - LOG.info(String.format("Found %d metadata only table scans", - walkerCtx.getMetadataOnlyTableScans().size())); - Iterator<TableScanOperator> iterator - = walkerCtx.getMetadataOnlyTableScans().iterator(); - - while (iterator.hasNext()) { - TableScanOperator tso = iterator.next(); - ((TableScanDesc)tso.getConf()).setIsMetadataOnly(true); - MapWork work = ((MapredWork) task.getWork()).getMapWork(); - String alias = getAliasForTableScanOperator(work, tso); - LOG.info("Metadata only table scan for " + alias); - processAlias(work, alias); - } - - return null; - } - } } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java?rev=1613740&r1=1613739&r2=1613740&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java Sat Jul 26 23:45:46 2014 @@ -67,6 +67,9 @@ public class PhysicalOptimizer { if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVEMETADATAONLYQUERIES)) { resolvers.add(new MetadataOnlyOptimizer()); } + if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVENULLSCANOPTIMIZE)) { + resolvers.add(new NullScanOptimizer()); + } if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVESAMPLINGFORORDERBY)) { resolvers.add(new SamplingOptimizer()); } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1613740&r1=1613739&r2=1613740&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Sat Jul 26 23:45:46 2014 @@ -222,6 +222,7 @@ public class Vectorizer implements Physi supportedGenericUDFs.add(GenericUDFCase.class); supportedGenericUDFs.add(GenericUDFWhen.class); supportedGenericUDFs.add(GenericUDFCoalesce.class); + supportedGenericUDFs.add(GenericUDFElt.class); // For type casts supportedGenericUDFs.add(UDFToLong.class); @@ -641,6 +642,16 @@ public class Vectorizer implements Physi } private boolean validateExprNodeDescRecursive(ExprNodeDesc desc) { + if (desc instanceof ExprNodeColumnDesc) { + ExprNodeColumnDesc c = (ExprNodeColumnDesc) desc; + // Currently, we do not support vectorized virtual columns (see HIVE-5570). 
+ for (VirtualColumn vc : VirtualColumn.VIRTUAL_COLUMNS) { + if (c.getColumn().equals(vc.getName())) { + LOG.info("Cannot vectorize virtual column " + c.getColumn()); + return false; + } + } + } String typeName = desc.getTypeInfo().getTypeName(); boolean ret = validateDataType(typeName); if (!ret) { @@ -724,6 +735,7 @@ public class Vectorizer implements Physi Map<String, Integer> cmap = new HashMap<String, Integer>(); int columnCount = 0; for (ColumnInfo c : rs.getSignature()) { + // Earlier, validation code should have eliminated virtual columns usage (HIVE-5560). if (!isVirtualColumn(c)) { cmap.put(c.getInternalName(), columnCount++); } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java?rev=1613740&r1=1613739&r2=1613740&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java Sat Jul 26 23:45:46 2014 @@ -288,6 +288,7 @@ public class StatsRulesProcFactory { AnnotateStatsProcCtx aspCtx, List<String> neededCols) throws CloneNotSupportedException { long newNumRows = 0; Statistics andStats = null; + if (pred instanceof ExprNodeGenericFuncDesc) { ExprNodeGenericFuncDesc genFunc = (ExprNodeGenericFuncDesc) pred; GenericUDF udf = genFunc.getGenericUDF(); @@ -334,6 +335,15 @@ public class StatsRulesProcFactory { // if not boolean column return half the number of rows return stats.getNumRows() / 2; + } else if (pred instanceof ExprNodeConstantDesc) { + + // special case for handling false constants + ExprNodeConstantDesc encd = (ExprNodeConstantDesc) pred; + if (encd.getValue().equals(false)) { + return 0; + } else { + return stats.getNumRows(); + } } return newNumRows; @@ -429,13 +439,27 @@ public class StatsRulesProcFactory { String colName = null; String tabAlias = null; boolean isConst = false; + Object prevConst = null; for (ExprNodeDesc leaf : genFunc.getChildren()) { if (leaf instanceof ExprNodeConstantDesc) { + // constant = constant expressions. 
We shouldn't be getting this + // after constant folding + if (isConst) { + + // special case: if both constants are not equal then return 0 + if (prevConst != null && + !prevConst.equals(((ExprNodeConstantDesc)leaf).getValue())) { + return 0; + } + return numRows; + } + // if the first argument is const then just set the flag and continue if (colName == null) { isConst = true; + prevConst = ((ExprNodeConstantDesc) leaf).getValue(); continue; } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java?rev=1613740&r1=1613739&r2=1613740&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java Sat Jul 26 23:45:46 2014 @@ -50,15 +50,6 @@ import org.apache.hadoop.hive.ql.exec.Ta import org.apache.hadoop.hive.ql.hooks.LineageInfo; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; -import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat; -import org.apache.hadoop.hive.ql.io.RCFileInputFormat; -import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; -import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; -import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat; -import org.apache.hadoop.hive.ql.io.orc.OrcSerde; -import org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat; -import org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat; -import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -77,9 +68,6 @@ import org.apache.hadoop.hive.serde2.obj import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.mapred.SequenceFileInputFormat; -import org.apache.hadoop.mapred.SequenceFileOutputFormat; -import org.apache.hadoop.mapred.TextInputFormat; import com.google.common.annotations.VisibleForTesting; @@ -87,7 +75,6 @@ import com.google.common.annotations.Vis * BaseSemanticAnalyzer. 
* */ -@SuppressWarnings("deprecation") public abstract class BaseSemanticAnalyzer { private static final Log STATIC_LOG = LogFactory.getLog(BaseSemanticAnalyzer.class.getName()); protected final Hive db; @@ -119,28 +106,6 @@ public abstract class BaseSemanticAnalyz protected TableAccessInfo tableAccessInfo; protected ColumnAccessInfo columnAccessInfo; - protected static final String TEXTFILE_INPUT = TextInputFormat.class - .getName(); - protected static final String TEXTFILE_OUTPUT = IgnoreKeyTextOutputFormat.class - .getName(); - protected static final String SEQUENCEFILE_INPUT = SequenceFileInputFormat.class - .getName(); - protected static final String SEQUENCEFILE_OUTPUT = SequenceFileOutputFormat.class - .getName(); - protected static final String RCFILE_INPUT = RCFileInputFormat.class - .getName(); - protected static final String RCFILE_OUTPUT = RCFileOutputFormat.class - .getName(); - protected static final String ORCFILE_INPUT = OrcInputFormat.class - .getName(); - protected static final String ORCFILE_OUTPUT = OrcOutputFormat.class - .getName(); - protected static final String ORCFILE_SERDE = OrcSerde.class - .getName(); - - protected static final String PARQUETFILE_INPUT = MapredParquetInputFormat.class.getName(); - protected static final String PARQUETFILE_OUTPUT = MapredParquetOutputFormat.class.getName(); - protected static final String PARQUETFILE_SERDE = ParquetHiveSerDe.class.getName(); public boolean skipAuthorization() { return false; @@ -154,7 +119,7 @@ public abstract class BaseSemanticAnalyz String lineDelim = null; String nullFormat = null; - protected void analyzeRowFormat(AnalyzeCreateCommonVars shared, ASTNode child) throws SemanticException { + protected void analyzeRowFormat(ASTNode child) throws SemanticException { child = (ASTNode) child.getChild(0); int numChildRowFormat = child.getChildCount(); for (int numC = 0; numC < numChildRowFormat; numC++) { @@ -190,93 +155,7 @@ public abstract class BaseSemanticAnalyz .getText()); break; default: - assert false; - } - } - } - } - - class AnalyzeCreateCommonVars { - String serde = null; - Map<String, String> serdeProps = new HashMap<String, String>(); - } - - class StorageFormat { - String inputFormat = null; - String outputFormat = null; - String storageHandler = null; - - protected boolean fillStorageFormat(ASTNode child, AnalyzeCreateCommonVars shared) { - boolean storageFormat = false; - switch(child.getToken().getType()) { - case HiveParser.TOK_TBLSEQUENCEFILE: - inputFormat = SEQUENCEFILE_INPUT; - outputFormat = SEQUENCEFILE_OUTPUT; - storageFormat = true; - break; - case HiveParser.TOK_TBLTEXTFILE: - inputFormat = TEXTFILE_INPUT; - outputFormat = TEXTFILE_OUTPUT; - storageFormat = true; - break; - case HiveParser.TOK_TBLRCFILE: - inputFormat = RCFILE_INPUT; - outputFormat = RCFILE_OUTPUT; - if (shared.serde == null) { - shared.serde = conf.getVar(HiveConf.ConfVars.HIVEDEFAULTRCFILESERDE); - } - storageFormat = true; - break; - case HiveParser.TOK_TBLORCFILE: - inputFormat = ORCFILE_INPUT; - outputFormat = ORCFILE_OUTPUT; - shared.serde = ORCFILE_SERDE; - storageFormat = true; - break; - case HiveParser.TOK_TBLPARQUETFILE: - inputFormat = PARQUETFILE_INPUT; - outputFormat = PARQUETFILE_OUTPUT; - shared.serde = PARQUETFILE_SERDE; - storageFormat = true; - break; - case HiveParser.TOK_TABLEFILEFORMAT: - inputFormat = unescapeSQLString(child.getChild(0).getText()); - outputFormat = unescapeSQLString(child.getChild(1).getText()); - storageFormat = true; - break; - case HiveParser.TOK_STORAGEHANDLER: - storageHandler 
= unescapeSQLString(child.getChild(0).getText()); - if (child.getChildCount() == 2) { - readProps( - (ASTNode) (child.getChild(1).getChild(0)), - shared.serdeProps); - } - storageFormat = true; - break; - } - return storageFormat; - } - - protected void fillDefaultStorageFormat(AnalyzeCreateCommonVars shared) { - if ((inputFormat == null) && (storageHandler == null)) { - if ("SequenceFile".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVEDEFAULTFILEFORMAT))) { - inputFormat = SEQUENCEFILE_INPUT; - outputFormat = SEQUENCEFILE_OUTPUT; - } else if ("RCFile".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVEDEFAULTFILEFORMAT))) { - inputFormat = RCFILE_INPUT; - outputFormat = RCFILE_OUTPUT; - shared.serde = conf.getVar(HiveConf.ConfVars.HIVEDEFAULTRCFILESERDE); - } else if ("ORC".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVEDEFAULTFILEFORMAT))) { - inputFormat = ORCFILE_INPUT; - outputFormat = ORCFILE_OUTPUT; - shared.serde = ORCFILE_SERDE; - } else if ("PARQUET".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVEDEFAULTFILEFORMAT))) { - inputFormat = PARQUETFILE_INPUT; - outputFormat = PARQUETFILE_OUTPUT; - shared.serde = PARQUETFILE_SERDE; - } else { - inputFormat = TEXTFILE_INPUT; - outputFormat = TEXTFILE_OUTPUT; + throw new AssertionError("Unkown Token: " + rowChild); } } } @@ -607,13 +486,6 @@ public abstract class BaseSemanticAnalyz return getColumns(ast, true); } - protected void handleGenericFileFormat(ASTNode node) throws SemanticException{ - - ASTNode child = (ASTNode)node.getChild(0); - throw new SemanticException("Unrecognized file format in STORED AS clause:"+ - " "+ (child == null ? "" : child.getText())); - } - /** * Get the list of FieldSchema out of the ASTNode. */ @@ -920,7 +792,7 @@ public abstract class BaseSemanticAnalyz /** * Sets the table access information. * - * @param taInfo The TableAccessInfo structure that is set in the optimization phase. + * @param tableAccessInfo The TableAccessInfo structure that is set in the optimization phase. */ public void setTableAccessInfo(TableAccessInfo tableAccessInfo) { this.tableAccessInfo = tableAccessInfo; @@ -966,7 +838,7 @@ public abstract class BaseSemanticAnalyz * @throws HiveException */ public final boolean isValidPrefixSpec(Table tTable, Map<String, String> spec) - throws HiveException { + throws HiveException { // TODO - types need to be checked. 
List<FieldSchema> partCols = tTable.getPartitionKeys(); @@ -1313,7 +1185,7 @@ public abstract class BaseSemanticAnalyz return getTable(currentDb, tblName, throwException); } - // qnName : possibly contains database name (dot seperated) + // qnName : possibly contains database name (dot separated) protected Table getTableWithQN(String qnName, boolean throwException) throws SemanticException { int dot = qnName.indexOf('.'); if (dot < 0) { Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java?rev=1613740&r1=1613739&r2=1613740&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java Sat Jul 26 23:45:46 2014 @@ -300,8 +300,11 @@ public class ColumnStatsSemanticAnalyzer rewrittenQueryBuilder.append(numBitVectors); rewrittenQueryBuilder.append(" )"); } - for (FieldSchema fs : tbl.getPartCols()) { - rewrittenQueryBuilder.append(" , " + fs.getName()); + + if (isPartitionStats) { + for (FieldSchema fs : tbl.getPartCols()) { + rewrittenQueryBuilder.append(" , " + fs.getName()); + } } rewrittenQueryBuilder.append(" from "); rewrittenQueryBuilder.append(tbl.getTableName()); @@ -355,6 +358,7 @@ public class ColumnStatsSemanticAnalyzer originalTree = tree; boolean isPartitionStats = isPartitionLevelStats(tree); Map<String,String> partSpec = null; + checkIfTemporaryTable(); checkForPartitionColumns(colNames, Utilities.getColumnNamesFromFieldSchema(tbl.getPartitionKeys())); validateSpecifiedColumnNames(colNames); @@ -402,6 +406,13 @@ public class ColumnStatsSemanticAnalyzer } } + private void checkIfTemporaryTable() throws SemanticException { + if (tbl.isTemporary()) { + throw new SemanticException(tbl.getTableName() + + " is a temporary table. Column statistics are not supported on temporary tables."); + } + } + @Override public void analyze(ASTNode ast, Context origCtx) throws SemanticException { QB qb; Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java?rev=1613740&r1=1613739&r2=1613740&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java Sat Jul 26 23:45:46 2014 @@ -57,7 +57,6 @@ import org.apache.hadoop.hive.metastore. 
import org.apache.hadoop.hive.ql.Driver; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.ArchiveUtils; -import org.apache.hadoop.hive.ql.exec.DDLTask; import org.apache.hadoop.hive.ql.exec.FetchTask; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.Task; @@ -117,6 +116,7 @@ import org.apache.hadoop.hive.ql.plan.Re import org.apache.hadoop.hive.ql.plan.RoleDDLDesc; import org.apache.hadoop.hive.ql.plan.ShowColumnsDesc; import org.apache.hadoop.hive.ql.plan.ShowCompactionsDesc; +import org.apache.hadoop.hive.ql.plan.ShowConfDesc; import org.apache.hadoop.hive.ql.plan.ShowCreateTableDesc; import org.apache.hadoop.hive.ql.plan.ShowDatabasesDesc; import org.apache.hadoop.hive.ql.plan.ShowFunctionsDesc; @@ -334,6 +334,10 @@ public class DDLSemanticAnalyzer extends ctx.setResFile(ctx.getLocalTmpPath()); analyzeShowTxns(ast); break; + case HiveParser.TOK_SHOWCONF: + ctx.setResFile(ctx.getLocalTmpPath()); + analyzeShowConf(ast); + break; case HiveParser.TOK_DESCFUNCTION: ctx.setResFile(ctx.getLocalTmpPath()); analyzeDescFunction(ast); @@ -1032,17 +1036,16 @@ public class DDLSemanticAnalyzer extends String indexComment = null; RowFormatParams rowFormatParams = new RowFormatParams(); - StorageFormat storageFormat = new StorageFormat(); - AnalyzeCreateCommonVars shared = new AnalyzeCreateCommonVars(); + StorageFormat storageFormat = new StorageFormat(conf); for (int idx = 4; idx < ast.getChildCount(); idx++) { ASTNode child = (ASTNode) ast.getChild(idx); - if (storageFormat.fillStorageFormat(child, shared)) { + if (storageFormat.fillStorageFormat(child)) { continue; } switch (child.getToken().getType()) { case HiveParser.TOK_TABLEROWFORMAT: - rowFormatParams.analyzeRowFormat(shared, child); + rowFormatParams.analyzeRowFormat(child); break; case HiveParser.TOK_CREATEINDEX_INDEXTBLNAME: ASTNode ch = (ASTNode) child.getChild(0); @@ -1063,10 +1066,10 @@ public class DDLSemanticAnalyzer extends break; case HiveParser.TOK_TABLESERIALIZER: child = (ASTNode) child.getChild(0); - shared.serde = unescapeSQLString(child.getChild(0).getText()); + storageFormat.setSerde(unescapeSQLString(child.getChild(0).getText())); if (child.getChildCount() == 2) { readProps((ASTNode) (child.getChild(1).getChild(0)), - shared.serdeProps); + storageFormat.getSerdeProps()); } break; case HiveParser.TOK_INDEXCOMMENT: @@ -1075,14 +1078,14 @@ public class DDLSemanticAnalyzer extends } } - storageFormat.fillDefaultStorageFormat(shared); + storageFormat.fillDefaultStorageFormat(); CreateIndexDesc crtIndexDesc = new CreateIndexDesc(tableName, indexName, - indexedCols, indexTableName, deferredRebuild, storageFormat.inputFormat, - storageFormat.outputFormat, - storageFormat.storageHandler, typeName, location, idxProps, tblProps, - shared.serde, shared.serdeProps, rowFormatParams.collItemDelim, + indexedCols, indexTableName, deferredRebuild, storageFormat.getInputFormat(), + storageFormat.getOutputFormat(), + storageFormat.getStorageHandler(), typeName, location, idxProps, tblProps, + storageFormat.getSerde(), storageFormat.getSerdeProps(), rowFormatParams.collItemDelim, rowFormatParams.fieldDelim, rowFormatParams.fieldEscape, rowFormatParams.lineDelim, rowFormatParams.mapKeyDelim, indexComment); Task<?> createIndex = @@ -1317,69 +1320,15 @@ public class DDLSemanticAnalyzer extends HashMap<String, String> partSpec) throws SemanticException { - String inputFormat = null; - String outputFormat = null; - String storageHandler = null; - String serde = null; + 
StorageFormat format = new StorageFormat(conf); ASTNode child = (ASTNode) ast.getChild(0); - switch (child.getToken().getType()) { - case HiveParser.TOK_TABLEFILEFORMAT: - inputFormat = unescapeSQLString(((ASTNode) child.getChild(0)).getToken() - .getText()); - outputFormat = unescapeSQLString(((ASTNode) child.getChild(1)).getToken() - .getText()); - serde = unescapeSQLString(((ASTNode) child.getChild(2)).getToken() - .getText()); - try { - Class.forName(inputFormat); - Class.forName(outputFormat); - Class.forName(serde); - } catch (ClassNotFoundException e) { - throw new SemanticException(e); - } - break; - case HiveParser.TOK_STORAGEHANDLER: - storageHandler = - unescapeSQLString(((ASTNode) child.getChild(1)).getToken().getText()); - try { - Class.forName(storageHandler); - } catch (ClassNotFoundException e) { - throw new SemanticException(e); - } - break; - case HiveParser.TOK_TBLSEQUENCEFILE: - inputFormat = SEQUENCEFILE_INPUT; - outputFormat = SEQUENCEFILE_OUTPUT; - serde = org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName(); - break; - case HiveParser.TOK_TBLTEXTFILE: - inputFormat = TEXTFILE_INPUT; - outputFormat = TEXTFILE_OUTPUT; - serde = org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName(); - break; - case HiveParser.TOK_TBLRCFILE: - inputFormat = RCFILE_INPUT; - outputFormat = RCFILE_OUTPUT; - serde = conf.getVar(HiveConf.ConfVars.HIVEDEFAULTRCFILESERDE); - break; - case HiveParser.TOK_TBLORCFILE: - inputFormat = ORCFILE_INPUT; - outputFormat = ORCFILE_OUTPUT; - serde = ORCFILE_SERDE; - break; - case HiveParser.TOK_TBLPARQUETFILE: - inputFormat = PARQUETFILE_INPUT; - outputFormat = PARQUETFILE_OUTPUT; - serde = PARQUETFILE_SERDE; - break; - case HiveParser.TOK_FILEFORMAT_GENERIC: - handleGenericFileFormat(child); - break; + if (!format.fillStorageFormat(child)) { + throw new AssertionError("Unknown token " + child.getText()); } - AlterTableDesc alterTblDesc = new AlterTableDesc(tableName, inputFormat, - outputFormat, serde, storageHandler, partSpec); + AlterTableDesc alterTblDesc = new AlterTableDesc(tableName, format.getInputFormat(), + format.getOutputFormat(), format.getSerde(), format.getStorageHandler(), partSpec); addInputsOutputsAlterTable(tableName, partSpec, alterTblDesc); rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), @@ -2338,7 +2287,15 @@ public class DDLSemanticAnalyzer extends ctx.setNeedLockMgr(true); } - /** + private void analyzeShowConf(ASTNode ast) throws SemanticException { + String confName = stripQuotes(ast.getChild(0).getText()); + ShowConfDesc showConfDesc = new ShowConfDesc(ctx.getResFile(), confName); + rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), + showConfDesc), conf)); + setFetchTask(createFetchTask(showConfDesc.getSchema())); + } + + /** * Add the task according to the parsed command tree. This is used for the CLI * command "LOCK TABLE ..;". 
* Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/FunctionSemanticAnalyzer.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/FunctionSemanticAnalyzer.java?rev=1613740&r1=1613739&r2=1613740&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/FunctionSemanticAnalyzer.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/FunctionSemanticAnalyzer.java Sat Jul 26 23:45:46 2014 @@ -24,7 +24,6 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.ResourceType; import org.apache.hadoop.hive.metastore.api.ResourceUri; @@ -32,14 +31,13 @@ import org.apache.hadoop.hive.ql.ErrorMs import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.FunctionUtils; import org.apache.hadoop.hive.ql.exec.TaskFactory; +import org.apache.hadoop.hive.ql.hooks.Entity.Type; import org.apache.hadoop.hive.ql.hooks.WriteEntity; -import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.CreateFunctionDesc; import org.apache.hadoop.hive.ql.plan.DropFunctionDesc; import org.apache.hadoop.hive.ql.plan.FunctionWork; import org.apache.hadoop.hive.ql.plan.PlanUtils; -import org.apache.hadoop.hive.ql.session.SessionState; /** * FunctionSemanticAnalyzer. @@ -78,7 +76,7 @@ public class FunctionSemanticAnalyzer ex // find any referenced resources List<ResourceUri> resources = getResourceList(ast); - + CreateFunctionDesc desc = new CreateFunctionDesc(functionName, isTemporaryFunction, className, resources); rootTasks.add(TaskFactory.get(new FunctionWork(desc), conf)); @@ -152,15 +150,22 @@ public class FunctionSemanticAnalyzer ex } /** - * Add write entities to the semantic analyzer to restrict function creation to priviliged users. + * Add write entities to the semantic analyzer to restrict function creation to privileged users. */ private void addEntities(String functionName, boolean isTemporaryFunction) throws SemanticException { + // If the function is being added under a database 'namespace', then add an entity representing + // the database (only applicable to permanent/metastore functions). + // We also add a second entity representing the function name. + // The authorization api implementation can decide which entities it wants to use to + // authorize the create/drop function call. + + // Add the relevant database 'namespace' as a WriteEntity Database database = null; - if (isTemporaryFunction) { - // This means temp function creation is also restricted. 
- database = getDatabase(MetaStoreUtils.DEFAULT_DATABASE_NAME); - } else { + + // temporary functions don't have any database 'namespace' associated with it, + // it matters only for permanent functions + if (!isTemporaryFunction) { try { String[] qualifiedNameParts = FunctionUtils.getQualifiedFunctionNameParts(functionName); String dbName = qualifiedNameParts[0]; @@ -173,5 +178,9 @@ public class FunctionSemanticAnalyzer ex if (database != null) { outputs.add(new WriteEntity(database, WriteEntity.WriteType.DDL_NO_LOCK)); } + + // Add the function name as a WriteEntity + outputs.add(new WriteEntity(database, functionName, Type.FUNCTION, + WriteEntity.WriteType.DDL_NO_LOCK)); } } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g?rev=1613740&r1=1613739&r2=1613740&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g Sat Jul 26 23:45:46 2014 @@ -151,11 +151,6 @@ KW_KEY_TYPE: '$KEY$'; KW_LINES: 'LINES'; KW_STORED: 'STORED'; KW_FILEFORMAT: 'FILEFORMAT'; -KW_SEQUENCEFILE: 'SEQUENCEFILE'; -KW_TEXTFILE: 'TEXTFILE'; -KW_RCFILE: 'RCFILE'; -KW_ORCFILE: 'ORC'; -KW_PARQUETFILE: 'PARQUET'; KW_INPUTFORMAT: 'INPUTFORMAT'; KW_OUTPUTFORMAT: 'OUTPUTFORMAT'; KW_INPUTDRIVER: 'INPUTDRIVER'; @@ -296,6 +291,7 @@ KW_COMPACTIONS: 'COMPACTIONS'; KW_TRANSACTIONS: 'TRANSACTIONS'; KW_REWRITE : 'REWRITE'; KW_AUTHORIZATION: 'AUTHORIZATION'; +KW_CONF: 'CONF'; // Operators // NOTE: if you add a new function/operator, add it to sysFuncNames so that describe function _FUNC_ will work. Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g?rev=1613740&r1=1613739&r2=1613740&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g Sat Jul 26 23:45:46 2014 @@ -163,6 +163,7 @@ TOK_SHOW_CREATETABLE; TOK_SHOW_TABLESTATUS; TOK_SHOW_TBLPROPERTIES; TOK_SHOWLOCKS; +TOK_SHOWCONF; TOK_LOCKTABLE; TOK_UNLOCKTABLE; TOK_LOCKDB; @@ -182,11 +183,6 @@ TOK_TABLEROWFORMATCOLLITEMS; TOK_TABLEROWFORMATMAPKEYS; TOK_TABLEROWFORMATLINES; TOK_TABLEROWFORMATNULL; -TOK_TBLORCFILE; -TOK_TBLPARQUETFILE; -TOK_TBLSEQUENCEFILE; -TOK_TBLTEXTFILE; -TOK_TBLRCFILE; TOK_TABLEFILEFORMAT; TOK_FILEFORMAT_GENERIC; TOK_OFFLINE; @@ -267,6 +263,8 @@ TOK_ROLE; TOK_RESOURCE_ALL; TOK_GRANT_WITH_OPTION; TOK_GRANT_WITH_ADMIN_OPTION; +TOK_ADMIN_OPTION_FOR; +TOK_GRANT_OPTION_FOR; TOK_PRIV_ALL; TOK_PRIV_ALTER_METADATA; TOK_PRIV_ALTER_DATA; @@ -804,7 +802,7 @@ databaseComment createTableStatement @init { pushMsg("create table statement", state); } @after { popMsg(state); } - : KW_CREATE (ext=KW_EXTERNAL)? KW_TABLE ifNotExists? name=tableName + : KW_CREATE (temp=KW_TEMPORARY)? (ext=KW_EXTERNAL)? KW_TABLE ifNotExists? name=tableName ( like=KW_LIKE likeName=tableName tableLocation? tablePropertiesPrefixed? @@ -819,7 +817,7 @@ createTableStatement tablePropertiesPrefixed? (KW_AS selectStatementWithCTE)? ) - -> ^(TOK_CREATETABLE $name $ext? ifNotExists? + -> ^(TOK_CREATETABLE $name $temp? $ext? 
ifNotExists? ^(TOK_LIKETABLE $likeName?) columnNameTypeList? tableComment? @@ -1265,12 +1263,7 @@ alterStatementSuffixCompact fileFormat @init { pushMsg("file format specification", state); } @after { popMsg(state); } - : KW_SEQUENCEFILE -> ^(TOK_TBLSEQUENCEFILE) - | KW_TEXTFILE -> ^(TOK_TBLTEXTFILE) - | KW_RCFILE -> ^(TOK_TBLRCFILE) - | KW_ORCFILE -> ^(TOK_TBLORCFILE) - | KW_PARQUETFILE -> ^(TOK_TBLPARQUETFILE) - | KW_INPUTFORMAT inFmt=StringLiteral KW_OUTPUTFORMAT outFmt=StringLiteral KW_SERDE serdeCls=StringLiteral (KW_INPUTDRIVER inDriver=StringLiteral KW_OUTPUTDRIVER outDriver=StringLiteral)? + : KW_INPUTFORMAT inFmt=StringLiteral KW_OUTPUTFORMAT outFmt=StringLiteral KW_SERDE serdeCls=StringLiteral (KW_INPUTDRIVER inDriver=StringLiteral KW_OUTPUTDRIVER outDriver=StringLiteral)? -> ^(TOK_TABLEFILEFORMAT $inFmt $outFmt $serdeCls $inDriver? $outDriver?) | genericSpec=identifier -> ^(TOK_FILEFORMAT_GENERIC $genericSpec) ; @@ -1336,6 +1329,7 @@ showStatement -> ^(TOK_SHOWINDEXES showStmtIdentifier $showOptions? $db_name?) | KW_SHOW KW_COMPACTIONS -> ^(TOK_SHOW_COMPACTIONS) | KW_SHOW KW_TRANSACTIONS -> ^(TOK_SHOW_TRANSACTIONS) + | KW_SHOW KW_CONF StringLiteral -> ^(TOK_SHOWCONF StringLiteral) ; lockStatement @@ -1395,8 +1389,8 @@ grantPrivileges revokePrivileges @init {pushMsg("revoke privileges", state);} @afer {popMsg(state);} - : KW_REVOKE privilegeList privilegeObject? KW_FROM principalSpecification - -> ^(TOK_REVOKE privilegeList principalSpecification privilegeObject?) + : KW_REVOKE grantOptionFor? privilegeList privilegeObject? KW_FROM principalSpecification + -> ^(TOK_REVOKE privilegeList principalSpecification privilegeObject? grantOptionFor?) ; grantRole @@ -1409,8 +1403,8 @@ grantRole revokeRole @init {pushMsg("revoke role", state);} @after {popMsg(state);} - : KW_REVOKE KW_ROLE? identifier (COMMA identifier)* KW_FROM principalSpecification withAdminOption? - -> ^(TOK_REVOKE_ROLE principalSpecification withAdminOption? identifier+) + : KW_REVOKE adminOptionFor? KW_ROLE? identifier (COMMA identifier)* KW_FROM principalSpecification + -> ^(TOK_REVOKE_ROLE principalSpecification adminOptionFor? identifier+) ; showRoleGrants @@ -1533,6 +1527,20 @@ withGrantOption -> ^(TOK_GRANT_WITH_OPTION) ; +grantOptionFor +@init {pushMsg("grant option for", state);} +@after {popMsg(state);} + : KW_GRANT KW_OPTION KW_FOR + -> ^(TOK_GRANT_OPTION_FOR) +; + +adminOptionFor +@init {pushMsg("admin option for", state);} +@after {popMsg(state);} + : KW_ADMIN KW_OPTION KW_FOR + -> ^(TOK_ADMIN_OPTION_FOR) +; + withAdminOption @init {pushMsg("with admin option", state);} @after {popMsg(state);} @@ -1808,12 +1816,7 @@ tableFileFormat @init { pushMsg("table file format specification", state); } @after { popMsg(state); } : - KW_STORED KW_AS KW_SEQUENCEFILE -> TOK_TBLSEQUENCEFILE - | KW_STORED KW_AS KW_TEXTFILE -> TOK_TBLTEXTFILE - | KW_STORED KW_AS KW_RCFILE -> TOK_TBLRCFILE - | KW_STORED KW_AS KW_ORCFILE -> TOK_TBLORCFILE - | KW_STORED KW_AS KW_PARQUETFILE -> TOK_TBLPARQUETFILE - | KW_STORED KW_AS KW_INPUTFORMAT inFmt=StringLiteral KW_OUTPUTFORMAT outFmt=StringLiteral (KW_INPUTDRIVER inDriver=StringLiteral KW_OUTPUTDRIVER outDriver=StringLiteral)? + KW_STORED KW_AS KW_INPUTFORMAT inFmt=StringLiteral KW_OUTPUTFORMAT outFmt=StringLiteral (KW_INPUTDRIVER inDriver=StringLiteral KW_OUTPUTDRIVER outDriver=StringLiteral)? -> ^(TOK_TABLEFILEFORMAT $inFmt $outFmt $inDriver? $outDriver?) | KW_STORED KW_BY storageHandler=StringLiteral (KW_WITH KW_SERDEPROPERTIES serdeprops=tableProperties)? 
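Among the HiveParser.g changes above, the new showStatement alternative (KW_SHOW KW_CONF StringLiteral -> TOK_SHOWCONF) pairs with the analyzeShowConf() method added to DDLSemanticAnalyzer earlier in this commit, which builds a ShowConfDesc DDL task plus a fetch task over its schema. A minimal way to exercise the new statement through the embedded Driver is sketched below; the driver usage is illustrative only and is not part of this patch.

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.ql.Driver;
    import org.apache.hadoop.hive.ql.session.SessionState;

    // Illustrative sketch: run the new SHOW CONF statement end to end.
    // It parses to TOK_SHOWCONF, which analyzeShowConf() turns into a
    // ShowConfDesc DDL task plus a fetch task over the result schema.
    public class ShowConfExample {
      public static void main(String[] args) throws Exception {
        HiveConf conf = new HiveConf();
        SessionState.start(conf);          // Driver requires an active session
        Driver driver = new Driver(conf);
        driver.run("SHOW CONF 'hive.exec.parallel'"); // string literal names the config property
      }
    }
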
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g?rev=1613740&r1=1613739&r2=1613740&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g Sat Jul 26 23:45:46 2014 @@ -538,5 +538,5 @@ functionIdentifier nonReserved : - KW_TRUE | KW_FALSE | KW_LIKE | KW_EXISTS | KW_ASC | KW_DESC | KW_ORDER | KW_GROUP | KW_BY | KW_AS | KW_INSERT | KW_OVERWRITE | KW_OUTER | KW_LEFT | KW_RIGHT | KW_FULL | KW_PARTITION | KW_PARTITIONS | KW_TABLE | KW_TABLES | KW_COLUMNS | KW_INDEX | KW_INDEXES | KW_REBUILD | KW_FUNCTIONS | KW_SHOW | KW_MSCK | KW_REPAIR | KW_DIRECTORY | KW_LOCAL | KW_USING | KW_CLUSTER | KW_DISTRIBUTE | KW_SORT | KW_UNION | KW_LOAD | KW_EXPORT | KW_IMPORT | KW_DATA | KW_INPATH | KW_IS | KW_NULL | KW_CREATE | KW_EXTERNAL | KW_ALTER | KW_CHANGE | KW_FIRST | KW_AFTER | KW_DESCRIBE | KW_DROP | KW_RENAME | KW_IGNORE | KW_PROTECTION | KW_TO | KW_COMMENT | KW_BOOLEAN | KW_TINYINT | KW_SMALLINT | KW_INT | KW_BIGINT | KW_FLOAT | KW_DOUBLE | KW_DATE | KW_DATETIME | KW_TIMESTAMP | KW_DECIMAL | KW_STRING | KW_ARRAY | KW_STRUCT | KW_UNIONTYPE | KW_PARTITIONED | KW_CLUSTERED | KW_SORTED | KW_INTO | KW_BUCKETS | KW_ROW | KW_ROWS | KW_FORMAT | KW_DELIMITED | KW_FIELDS | KW_TERMINATED | KW_ESCAPED | KW_COLLECTION | KW_ITEMS | KW_KEYS | KW_KEY_TYPE | KW_LINES | KW_STORED | KW_FILEFORMAT | KW_SEQUENCEFILE | KW_TEXTFILE | KW_RCFILE | KW_ORCFILE | KW_PARQUETFILE | KW_INPUTFORMAT | KW_OUTPUTFORMAT | KW_INPUTDRIVER | KW_OUTPUTDRIVER | KW_OFFLINE | KW_ENABLE | KW_DISABLE | KW_READONLY | KW_NO_DROP | KW_LOCATION | KW_BUCKET | KW_OUT | KW_OF | KW_PERCENT | KW_ADD | KW_REPLACE | KW_RLIKE | KW_REGEXP | KW_TEMPORARY | KW_EXPLAIN | KW_FORMATTED | KW_PRETTY | KW_DEPENDENCY | KW_LOGICAL | KW_SERDE | KW_WITH | KW_DEFERRED | KW_SERDEPROPERTIES | KW_DBPROPERTIES | KW_LIMIT | KW_SET | KW_UNSET | KW_TBLPROPERTIES | KW_IDXPROPERTIES | KW_VALUE_TYPE | KW_ELEM_TYPE | KW_MAPJOIN | KW_STREAMTABLE | KW_HOLD_DDLTIME | KW_CLUSTERSTATUS | KW_UTC | KW_UTCTIMESTAMP | KW_LONG | KW_DELETE | KW_PLUS | KW_MINUS | KW_FETCH | KW_INTERSECT | KW_VIEW | KW_IN | KW_DATABASES | KW_MATERIALIZED | KW_SCHEMA | KW_SCHEMAS | KW_GRANT | KW_REVOKE | KW_SSL | KW_UNDO | KW_LOCK | KW_LOCKS | KW_UNLOCK | KW_SHARED | KW_EXCLUSIVE | KW_PROCEDURE | KW_UNSIGNED | KW_WHILE | KW_READ | KW_READS | KW_PURGE | KW_RANGE | KW_ANALYZE | KW_BEFORE | KW_BETWEEN | KW_BOTH | KW_BINARY | KW_CONTINUE | KW_CURSOR | KW_TRIGGER | KW_RECORDREADER | KW_RECORDWRITER | KW_SEMI | KW_LATERAL | KW_TOUCH | KW_ARCHIVE | KW_UNARCHIVE | KW_COMPUTE | KW_STATISTICS | KW_USE | KW_OPTION | KW_CONCATENATE | KW_SHOW_DATABASE | KW_UPDATE | KW_RESTRICT | KW_CASCADE | KW_SKEWED | KW_ROLLUP | KW_CUBE | KW_DIRECTORIES | KW_FOR | KW_GROUPING | KW_SETS | KW_TRUNCATE | KW_NOSCAN | KW_USER | KW_ROLE | KW_ROLES | KW_INNER | KW_DEFINED | KW_ADMIN | KW_JAR | KW_FILE | KW_OWNER | KW_PRINCIPALS | KW_ALL | KW_DEFAULT | KW_NONE | KW_COMPACT | KW_COMPACTIONS | KW_TRANSACTIONS | KW_REWRITE | KW_AUTHORIZATION + KW_TRUE | KW_FALSE | KW_LIKE | KW_EXISTS | KW_ASC | KW_DESC | KW_ORDER | KW_GROUP | KW_BY | KW_AS | KW_INSERT | KW_OVERWRITE | KW_OUTER | KW_LEFT | KW_RIGHT | KW_FULL | KW_PARTITION | KW_PARTITIONS | KW_TABLE | KW_TABLES | KW_COLUMNS | 
KW_INDEX | KW_INDEXES | KW_REBUILD | KW_FUNCTIONS | KW_SHOW | KW_MSCK | KW_REPAIR | KW_DIRECTORY | KW_LOCAL | KW_USING | KW_CLUSTER | KW_DISTRIBUTE | KW_SORT | KW_UNION | KW_LOAD | KW_EXPORT | KW_IMPORT | KW_DATA | KW_INPATH | KW_IS | KW_NULL | KW_CREATE | KW_EXTERNAL | KW_ALTER | KW_CHANGE | KW_FIRST | KW_AFTER | KW_DESCRIBE | KW_DROP | KW_RENAME | KW_IGNORE | KW_PROTECTION | KW_TO | KW_COMMENT | KW_BOOLEAN | KW_TINYINT | KW_SMALLINT | KW_INT | KW_BIGINT | KW_FLOAT | KW_DOUBLE | KW_DATE | KW_DATETIME | KW_TIMESTAMP | KW_DECIMAL | KW_STRING | KW_ARRAY | KW_STRUCT | KW_UNIONTYPE | KW_PARTITIONED | KW_CLUSTERED | KW_SORTED | KW_INTO | KW_BUCKETS | KW_ROW | KW_ROWS | KW_FORMAT | KW_DELIMITED | KW_FIELDS | KW_TERMINATED | KW_ESCAPED | KW_COLLECTION | KW_ITEMS | KW_KEYS | KW_KEY_TYPE | KW_LINES | KW_STORED | KW_FILEFORMAT | KW_INPUTFORMAT | KW_OUTPUTFORMAT | KW_INPUTDRIVER | KW_OUTPUTDRIVER | KW_OFFLINE | KW_ENABLE | KW_DISABLE | KW_READONLY | KW_NO_DROP | KW_LOCATION | KW_BUCKET | KW_OUT | KW_OF | KW_PERCENT | KW_ADD | KW_REPLACE | KW_RLIKE | KW_REGEXP | KW_TEMPORARY | KW_EXPLAIN | KW_FORMATTED | KW_PRETTY | KW_DEPENDENCY | KW_LOGICAL | KW_SERDE | KW_WITH | KW_DEFERRED | KW_SERDEPROPERTIES | KW_DBPROPERTIES | KW_LIMIT | KW_SET | KW_UNSET | KW_TBLPROPERTIES | KW_IDXPROPERTIES | KW_VALUE_TYPE | KW_ELEM_TYPE | KW_MAPJOIN | KW_STREAMTABLE | KW_HOLD_DDLTIME | KW_CLUSTERSTATUS | KW_UTC | KW_UTCTIMESTAMP | KW_LONG | KW_DELETE | KW_PLUS | KW_MINUS | KW_FETCH | KW_INTERSECT | KW_VIEW | KW_IN | KW_DATABASES | KW_MATERIALIZED | KW_SCHEMA | KW_SCHEMAS | KW_GRANT | KW_REVOKE | KW_SSL | KW_UNDO | KW_LOCK | KW_LOCKS | KW_UNLOCK | KW_SHARED | KW_EXCLUSIVE | KW_PROCEDURE | KW_UNSIGNED | KW_WHILE | KW_READ | KW_READS | KW_PURGE | KW_RANGE | KW_ANALYZE | KW_BEFORE | KW_BETWEEN | KW_BOTH | KW_BINARY | KW_CONTINUE | KW_CURSOR | KW_TRIGGER | KW_RECORDREADER | KW_RECORDWRITER | KW_SEMI | KW_LATERAL | KW_TOUCH | KW_ARCHIVE | KW_UNARCHIVE | KW_COMPUTE | KW_STATISTICS | KW_USE | KW_OPTION | KW_CONCATENATE | KW_SHOW_DATABASE | KW_UPDATE | KW_RESTRICT | KW_CASCADE | KW_SKEWED | KW_ROLLUP | KW_CUBE | KW_DIRECTORIES | KW_FOR | KW_GROUPING | KW_SETS | KW_TRUNCATE | KW_NOSCAN | KW_USER | KW_ROLE | KW_ROLES | KW_INNER | KW_DEFINED | KW_ADMIN | KW_JAR | KW_FILE | KW_OWNER | KW_PRINCIPALS | KW_ALL | KW_DEFAULT | KW_NONE | KW_COMPACT | KW_COMPACTIONS | KW_TRANSACTIONS | KW_REWRITE | KW_AUTHORIZATION ; Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java?rev=1613740&r1=1613739&r2=1613740&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java Sat Jul 26 23:45:46 2014 @@ -92,6 +92,7 @@ public class ImportSemanticAnalyzer exte table.getTableName(), false, // isExternal: set to false here, can be overwritten by the // IMPORT stmt + table.isTemporary(), table.getSd().getCols(), table.getPartitionKeys(), table.getSd().getBucketCols(), Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java?rev=1613740&r1=1613739&r2=1613740&view=diff
============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java Sat Jul 26 23:45:46 2014 @@ -22,12 +22,11 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; -import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; @@ -216,4 +215,18 @@ public final class ParseUtils { return TypeInfoFactory.getDecimalTypeInfo(precision, scale); } + public static String ensureClassExists(String className) + throws SemanticException { + if (className == null) { + return null; + } + try { + Class.forName(className, true, JavaUtils.getClassLoader()); + } catch (ClassNotFoundException e) { + throw new SemanticException("Cannot find class '" + className + "'", e); + } + return className; + } + + } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1613740&r1=1613739&r2=1613740&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Sat Jul 26 23:45:46 2014 @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.parse; import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVESTATSDBCLASS; +import static org.apache.hadoop.hive.metastore.MetaStoreUtils.DATABASE_WAREHOUSE_SUFFIX; import java.io.IOException; import java.io.Serializable; @@ -30,6 +31,7 @@ import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.UUID; import java.util.Map.Entry; import java.util.Set; import java.util.TreeSet; @@ -405,7 +407,11 @@ public class SemanticAnalyzer extends Ba LinkedHashMap<String, ASTNode> aggregationTrees = new LinkedHashMap<String, ASTNode>(); List<ASTNode> wdwFns = new ArrayList<ASTNode>(); for (int i = 0; i < selExpr.getChildCount(); ++i) { - ASTNode function = (ASTNode) selExpr.getChild(i).getChild(0); + ASTNode function = (ASTNode) selExpr.getChild(i); + if (function.getType() == HiveParser.TOK_SELEXPR || + function.getType() == HiveParser.TOK_SUBQUERY_EXPR) { + function = (ASTNode)function.getChild(0); + } doPhase1GetAllAggregations(function, aggregationTrees, wdwFns); } @@ -1176,10 +1182,6 @@ public class SemanticAnalyzer extends Ba return phase1Result; } - private void getMetaData(QBExpr qbexpr) throws SemanticException { - getMetaData(qbexpr, null); - } - private void getMetaData(QBExpr qbexpr, ReadEntity parentInput) throws SemanticException { if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) { @@ -1355,8 +1357,7 @@ public class SemanticAnalyzer extends Ba } RowFormatParams rowFormatParams = new 
RowFormatParams(); - AnalyzeCreateCommonVars shared = new AnalyzeCreateCommonVars(); - StorageFormat storageFormat = new StorageFormat(); + StorageFormat storageFormat = new StorageFormat(conf); LOG.info("Get metadata for destination tables"); // Go over all the destination structures and populate the related @@ -1451,10 +1452,16 @@ public class SemanticAnalyzer extends Ba int numCh = ast.getChildCount(); for (int num = 1; num < numCh ; num++){ ASTNode child = (ASTNode) ast.getChild(num); - if (ast.getChild(num) != null){ + if (child != null) { + if (storageFormat.fillStorageFormat(child)) { + localDirectoryDesc.setOutputFormat(storageFormat.getOutputFormat()); + localDirectoryDesc.setSerName(storageFormat.getSerde()); + localDirectoryDescIsSet = true; + continue; + } switch (child.getToken().getType()) { case HiveParser.TOK_TABLEROWFORMAT: - rowFormatParams.analyzeRowFormat(shared, child); + rowFormatParams.analyzeRowFormat(child); localDirectoryDesc.setFieldDelim(rowFormatParams.fieldDelim); localDirectoryDesc.setLineDelim(rowFormatParams.lineDelim); localDirectoryDesc.setCollItemDelim(rowFormatParams.collItemDelim); @@ -1465,18 +1472,8 @@ public class SemanticAnalyzer extends Ba break; case HiveParser.TOK_TABLESERIALIZER: ASTNode serdeChild = (ASTNode) child.getChild(0); - shared.serde = unescapeSQLString(serdeChild.getChild(0).getText()); - localDirectoryDesc.setSerName(shared.serde); - localDirectoryDescIsSet=true; - break; - case HiveParser.TOK_TBLSEQUENCEFILE: - case HiveParser.TOK_TBLTEXTFILE: - case HiveParser.TOK_TBLRCFILE: - case HiveParser.TOK_TBLORCFILE: - case HiveParser.TOK_TABLEFILEFORMAT: - storageFormat.fillStorageFormat(child, shared); - localDirectoryDesc.setOutputFormat(storageFormat.outputFormat); - localDirectoryDesc.setSerName(shared.serde); + storageFormat.setSerde(unescapeSQLString(serdeChild.getChild(0).getText())); + localDirectoryDesc.setSerName(storageFormat.getSerde()); localDirectoryDescIsSet=true; break; } @@ -4392,11 +4389,17 @@ public class SemanticAnalyzer extends Ba } } + // Optimize the scenario when there are no grouping keys - only 1 reducer is needed + int numReducers = -1; + if (grpByExprs.isEmpty()) { + numReducers = 1; + } + ReduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc(reduceKeys, keyLength, reduceValues, + distinctColIndices, outputKeyColumnNames, outputValueColumnNames, + true, -1, keyLength, numReducers); + ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap( - OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, - keyLength, reduceValues, distinctColIndices, - outputKeyColumnNames, outputValueColumnNames, true, -1, keyLength, - -1), new RowSchema(reduceSinkOutputRowResolver + OperatorFactory.getAndMakeChild(rsDesc, new RowSchema(reduceSinkOutputRowResolver .getColumnInfos()), inputOperatorInfo), reduceSinkOutputRowResolver); rsOp.setColumnExprMap(colExprMap); return rsOp; @@ -10007,6 +10010,7 @@ public class SemanticAnalyzer extends Ba Map<String, String> tblProps = null; boolean ifNotExists = false; boolean isExt = false; + boolean isTemporary = false; ASTNode selectStmt = null; final int CREATE_TABLE = 0; // regular CREATE TABLE final int CTLT = 1; // CREATE TABLE LIKE ... 
(CTLT) @@ -10018,8 +10022,7 @@ public class SemanticAnalyzer extends Ba boolean storedAsDirs = false; RowFormatParams rowFormatParams = new RowFormatParams(); - StorageFormat storageFormat = new StorageFormat(); - AnalyzeCreateCommonVars shared = new AnalyzeCreateCommonVars(); + StorageFormat storageFormat = new StorageFormat(conf); LOG.info("Creating table " + tableName + " position=" + ast.getCharPositionInLine()); @@ -10033,7 +10036,7 @@ public class SemanticAnalyzer extends Ba */ for (int num = 1; num < numCh; num++) { ASTNode child = (ASTNode) ast.getChild(num); - if (storageFormat.fillStorageFormat(child, shared)) { + if (storageFormat.fillStorageFormat(child)) { continue; } switch (child.getToken().getType()) { @@ -10043,6 +10046,9 @@ public class SemanticAnalyzer extends Ba case HiveParser.KW_EXTERNAL: isExt = true; break; + case HiveParser.KW_TEMPORARY: + isTemporary = true; + break; case HiveParser.TOK_LIKETABLE: if (child.getChildCount() > 0) { likeTableName = getUnescapedName((ASTNode) child.getChild(0)); @@ -10102,7 +10108,7 @@ public class SemanticAnalyzer extends Ba } break; case HiveParser.TOK_TABLEROWFORMAT: - rowFormatParams.analyzeRowFormat(shared, child); + rowFormatParams.analyzeRowFormat(child); break; case HiveParser.TOK_TABLELOCATION: location = unescapeSQLString(child.getChild(0).getText()); @@ -10114,16 +10120,12 @@ public class SemanticAnalyzer extends Ba break; case HiveParser.TOK_TABLESERIALIZER: child = (ASTNode) child.getChild(0); - shared.serde = unescapeSQLString(child.getChild(0).getText()); + storageFormat.setSerde(unescapeSQLString(child.getChild(0).getText())); if (child.getChildCount() == 2) { readProps((ASTNode) (child.getChild(1).getChild(0)), - shared.serdeProps); + storageFormat.getSerdeProps()); } break; - - case HiveParser.TOK_FILEFORMAT_GENERIC: - handleGenericFileFormat(child); - break; case HiveParser.TOK_TABLESKEWED: /** * Throw an error if the user tries to use the DDL with @@ -10144,9 +10146,9 @@ public class SemanticAnalyzer extends Ba } } - storageFormat.fillDefaultStorageFormat(shared); + storageFormat.fillDefaultStorageFormat(); - if ((command_type == CTAS) && (storageFormat.storageHandler != null)) { + if ((command_type == CTAS) && (storageFormat.getStorageHandler() != null)) { throw new SemanticException(ErrorMsg.CREATE_NON_NATIVE_AS.getMsg()); } @@ -10158,7 +10160,8 @@ public class SemanticAnalyzer extends Ba return null; } } catch (HiveException e) { - e.printStackTrace(); + // should not occur since second parameter to getTableWithQN is false + throw new IllegalStateException("Unxpected Exception thrown: " + e.getMessage(), e); } } @@ -10166,6 +10169,27 @@ public class SemanticAnalyzer extends Ba String dbName = qualified.length == 1 ? SessionState.get().getCurrentDatabase() : qualified[0]; Database database = getDatabase(dbName); outputs.add(new WriteEntity(database, WriteEntity.WriteType.DDL_SHARED)); + + if (isTemporary) { + if (partCols.size() > 0) { + throw new SemanticException("Partition columns are not supported on temporary tables"); + } + + if (location == null) { + // for temporary tables we set the location to something in the session's scratch dir + // it has the same life cycle as the tmp table + try { + // Generate a unique ID for temp table path. + // This path will be fixed for the life of the temp table. 
+ Path path = new Path(SessionState.getTempTableSpace(conf), UUID.randomUUID().toString()); + path = Warehouse.getDnsPath(path, conf); + location = path.toString(); + } catch (MetaException err) { + throw new SemanticException("Error while generating temp table path:", err); + } + } + } + // Handle different types of CREATE TABLE command CreateTableDesc crtTblDesc = null; switch (command_type) { @@ -10173,13 +10197,13 @@ public class SemanticAnalyzer extends Ba case CREATE_TABLE: // REGULAR CREATE TABLE DDL tblProps = addDefaultProperties(tblProps); - crtTblDesc = new CreateTableDesc(tableName, isExt, cols, partCols, + crtTblDesc = new CreateTableDesc(tableName, isExt, isTemporary, cols, partCols, bucketCols, sortCols, numBuckets, rowFormatParams.fieldDelim, rowFormatParams.fieldEscape, rowFormatParams.collItemDelim, rowFormatParams.mapKeyDelim, rowFormatParams.lineDelim, comment, - storageFormat.inputFormat, storageFormat.outputFormat, location, shared.serde, - storageFormat.storageHandler, shared.serdeProps, tblProps, ifNotExists, skewedColNames, + storageFormat.getInputFormat(), storageFormat.getOutputFormat(), location, storageFormat.getSerde(), + storageFormat.getStorageHandler(), storageFormat.getSerdeProps(), tblProps, ifNotExists, skewedColNames, skewedValues); crtTblDesc.setStoredAsSubDirectories(storedAsDirs); crtTblDesc.setNullFormat(rowFormatParams.nullFormat); @@ -10195,9 +10219,17 @@ public class SemanticAnalyzer extends Ba case CTLT: // create table like <tbl_name> tblProps = addDefaultProperties(tblProps); - CreateTableLikeDesc crtTblLikeDesc = new CreateTableLikeDesc(tableName, isExt, - storageFormat.inputFormat, storageFormat.outputFormat, location, - shared.serde, shared.serdeProps, tblProps, ifNotExists, likeTableName); + if (isTemporary) { + Table likeTable = getTableWithQN(likeTableName, false); + if (likeTable != null && likeTable.getPartCols().size() > 0) { + throw new SemanticException("Partition columns are not supported on temporary tables " + + "and source table in CREATE TABLE LIKE is partitioned."); + } + } + CreateTableLikeDesc crtTblLikeDesc = new CreateTableLikeDesc(tableName, isExt, isTemporary, + storageFormat.getInputFormat(), storageFormat.getOutputFormat(), location, + storageFormat.getSerde(), storageFormat.getSerdeProps(), tblProps, ifNotExists, + likeTableName); SessionState.get().setCommandType(HiveOperation.CREATETABLE); rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), crtTblLikeDesc), conf)); @@ -10217,14 +10249,14 @@ public class SemanticAnalyzer extends Ba tblProps = addDefaultProperties(tblProps); - crtTblDesc = new CreateTableDesc(dbName, tableName, isExt, cols, partCols, + crtTblDesc = new CreateTableDesc(dbName, tableName, isExt, isTemporary, cols, partCols, bucketCols, sortCols, numBuckets, rowFormatParams.fieldDelim, rowFormatParams.fieldEscape, rowFormatParams.collItemDelim, rowFormatParams.mapKeyDelim, rowFormatParams.lineDelim, - comment, storageFormat.inputFormat, - storageFormat.outputFormat, location, shared.serde, storageFormat.storageHandler, - shared.serdeProps, - tblProps, ifNotExists, skewedColNames, skewedValues); + comment, storageFormat.getInputFormat(), + storageFormat.getOutputFormat(), location, storageFormat.getSerde(), + storageFormat.getStorageHandler(), storageFormat.getSerdeProps(), tblProps, ifNotExists, + skewedColNames, skewedValues); crtTblDesc.setStoredAsSubDirectories(storedAsDirs); crtTblDesc.setNullFormat(rowFormatParams.nullFormat); qb.setTableDesc(crtTblDesc); Modified: 
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java?rev=1613740&r1=1613739&r2=1613740&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java Sat Jul 26 23:45:46 2014 @@ -69,6 +69,7 @@ public final class SemanticAnalyzerFacto commandType.put(HiveParser.TOK_SHOWPARTITIONS, HiveOperation.SHOWPARTITIONS); commandType.put(HiveParser.TOK_SHOWLOCKS, HiveOperation.SHOWLOCKS); commandType.put(HiveParser.TOK_SHOWDBLOCKS, HiveOperation.SHOWLOCKS); + commandType.put(HiveParser.TOK_SHOWCONF, HiveOperation.SHOWCONF); commandType.put(HiveParser.TOK_CREATEFUNCTION, HiveOperation.CREATEFUNCTION); commandType.put(HiveParser.TOK_DROPFUNCTION, HiveOperation.DROPFUNCTION); commandType.put(HiveParser.TOK_CREATEMACRO, HiveOperation.CREATEMACRO); @@ -203,6 +204,7 @@ public final class SemanticAnalyzerFacto case HiveParser.TOK_SHOWDBLOCKS: case HiveParser.TOK_SHOW_COMPACTIONS: case HiveParser.TOK_SHOW_TRANSACTIONS: + case HiveParser.TOK_SHOWCONF: case HiveParser.TOK_CREATEINDEX: case HiveParser.TOK_DROPINDEX: case HiveParser.TOK_ALTERTABLE_CLUSTER_SORT:
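
The new ParseUtils.ensureClassExists helper added above lets the analyzers fail fast with a SemanticException when a storage-format or serde class named in DDL cannot be loaded. A minimal usage sketch, illustrative only (the missing class name is made up):

import org.apache.hadoop.hive.ql.parse.ParseUtils;
import org.apache.hadoop.hive.ql.parse.SemanticException;

public class EnsureClassExistsSketch {
  public static void main(String[] args) {
    try {
      // Resolves fine: TextInputFormat is on every Hive classpath.
      ParseUtils.ensureClassExists("org.apache.hadoop.mapred.TextInputFormat");
      // Throws SemanticException("Cannot find class ...") at analysis time
      // instead of surfacing a ClassNotFoundException at run time.
      ParseUtils.ensureClassExists("com.example.MissingInputFormat");
    } catch (SemanticException e) {
      System.err.println(e.getMessage());
    }
  }
}
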
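
For CREATE TEMPORARY TABLE without an explicit LOCATION, the SemanticAnalyzer hunk above defaults the table to a UUID-named directory under the session's temp-table space, so the data shares the life cycle of the session scratch dir. A sketch of that construction in isolation, using only the calls shown in the diff:

import java.util.UUID;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.MetaException;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.ql.session.SessionState;

public class TempTableLocationSketch {
  // One fixed, random path per temp table, qualified via Warehouse.getDnsPath.
  static String defaultTempTableLocation(HiveConf conf) throws MetaException {
    Path path = new Path(SessionState.getTempTableSpace(conf), UUID.randomUUID().toString());
    return Warehouse.getDnsPath(path, conf).toString();
  }
}
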
