Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java?rev=1624140&r1=1624139&r2=1624140&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java Wed Sep 10 21:41:16 2014 @@ -346,48 +346,6 @@ public class ColumnStatsSemanticAnalyzer return rewrittenTree; } - public ColumnStatsSemanticAnalyzer(HiveConf conf, ASTNode tree) throws SemanticException { - super(conf, false); - // check if it is no scan. grammar prevents coexit noscan/columns - super.processNoScanCommand(tree); - // check if it is partial scan. grammar prevents coexit partialscan/columns - super.processPartialScanCommand(tree); - /* Rewrite only analyze table <> column <> compute statistics; Don't rewrite analyze table - * command - table stats are collected by the table scan operator and is not rewritten to - * an aggregation. - */ - if (shouldRewrite(tree)) { - tbl = getTable(tree); - colNames = getColumnName(tree); - // Save away the original AST - originalTree = tree; - boolean isPartitionStats = isPartitionLevelStats(tree); - Map<String,String> partSpec = null; - checkForPartitionColumns(colNames, Utilities.getColumnNamesFromFieldSchema(tbl.getPartitionKeys())); - validateSpecifiedColumnNames(colNames); - if (conf.getBoolVar(ConfVars.HIVE_STATS_COLLECT_PART_LEVEL_STATS) && tbl.isPartitioned()) { - isPartitionStats = true; - } - - if (isPartitionStats) { - isTableLevel = false; - partSpec = getPartKeyValuePairsFromAST(tree); - handlePartialPartitionSpec(partSpec); - } else { - isTableLevel = true; - } - colType = getColumnTypes(colNames); - int numBitVectors = getNumBitVectorsForNDVEstimation(conf); - rewrittenQuery = genRewrittenQuery(colNames, numBitVectors, partSpec, isPartitionStats); - rewrittenTree = genRewrittenTree(rewrittenQuery); - } else { - // Not an analyze table column compute statistics statement - don't do any rewrites - originalTree = rewrittenTree = tree; - rewrittenQuery = null; - isRewritten = false; - } - } - // fail early if the columns specified for column statistics are not valid private void validateSpecifiedColumnNames(List<String> specifiedCols) throws SemanticException { @@ -421,6 +379,46 @@ public class ColumnStatsSemanticAnalyzer // initialize QB init(true); + // check if it is no scan. grammar prevents coexit noscan/columns + super.processNoScanCommand(ast); + // check if it is partial scan. grammar prevents coexit partialscan/columns + super.processPartialScanCommand(ast); + /* Rewrite only analyze table <> column <> compute statistics; Don't rewrite analyze table + * command - table stats are collected by the table scan operator and is not rewritten to + * an aggregation. + */ + if (shouldRewrite(ast)) { + tbl = getTable(ast); + colNames = getColumnName(ast); + // Save away the original AST + originalTree = ast; + boolean isPartitionStats = isPartitionLevelStats(ast); + Map<String,String> partSpec = null; + checkForPartitionColumns( + colNames, Utilities.getColumnNamesFromFieldSchema(tbl.getPartitionKeys())); + validateSpecifiedColumnNames(colNames); + if (conf.getBoolVar(ConfVars.HIVE_STATS_COLLECT_PART_LEVEL_STATS) && tbl.isPartitioned()) { + isPartitionStats = true; + } + + if (isPartitionStats) { + isTableLevel = false; + partSpec = getPartKeyValuePairsFromAST(ast); + handlePartialPartitionSpec(partSpec); + } else { + isTableLevel = true; + } + colType = getColumnTypes(colNames); + int numBitVectors = getNumBitVectorsForNDVEstimation(conf); + rewrittenQuery = genRewrittenQuery(colNames, numBitVectors, partSpec, isPartitionStats); + rewrittenTree = genRewrittenTree(rewrittenQuery); + } else { + // Not an analyze table column compute statistics statement - don't do any rewrites + originalTree = rewrittenTree = ast; + rewrittenQuery = null; + isRewritten = false; + } + // Setup the necessary metadata if originating from analyze rewrite if (isRewritten) { qb = getQB();
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/GlobalLimitCtx.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/GlobalLimitCtx.java?rev=1624140&r1=1624139&r2=1624140&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/GlobalLimitCtx.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/GlobalLimitCtx.java Wed Sep 10 21:41:16 2014 @@ -25,10 +25,14 @@ import org.apache.hadoop.hive.ql.plan.Li */ public class GlobalLimitCtx { - private boolean enable = false; - private int globalLimit = -1; - private boolean hasTransformOrUDTF = false; - private LimitDesc lastReduceLimitDesc = null; + private boolean enable; + private int globalLimit; + private boolean hasTransformOrUDTF; + private LimitDesc lastReduceLimitDesc; + + public GlobalLimitCtx() { + reset(); + } public int getGlobalLimit() { return globalLimit; @@ -64,4 +68,11 @@ public class GlobalLimitCtx { this.globalLimit = -1; this.lastReduceLimitDesc = null; } + + public void reset() { + enable = false; + globalLimit = -1; + hasTransformOrUDTF = false; + lastReduceLimitDesc = null; + } } Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1624140&r1=1624139&r2=1624140&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Wed Sep 10 21:41:16 2014 @@ -293,6 +293,8 @@ public class SemanticAnalyzer extends Ba public static final String DUMMY_DATABASE = "_dummy_database"; public static final String DUMMY_TABLE = "_dummy_table"; + // Max characters when auto generating the column name with func name + private static final int AUTOGEN_COLALIAS_PRFX_MAXLENGTH = 20; private HashMap<TableScanOperator, ExprNodeDesc> opToPartPruner; private HashMap<TableScanOperator, PrunedPartitionList> opToPartList; @@ -315,7 +317,7 @@ public class SemanticAnalyzer extends Ba private HashMap<TableScanOperator, sampleDesc> opToSamplePruner; private final Map<TableScanOperator, Map<String, ExprNodeDesc>> opToPartToSkewedPruner; /** - * a map for the split sampling, from ailias to an instance of SplitSample + * a map for the split sampling, from alias to an instance of SplitSample * that describes percentage and number. */ private final HashMap<String, SplitSample> nameToSplitSample; @@ -326,7 +328,7 @@ public class SemanticAnalyzer extends Ba private ArrayList<String> viewsExpanded; private ASTNode viewSelect; private final UnparseTranslator unparseTranslator; - private final GlobalLimitCtx globalLimitCtx = new GlobalLimitCtx(); + private final GlobalLimitCtx globalLimitCtx; // prefix for column names auto generated by hive private final String autogenColAliasPrfxLbl; @@ -337,16 +339,13 @@ public class SemanticAnalyzer extends Ba // keeps track of aliases for V3, V3:V2, V3:V2:V1. // This is used when T is added as an input for the query, the parents of T is // derived from the alias V3:V2:V1:T - private final Map<String, ReadEntity> viewAliasToInput = new HashMap<String, ReadEntity>(); - - // Max characters when auto generating the column name with func name - private static final int AUTOGEN_COLALIAS_PRFX_MAXLENGTH = 20; + private final Map<String, ReadEntity> viewAliasToInput; // flag for no scan during analyze ... compute statistics - protected boolean noscan = false; + protected boolean noscan; //flag for partial scan during analyze ... compute statistics - protected boolean partialscan = false; + protected boolean partialscan; private volatile boolean runCBO = true; private volatile boolean disableJoinMerge = false; @@ -371,7 +370,6 @@ public class SemanticAnalyzer extends Ba } public SemanticAnalyzer(HiveConf conf) throws SemanticException { - super(conf); opToPartPruner = new HashMap<TableScanOperator, ExprNodeDesc>(); opToPartList = new HashMap<TableScanOperator, PrunedPartitionList>(); @@ -401,6 +399,9 @@ public class SemanticAnalyzer extends Ba queryProperties = new QueryProperties(); opToPartToSkewedPruner = new HashMap<TableScanOperator, Map<String, ExprNodeDesc>>(); aliasToCTEs = new HashMap<String, ASTNode>(); + globalLimitCtx = new GlobalLimitCtx(); + viewAliasToInput = new HashMap<String, ReadEntity>(); + noscan = partialscan = false; } @Override Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java?rev=1624140&r1=1624139&r2=1624140&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java Wed Sep 10 21:41:16 2014 @@ -263,7 +263,7 @@ public final class SemanticAnalyzerFacto return new FunctionSemanticAnalyzer(conf); case HiveParser.TOK_ANALYZE: - return new ColumnStatsSemanticAnalyzer(conf, tree); + return new ColumnStatsSemanticAnalyzer(conf); case HiveParser.TOK_CREATEMACRO: case HiveParser.TOK_DROPMACRO: Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java?rev=1624140&r1=1624139&r2=1624140&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java Wed Sep 10 21:41:16 2014 @@ -69,7 +69,11 @@ public class GroupByDesc extends Abstrac transient private boolean isDistinct; private boolean dontResetAggrsDistinct; + // Extra parameters only for vectorization. + private VectorGroupByDesc vectorDesc; + public GroupByDesc() { + vectorDesc = new VectorGroupByDesc(); } public GroupByDesc( @@ -102,6 +106,7 @@ public class GroupByDesc extends Abstrac final boolean groupingSetsPresent, final int groupingSetsPosition, final boolean isDistinct) { + vectorDesc = new VectorGroupByDesc(); this.mode = mode; this.outputColumnNames = outputColumnNames; this.keys = keys; @@ -116,6 +121,14 @@ public class GroupByDesc extends Abstrac this.isDistinct = isDistinct; } + public void setVectorDesc(VectorGroupByDesc vectorDesc) { + this.vectorDesc = vectorDesc; + } + + public VectorGroupByDesc getVectorDesc() { + return vectorDesc; + } + public Mode getMode() { return mode; } @@ -268,6 +281,14 @@ public class GroupByDesc extends Abstrac this.groupingSetPosition = groupingSetPosition; } + public boolean isDontResetAggrsDistinct() { + return dontResetAggrsDistinct; + } + + public void setDontResetAggrsDistinct(boolean dontResetAggrsDistinct) { + this.dontResetAggrsDistinct = dontResetAggrsDistinct; + } + public boolean isDistinct() { return isDistinct; } @@ -276,11 +297,4 @@ public class GroupByDesc extends Abstrac this.isDistinct = isDistinct; } - public boolean isDontResetAggrsDistinct() { - return dontResetAggrsDistinct; - } - - public void setDontResetAggrsDistinct(boolean dontResetAggrsDistinct) { - this.dontResetAggrsDistinct = dontResetAggrsDistinct; - } } Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java?rev=1624140&r1=1624139&r2=1624140&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java Wed Sep 10 21:41:16 2014 @@ -470,10 +470,7 @@ public class SessionState { */ private void createSessionDirs(String userName) throws IOException { HiveConf conf = getConf(); - // First create the root scratch dir on hdfs (if it doesn't already exist) and make it writable - Path rootHDFSDirPath = new Path(HiveConf.getVar(conf, HiveConf.ConfVars.SCRATCHDIR)); - String rootHDFSDirPermission = "777"; - createPath(conf, rootHDFSDirPath, rootHDFSDirPermission, false, false); + Path rootHDFSDirPath = createRootHDFSDir(conf); // Now create session specific dirs String scratchDirPermission = HiveConf.getVar(conf, HiveConf.ConfVars.SCRATCHDIRPERMISSION); Path path; @@ -506,6 +503,30 @@ public class SessionState { } /** + * Create the root scratch dir on hdfs (if it doesn't already exist) and make it writable + * @param conf + * @return + * @throws IOException + */ + private Path createRootHDFSDir(HiveConf conf) throws IOException { + Path rootHDFSDirPath = new Path(HiveConf.getVar(conf, HiveConf.ConfVars.SCRATCHDIR)); + FsPermission expectedHDFSDirPermission = new FsPermission("777"); + FileSystem fs = rootHDFSDirPath.getFileSystem(conf); + if (!fs.exists(rootHDFSDirPath)) { + Utilities.createDirsWithPermission(conf, rootHDFSDirPath, expectedHDFSDirPermission, true); + } + FsPermission currentHDFSDirPermission = fs.getFileStatus(rootHDFSDirPath).getPermission(); + LOG.debug("HDFS root scratch dir: " + rootHDFSDirPath + ", permission: " + + currentHDFSDirPermission); + // If the root HDFS scratch dir already exists, make sure the permissions are 777. + if (!expectedHDFSDirPermission.equals(fs.getFileStatus(rootHDFSDirPath).getPermission())) { + throw new RuntimeException("The root scratch dir: " + rootHDFSDirPath + + " on HDFS should be writable. Current permissions are: " + currentHDFSDirPermission); + } + return rootHDFSDirPath; + } + + /** * Create a given path if it doesn't exist. * * @param conf @@ -624,10 +645,10 @@ public class SessionState { authorizerV2 = authorizerFactory.createHiveAuthorizer(new HiveMetastoreClientFactoryImpl(), conf, authenticator, authzContextBuilder.build()); - authorizerV2.applyAuthorizationConfigPolicy(conf); - // create the create table grants with new config - createTableGrants = CreateTableAutomaticGrant.create(conf); + authorizerV2.applyAuthorizationConfigPolicy(conf); } + // create the create table grants with new config + createTableGrants = CreateTableAutomaticGrant.create(conf); } catch (HiveException e) { throw new RuntimeException(e); Modified: hive/branches/cbo/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java?rev=1624140&r1=1624139&r2=1624140&view=diff ============================================================================== --- hive/branches/cbo/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java (original) +++ hive/branches/cbo/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java Wed Sep 10 21:41:16 2014 @@ -107,7 +107,7 @@ public class TestVectorizer { gbyOp.setConf(desc); Vectorizer v = new Vectorizer(); - Assert.assertTrue(v.validateMapWorkOperator(gbyOp)); + Assert.assertTrue(v.validateMapWorkOperator(gbyOp, false)); VectorGroupByOperator vectorOp = (VectorGroupByOperator) v.vectorizeOperator(gbyOp, vContext); Assert.assertEquals(VectorUDAFSumLong.class, vectorOp.getAggregators()[0].getClass()); VectorUDAFSumLong udaf = (VectorUDAFSumLong) vectorOp.getAggregators()[0]; @@ -150,7 +150,7 @@ public class TestVectorizer { /** * prepareAbstractMapJoin prepares a join operator descriptor, used as helper by SMB and Map join tests. */ - private void prepareAbstractMapJoin(AbstractMapJoinOperator<? extends MapJoinDesc> mop, MapJoinDesc mjdesc) { + private void prepareAbstractMapJoin(AbstractMapJoinOperator<? extends MapJoinDesc> map, MapJoinDesc mjdesc) { mjdesc.setPosBigTable(0); List<ExprNodeDesc> expr = new ArrayList<ExprNodeDesc>(); expr.add(new ExprNodeColumnDesc(Integer.class, "col1", "T", false)); @@ -180,14 +180,14 @@ public class TestVectorizer { */ @Test public void testValidateMapJoinOperator() { - MapJoinOperator mop = new MapJoinOperator(); + MapJoinOperator map = new MapJoinOperator(); MapJoinDesc mjdesc = new MapJoinDesc(); - prepareAbstractMapJoin(mop, mjdesc); - mop.setConf(mjdesc); + prepareAbstractMapJoin(map, mjdesc); + map.setConf(mjdesc); Vectorizer vectorizer = new Vectorizer(); - Assert.assertTrue(vectorizer.validateMapWorkOperator(mop)); + Assert.assertTrue(vectorizer.validateMapWorkOperator(map, false)); } @@ -196,13 +196,13 @@ public class TestVectorizer { */ @Test public void testValidateSMBJoinOperator() { - SMBMapJoinOperator mop = new SMBMapJoinOperator(); + SMBMapJoinOperator map = new SMBMapJoinOperator(); SMBJoinDesc mjdesc = new SMBJoinDesc(); - prepareAbstractMapJoin(mop, mjdesc); - mop.setConf(mjdesc); + prepareAbstractMapJoin(map, mjdesc); + map.setConf(mjdesc); Vectorizer vectorizer = new Vectorizer(); - Assert.assertTrue(vectorizer.validateMapWorkOperator(mop)); + Assert.assertTrue(vectorizer.validateMapWorkOperator(map, false)); } } Modified: hive/branches/cbo/ql/src/test/queries/clientpositive/dynpart_sort_opt_vectorization.q URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/queries/clientpositive/dynpart_sort_opt_vectorization.q?rev=1624140&r1=1624139&r2=1624140&view=diff ============================================================================== --- hive/branches/cbo/ql/src/test/queries/clientpositive/dynpart_sort_opt_vectorization.q (original) +++ hive/branches/cbo/ql/src/test/queries/clientpositive/dynpart_sort_opt_vectorization.q Wed Sep 10 21:41:16 2014 @@ -150,7 +150,9 @@ insert overwrite table over1k_part_buck_ desc formatted over1k_part_buck_sort2_orc partition(t=27); desc formatted over1k_part_buck_sort2_orc partition(t="__HIVE_DEFAULT_PARTITION__"); +explain select * from over1k_part_buck_sort2_orc; select * from over1k_part_buck_sort2_orc; +explain select count(*) from over1k_part_buck_sort2_orc; select count(*) from over1k_part_buck_sort2_orc; set hive.optimize.sort.dynamic.partition=true; @@ -159,5 +161,7 @@ insert overwrite table over1k_part_buck_ desc formatted over1k_part_buck_sort2_orc partition(t=27); desc formatted over1k_part_buck_sort2_orc partition(t="__HIVE_DEFAULT_PARTITION__"); +explain select * from over1k_part_buck_sort2_orc; select * from over1k_part_buck_sort2_orc; +explain select count(*) from over1k_part_buck_sort2_orc; select count(*) from over1k_part_buck_sort2_orc; Modified: hive/branches/cbo/ql/src/test/queries/clientpositive/input_lazyserde.q URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/queries/clientpositive/input_lazyserde.q?rev=1624140&r1=1624139&r2=1624140&view=diff ============================================================================== --- hive/branches/cbo/ql/src/test/queries/clientpositive/input_lazyserde.q (original) +++ hive/branches/cbo/ql/src/test/queries/clientpositive/input_lazyserde.q Wed Sep 10 21:41:16 2014 @@ -30,3 +30,7 @@ CREATE TABLE dest1(a map<string,string>) INSERT OVERWRITE TABLE dest1 SELECT src_thrift.mstringstring FROM src_thrift DISTRIBUTE BY 1; SELECT * from dest1; +CREATE TABLE destBin(a UNIONTYPE<int, double, array<string>, struct<col1:int,col2:string>>) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe' STORED AS SEQUENCEFILE; +INSERT OVERWRITE TABLE destBin SELECT create_union( CASE WHEN key < 100 THEN 0 WHEN key < 200 THEN 1 WHEN key < 300 THEN 2 WHEN key < 400 THEN 3 ELSE 0 END, key, 2.0, array("one","two"), struct(5,"five")) FROM srcbucket2; +SELECT * from destBin ORDER BY a; +DROP TABLE destBin; Modified: hive/branches/cbo/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out?rev=1624140&r1=1624139&r2=1624140&view=diff ============================================================================== --- hive/branches/cbo/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out (original) +++ hive/branches/cbo/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out Wed Sep 10 21:41:16 2014 @@ -1951,6 +1951,27 @@ Bucket Columns: [si] Sort Columns: [Order(col:f, order:1)] Storage Desc Params: serialization.format 1 +PREHOOK: query: explain select * from over1k_part_buck_sort2_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from over1k_part_buck_sort2_orc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: over1k_part_buck_sort2_orc + Statistics: Num rows: 19 Data size: 493 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 19 Data size: 493 Basic stats: COMPLETE Column stats: NONE + ListSink + PREHOOK: query: select * from over1k_part_buck_sort2_orc PREHOOK: type: QUERY PREHOOK: Input: default@over1k_part_buck_sort2_orc @@ -1982,6 +2003,56 @@ POSTHOOK: Input: default@over1k_part_buc 409 65536 4294967490 46.97 NULL 374 65560 4294967516 65.43 NULL 473 65720 4294967324 80.74 NULL +PREHOOK: query: explain select count(*) from over1k_part_buck_sort2_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(*) from over1k_part_buck_sort2_orc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over1k_part_buck_sort2_orc + Statistics: Num rows: 19 Data size: 493 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 19 Data size: 493 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select count(*) from over1k_part_buck_sort2_orc PREHOOK: type: QUERY PREHOOK: Input: default@over1k_part_buck_sort2_orc @@ -2098,6 +2169,27 @@ Bucket Columns: [si] Sort Columns: [Order(col:f, order:1)] Storage Desc Params: serialization.format 1 +PREHOOK: query: explain select * from over1k_part_buck_sort2_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from over1k_part_buck_sort2_orc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: over1k_part_buck_sort2_orc + Statistics: Num rows: 19 Data size: 493 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 19 Data size: 493 Basic stats: COMPLETE Column stats: NONE + ListSink + PREHOOK: query: select * from over1k_part_buck_sort2_orc PREHOOK: type: QUERY PREHOOK: Input: default@over1k_part_buck_sort2_orc @@ -2129,6 +2221,56 @@ POSTHOOK: Input: default@over1k_part_buc 409 65536 4294967490 46.97 NULL 374 65560 4294967516 65.43 NULL 473 65720 4294967324 80.74 NULL +PREHOOK: query: explain select count(*) from over1k_part_buck_sort2_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(*) from over1k_part_buck_sort2_orc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over1k_part_buck_sort2_orc + Statistics: Num rows: 19 Data size: 493 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 19 Data size: 493 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select count(*) from over1k_part_buck_sort2_orc PREHOOK: type: QUERY PREHOOK: Input: default@over1k_part_buck_sort2_orc Modified: hive/branches/cbo/ql/src/test/results/clientpositive/input_lazyserde.q.out URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/results/clientpositive/input_lazyserde.q.out?rev=1624140&r1=1624139&r2=1624140&view=diff ============================================================================== --- hive/branches/cbo/ql/src/test/results/clientpositive/input_lazyserde.q.out (original) +++ hive/branches/cbo/ql/src/test/results/clientpositive/input_lazyserde.q.out Wed Sep 10 21:41:16 2014 @@ -219,3 +219,536 @@ NULL {"key_7":"value_7"} {"key_8":"value_8"} {"key_9":"value_9"} +PREHOOK: query: CREATE TABLE destBin(a UNIONTYPE<int, double, array<string>, struct<col1:int,col2:string>>) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe' STORED AS SEQUENCEFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@destBin +POSTHOOK: query: CREATE TABLE destBin(a UNIONTYPE<int, double, array<string>, struct<col1:int,col2:string>>) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe' STORED AS SEQUENCEFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@destBin +PREHOOK: query: INSERT OVERWRITE TABLE destBin SELECT create_union( CASE WHEN key < 100 THEN 0 WHEN key < 200 THEN 1 WHEN key < 300 THEN 2 WHEN key < 400 THEN 3 ELSE 0 END, key, 2.0, array("one","two"), struct(5,"five")) FROM srcbucket2 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket2 +PREHOOK: Output: default@destbin +POSTHOOK: query: INSERT OVERWRITE TABLE destBin SELECT create_union( CASE WHEN key < 100 THEN 0 WHEN key < 200 THEN 1 WHEN key < 300 THEN 2 WHEN key < 400 THEN 3 ELSE 0 END, key, 2.0, array("one","two"), struct(5,"five")) FROM srcbucket2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket2 +POSTHOOK: Output: default@destbin +POSTHOOK: Lineage: destbin.a EXPRESSION [(srcbucket2)srcbucket2.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: SELECT * from destBin ORDER BY a +PREHOOK: type: QUERY +PREHOOK: Input: default@destbin +#### A masked pattern was here #### +POSTHOOK: query: SELECT * from destBin ORDER BY a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@destbin +#### A masked pattern was here #### +{0:0} +{0:0} +{0:0} +{0:10} +{0:11} +{0:12} +{0:12} +{0:15} +{0:15} +{0:17} +{0:18} +{0:18} +{0:19} +{0:20} +{0:24} +{0:24} +{0:26} +{0:26} +{0:27} +{0:28} +{0:2} +{0:30} +{0:33} +{0:34} +{0:35} +{0:35} +{0:35} +{0:37} +{0:37} +{0:400} +{0:401} +{0:401} +{0:401} +{0:401} +{0:401} +{0:402} +{0:403} +{0:403} +{0:403} +{0:404} +{0:404} +{0:406} +{0:406} +{0:406} +{0:406} +{0:407} +{0:409} +{0:409} +{0:409} +{0:411} +{0:413} +{0:413} +{0:414} +{0:414} +{0:417} +{0:417} +{0:417} +{0:418} +{0:419} +{0:41} +{0:421} +{0:424} +{0:424} +{0:427} +{0:429} +{0:429} +{0:42} +{0:42} +{0:430} +{0:430} +{0:430} +{0:431} +{0:431} +{0:431} +{0:432} +{0:435} +{0:436} +{0:437} +{0:438} +{0:438} +{0:438} +{0:439} +{0:439} +{0:43} +{0:443} +{0:444} +{0:446} +{0:448} +{0:449} +{0:44} +{0:452} +{0:453} +{0:454} +{0:454} +{0:454} +{0:455} +{0:457} +{0:458} +{0:458} +{0:459} +{0:459} +{0:460} +{0:462} +{0:462} +{0:463} +{0:463} +{0:466} +{0:466} +{0:466} +{0:467} +{0:468} +{0:468} +{0:468} +{0:468} +{0:469} +{0:469} +{0:469} +{0:469} +{0:469} +{0:470} +{0:472} +{0:475} +{0:477} +{0:478} +{0:478} +{0:479} +{0:47} +{0:480} +{0:480} +{0:480} +{0:481} +{0:482} +{0:483} +{0:484} +{0:485} +{0:487} +{0:489} +{0:489} +{0:489} +{0:489} +{0:490} +{0:491} +{0:492} +{0:492} +{0:493} +{0:494} +{0:495} +{0:496} +{0:497} +{0:498} +{0:498} +{0:498} +{0:4} +{0:51} +{0:51} +{0:53} +{0:54} +{0:57} +{0:58} +{0:58} +{0:5} +{0:5} +{0:5} +{0:64} +{0:65} +{0:66} +{0:67} +{0:67} +{0:69} +{0:70} +{0:70} +{0:70} +{0:72} +{0:72} +{0:74} +{0:76} +{0:76} +{0:77} +{0:78} +{0:80} +{0:82} +{0:83} +{0:83} +{0:84} +{0:84} +{0:85} +{0:86} +{0:87} +{0:8} +{0:90} +{0:90} +{0:90} +{0:92} +{0:95} +{0:95} +{0:96} +{0:97} +{0:97} +{0:98} +{0:98} +{0:9} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{1:2.0} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{2:["one","two"]} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +{3:{"col1":5,"col2":"five"}} +PREHOOK: query: DROP TABLE destBin +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@destbin +PREHOOK: Output: default@destbin +POSTHOOK: query: DROP TABLE destBin +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@destbin +POSTHOOK: Output: default@destbin Modified: hive/branches/cbo/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out?rev=1624140&r1=1624139&r2=1624140&view=diff ============================================================================== --- hive/branches/cbo/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out (original) +++ hive/branches/cbo/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out Wed Sep 10 21:41:16 2014 @@ -2026,6 +2026,25 @@ Bucket Columns: [si] Sort Columns: [Order(col:f, order:1)] Storage Desc Params: serialization.format 1 +PREHOOK: query: explain select * from over1k_part_buck_sort2_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from over1k_part_buck_sort2_orc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: over1k_part_buck_sort2_orc + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + ListSink + PREHOOK: query: select * from over1k_part_buck_sort2_orc PREHOOK: type: QUERY PREHOOK: Input: default@over1k_part_buck_sort2_orc @@ -2057,6 +2076,63 @@ POSTHOOK: Input: default@over1k_part_buc 409 65536 4294967490 46.97 NULL 374 65560 4294967516 65.43 NULL 473 65720 4294967324 80.74 NULL +PREHOOK: query: explain select count(*) from over1k_part_buck_sort2_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(*) from over1k_part_buck_sort2_orc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over1k_part_buck_sort2_orc + Statistics: Num rows: 19 Data size: 493 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 19 Data size: 493 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select count(*) from over1k_part_buck_sort2_orc PREHOOK: type: QUERY PREHOOK: Input: default@over1k_part_buck_sort2_orc @@ -2173,6 +2249,25 @@ Bucket Columns: [si] Sort Columns: [Order(col:f, order:1)] Storage Desc Params: serialization.format 1 +PREHOOK: query: explain select * from over1k_part_buck_sort2_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from over1k_part_buck_sort2_orc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: over1k_part_buck_sort2_orc + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + ListSink + PREHOOK: query: select * from over1k_part_buck_sort2_orc PREHOOK: type: QUERY PREHOOK: Input: default@over1k_part_buck_sort2_orc @@ -2204,6 +2299,63 @@ POSTHOOK: Input: default@over1k_part_buc 409 65536 4294967490 46.97 NULL 374 65560 4294967516 65.43 NULL 473 65720 4294967324 80.74 NULL +PREHOOK: query: explain select count(*) from over1k_part_buck_sort2_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(*) from over1k_part_buck_sort2_orc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over1k_part_buck_sort2_orc + Statistics: Num rows: 19 Data size: 493 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 19 Data size: 493 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select count(*) from over1k_part_buck_sort2_orc PREHOOK: type: QUERY PREHOOK: Input: default@over1k_part_buck_sort2_orc Modified: hive/branches/cbo/ql/src/test/results/clientpositive/tez/tez_join_hash.q.out URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/results/clientpositive/tez/tez_join_hash.q.out?rev=1624140&r1=1624139&r2=1624140&view=diff ============================================================================== --- hive/branches/cbo/ql/src/test/results/clientpositive/tez/tez_join_hash.q.out (original) +++ hive/branches/cbo/ql/src/test/results/clientpositive/tez/tez_join_hash.q.out Wed Sep 10 21:41:16 2014 @@ -103,6 +103,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator Modified: hive/branches/cbo/ql/src/test/results/clientpositive/tez/vector_left_outer_join.q.out URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/results/clientpositive/tez/vector_left_outer_join.q.out?rev=1624140&r1=1624139&r2=1624140&view=diff ============================================================================== --- hive/branches/cbo/ql/src/test/results/clientpositive/tez/vector_left_outer_join.q.out (original) +++ hive/branches/cbo/ql/src/test/results/clientpositive/tez/vector_left_outer_join.q.out Wed Sep 10 21:41:16 2014 @@ -106,6 +106,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator Modified: hive/branches/cbo/ql/src/test/results/clientpositive/tez/vectorized_nested_mapjoin.q.out URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/results/clientpositive/tez/vectorized_nested_mapjoin.q.out?rev=1624140&r1=1624139&r2=1624140&view=diff ============================================================================== --- hive/branches/cbo/ql/src/test/results/clientpositive/tez/vectorized_nested_mapjoin.q.out (original) +++ hive/branches/cbo/ql/src/test/results/clientpositive/tez/vectorized_nested_mapjoin.q.out Wed Sep 10 21:41:16 2014 @@ -114,6 +114,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator Modified: hive/branches/cbo/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out?rev=1624140&r1=1624139&r2=1624140&view=diff ============================================================================== Files hive/branches/cbo/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out (original) and hive/branches/cbo/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out Wed Sep 10 21:41:16 2014 differ Modified: hive/branches/cbo/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java?rev=1624140&r1=1624139&r2=1624140&view=diff ============================================================================== --- hive/branches/cbo/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java (original) +++ hive/branches/cbo/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java Wed Sep 10 21:41:16 2014 @@ -37,6 +37,8 @@ public class serdeConstants { public static final String SERIALIZATION_CLASS = "serialization.class"; + public static final String SERIALIZATION_TYPE = "serialization.type"; + public static final String SERIALIZATION_FORMAT = "serialization.format"; public static final String SERIALIZATION_DDL = "serialization.ddl"; Modified: hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java?rev=1624140&r1=1624139&r2=1624140&view=diff ============================================================================== --- hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java (original) +++ hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java Wed Sep 10 21:41:16 2014 @@ -48,7 +48,8 @@ public interface PredicateLeaf { STRING, // string, char, varchar DATE, DECIMAL, - TIMESTAMP + TIMESTAMP, + BOOLEAN } /** Modified: hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroGenericRecordWritable.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroGenericRecordWritable.java?rev=1624140&r1=1624139&r2=1624140&view=diff ============================================================================== --- hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroGenericRecordWritable.java (original) +++ hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroGenericRecordWritable.java Wed Sep 10 21:41:16 2014 @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hive.serde2.avro; +import java.io.ByteArrayInputStream; import java.io.DataInput; import java.io.DataOutput; import java.io.DataOutputStream; @@ -25,6 +26,7 @@ import java.io.InputStream; import java.rmi.server.UID; import org.apache.avro.Schema; +import org.apache.avro.file.DataFileStream; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; @@ -101,6 +103,28 @@ public class AvroGenericRecordWritable i GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>(schema); record = gdr.read(record, binaryDecoder); } + + public void readFields(byte[] bytes, int offset, int length, Schema writerSchema, Schema readerSchema) throws IOException { + fileSchema = writerSchema; + record = new GenericData.Record(writerSchema); + binaryDecoder = + DecoderFactory.get().binaryDecoder(bytes, offset, length - offset, + binaryDecoder); + GenericDatumReader<GenericRecord> gdr = + new GenericDatumReader<GenericRecord>(writerSchema, readerSchema); + record = gdr.read(null, binaryDecoder); + } + + public void readFields(byte[] bytes, Schema writerSchema, Schema readerSchema) throws IOException { + fileSchema = writerSchema; + record = new GenericData.Record(writerSchema); + GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>(); + gdr.setExpected(readerSchema); + ByteArrayInputStream is = new ByteArrayInputStream(bytes); + DataFileStream<GenericRecord> dfr = new DataFileStream<GenericRecord>(is, gdr); + record = dfr.next(record); + dfr.close(); + } public UID getRecordReaderID() { return recordReaderID; @@ -117,5 +141,4 @@ public class AvroGenericRecordWritable i public void setFileSchema(Schema originalSchema) { this.fileSchema = originalSchema; } - } Modified: hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorGenerator.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorGenerator.java?rev=1624140&r1=1624139&r2=1624140&view=diff ============================================================================== --- hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorGenerator.java (original) +++ hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorGenerator.java Wed Sep 10 21:41:16 2014 @@ -40,7 +40,7 @@ import org.apache.hadoop.hive.serde2.typ * * A list of those fields equivalent types in Hive * * An ObjectInspector capable of working with an instance of that datum. */ -class AvroObjectInspectorGenerator { +public class AvroObjectInspectorGenerator { final private List<String> columnNames; final private List<TypeInfo> columnTypes; final private ObjectInspector oi; Modified: hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java?rev=1624140&r1=1624139&r2=1624140&view=diff ============================================================================== --- hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java (original) +++ hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java Wed Sep 10 21:41:16 2014 @@ -57,6 +57,7 @@ public class AvroSerdeUtils { public static final String EXCEPTION_MESSAGE = "Neither " + SCHEMA_LITERAL + " nor " + SCHEMA_URL + " specified, can't determine table schema"; public static final String AVRO_SERDE_SCHEMA = "avro.serde.schema"; + public static final String SCHEMA_RETRIEVER = "avro.schema.retriever"; /** * Determine the schema to that's been provided for Avro serde work. Modified: hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java?rev=1624140&r1=1624139&r2=1624140&view=diff ============================================================================== --- hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java (original) +++ hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java Wed Sep 10 21:41:16 2014 @@ -53,6 +53,7 @@ import org.apache.hadoop.hive.serde2.laz import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; @@ -217,11 +218,11 @@ public final class LazyFactory { */ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, byte[] separator, int separatorIndex, Text nullSequence, boolean escaped, - byte escapeChar) throws SerDeException { + byte escapeChar, ObjectInspectorOptions option) throws SerDeException { return createLazyObjectInspector(typeInfo, separator, separatorIndex, nullSequence, - escaped, escapeChar, false); + escaped, escapeChar, false, option); } - + /** * Create a hierarchical ObjectInspector for LazyObject with the given * typeInfo. @@ -236,13 +237,54 @@ public final class LazyFactory { * delimiting entries, the second one for delimiting key and values. * @param nullSequence * The sequence of bytes representing NULL. + * @return The ObjectInspector + * @throws SerDeException + */ + public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, + byte[] separator, int separatorIndex, Text nullSequence, boolean escaped, + byte escapeChar) throws SerDeException { + return createLazyObjectInspector(typeInfo, separator, separatorIndex, nullSequence, + escaped, escapeChar, false, ObjectInspectorOptions.JAVA); + } + + /** + * Create a hierarchical ObjectInspector for LazyObject with the given typeInfo. + * + * @param typeInfo The type information for the LazyObject + * @param separator The array of separators for delimiting each level + * @param separatorIndex The current level (for separators). List(array), struct uses 1 level of + * separator, and map uses 2 levels: the first one for delimiting entries, the second one + * for delimiting key and values. + * @param nullSequence The sequence of bytes representing NULL. * @param extendedBooleanLiteral whether extended boolean literal set is legal + * @param option the {@link ObjectInspectorOption} * @return The ObjectInspector * @throws SerDeException */ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, byte[] separator, int separatorIndex, Text nullSequence, boolean escaped, byte escapeChar, boolean extendedBooleanLiteral) throws SerDeException { + return createLazyObjectInspector(typeInfo, separator, separatorIndex, nullSequence, escaped, + escapeChar, extendedBooleanLiteral, ObjectInspectorOptions.JAVA); + } + + /** + * Create a hierarchical ObjectInspector for LazyObject with the given typeInfo. + * + * @param typeInfo The type information for the LazyObject + * @param separator The array of separators for delimiting each level + * @param separatorIndex The current level (for separators). List(array), struct uses 1 level of + * separator, and map uses 2 levels: the first one for delimiting entries, the second one + * for delimiting key and values. + * @param nullSequence The sequence of bytes representing NULL. + * @param extendedBooleanLiteral whether extended boolean literal set is legal + * @param option the {@link ObjectInspectorOption} + * @return The ObjectInspector + * @throws SerDeException + */ + public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, + byte[] separator, int separatorIndex, Text nullSequence, boolean escaped, + byte escapeChar, boolean extendedBooleanLiteral, ObjectInspectorOptions option) throws SerDeException { ObjectInspector.Category c = typeInfo.getCategory(); switch (c) { case PRIMITIVE: @@ -252,9 +294,9 @@ public final class LazyFactory { return LazyObjectInspectorFactory.getLazySimpleMapObjectInspector( createLazyObjectInspector(((MapTypeInfo) typeInfo) .getMapKeyTypeInfo(), separator, separatorIndex + 2, - nullSequence, escaped, escapeChar, extendedBooleanLiteral), createLazyObjectInspector( + nullSequence, escaped, escapeChar, extendedBooleanLiteral, option), createLazyObjectInspector( ((MapTypeInfo) typeInfo).getMapValueTypeInfo(), separator, - separatorIndex + 2, nullSequence, escaped, escapeChar, extendedBooleanLiteral), + separatorIndex + 2, nullSequence, escaped, escapeChar, extendedBooleanLiteral, option), LazyUtils.getSeparator(separator, separatorIndex), LazyUtils.getSeparator(separator, separatorIndex+1), nullSequence, escaped, escapeChar); @@ -262,7 +304,7 @@ public final class LazyFactory { return LazyObjectInspectorFactory.getLazySimpleListObjectInspector( createLazyObjectInspector(((ListTypeInfo) typeInfo) .getListElementTypeInfo(), separator, separatorIndex + 1, - nullSequence, escaped, escapeChar, extendedBooleanLiteral), LazyUtils.getSeparator(separator, separatorIndex), + nullSequence, escaped, escapeChar, extendedBooleanLiteral, option), LazyUtils.getSeparator(separator, separatorIndex), nullSequence, escaped, escapeChar); case STRUCT: StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; @@ -274,19 +316,20 @@ public final class LazyFactory { for (int i = 0; i < fieldTypeInfos.size(); i++) { fieldObjectInspectors.add(createLazyObjectInspector(fieldTypeInfos .get(i), separator, separatorIndex + 1, nullSequence, escaped, - escapeChar, extendedBooleanLiteral)); + escapeChar, extendedBooleanLiteral, option)); } return LazyObjectInspectorFactory.getLazySimpleStructObjectInspector( fieldNames, fieldObjectInspectors, LazyUtils.getSeparator(separator, separatorIndex), - nullSequence, false, escaped, escapeChar); + nullSequence, + false, escaped, escapeChar, option); case UNION: UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo; List<ObjectInspector> lazyOIs = new ArrayList<ObjectInspector>(); for (TypeInfo uti : unionTypeInfo.getAllUnionObjectTypeInfos()) { lazyOIs.add(createLazyObjectInspector(uti, separator, separatorIndex + 1, nullSequence, escaped, - escapeChar, extendedBooleanLiteral)); + escapeChar, extendedBooleanLiteral, option)); } return LazyObjectInspectorFactory.getLazyUnionObjectInspector(lazyOIs, LazyUtils.getSeparator(separator, separatorIndex), Modified: hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java?rev=1624140&r1=1624139&r2=1624140&view=diff ============================================================================== --- hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java (original) +++ hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java Wed Sep 10 21:41:16 2014 @@ -342,4 +342,13 @@ public class LazyStruct extends LazyNonP } return indexes; } -} + + /** + * Return the data in bytes corresponding to this given struct. This is useful specifically in + * cases where the data is stored in serialized formats like protobufs or thrift and would need + * custom deserializers to be deserialized. + * */ + public byte[] getBytes() { + return bytes.getData(); + } +} \ No newline at end of file Modified: hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java?rev=1624140&r1=1624139&r2=1624140&view=diff ============================================================================== --- hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java (original) +++ hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java Wed Sep 10 21:41:16 2014 @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.serde2.lazy; import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyUnionObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.io.Text; /** @@ -26,8 +25,7 @@ import org.apache.hadoop.io.Text; * non-primitive. * */ -public class LazyUnion extends - LazyNonPrimitive<LazyUnionObjectInspector> { +public class LazyUnion extends LazyNonPrimitive<LazyUnionObjectInspector> { /** * Whether the data is already parsed or not. */ @@ -41,7 +39,7 @@ public class LazyUnion extends /** * The object of the union. */ - private LazyObject<? extends ObjectInspector> field; + private Object field; /** * Tag of the Union @@ -54,6 +52,16 @@ public class LazyUnion extends private boolean fieldInited = false; /** + * Whether the tag has been set or not + * */ + private boolean tagSet = false; + + /** + * Whether the field has been set or not + * */ + private boolean fieldSet = false; + + /** * Construct a LazyUnion object with the ObjectInspector. */ public LazyUnion(LazyUnionObjectInspector oi) { @@ -123,6 +131,7 @@ public class LazyUnion extends * * @return The value of the field */ + @SuppressWarnings("rawtypes") private Object uncheckedGetField() { Text nullSequence = oi.getNullSequence(); int fieldLength = start + length - startPosition; @@ -134,9 +143,9 @@ public class LazyUnion extends if (!fieldInited) { fieldInited = true; - field.init(bytes, startPosition, fieldLength); + ((LazyObject) field).init(bytes, startPosition, fieldLength); } - return field.getObject(); + return ((LazyObject) field).getObject(); } /** @@ -145,6 +154,10 @@ public class LazyUnion extends * @return The field as a LazyObject */ public Object getField() { + if (fieldSet) { + return field; + } + if (!parsed) { parse(); } @@ -157,9 +170,33 @@ public class LazyUnion extends * @return The tag byte */ public byte getTag() { + if (tagSet) { + return tag; + } + if (!parsed) { parse(); } return tag; } -} + + /** + * Set the field of the union + * + * @param field the field to be set + * */ + public void setField(Object field) { + this.field = field; + fieldSet = true; + } + + /** + * Set the tag for the union + * + * @param tag the tag to be set + * */ + public void setTag(byte tag) { + this.tag = tag; + tagSet = true; + } +} \ No newline at end of file Modified: hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java?rev=1624140&r1=1624139&r2=1624140&view=diff ============================================================================== --- hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java (original) +++ hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java Wed Sep 10 21:41:16 2014 @@ -22,7 +22,9 @@ import java.util.ArrayList; import java.util.List; import java.util.concurrent.ConcurrentHashMap; +import org.apache.hadoop.hive.serde2.avro.AvroLazyObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; import org.apache.hadoop.io.Text; /** @@ -48,14 +50,34 @@ public final class LazyObjectInspectorFa byte escapeChar) { return getLazySimpleStructObjectInspector(structFieldNames, structFieldObjectInspectors, null, separator, nullSequence, - lastColumnTakesRest, escaped, escapeChar); + lastColumnTakesRest, escaped, escapeChar, ObjectInspectorOptions.JAVA); + } + + public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector( + List<String> structFieldNames, + List<ObjectInspector> structFieldObjectInspectors, byte separator, + Text nullSequence, boolean lastColumnTakesRest, boolean escaped, + byte escapeChar, ObjectInspectorOptions option) { + return getLazySimpleStructObjectInspector(structFieldNames, + structFieldObjectInspectors, null, separator, nullSequence, + lastColumnTakesRest, escaped, escapeChar, option); } public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector( List<String> structFieldNames, List<ObjectInspector> structFieldObjectInspectors, List<String> structFieldComments, byte separator, Text nullSequence, boolean lastColumnTakesRest, - boolean escaped,byte escapeChar) { + boolean escaped, byte escapeChar) { + return getLazySimpleStructObjectInspector(structFieldNames, structFieldObjectInspectors, + structFieldComments, separator, nullSequence, lastColumnTakesRest, escaped, escapeChar, + ObjectInspectorOptions.JAVA); + } + + public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector( + List<String> structFieldNames, + List<ObjectInspector> structFieldObjectInspectors, List<String> structFieldComments, + byte separator, Text nullSequence, boolean lastColumnTakesRest, + boolean escaped,byte escapeChar, ObjectInspectorOptions option) { ArrayList<Object> signature = new ArrayList<Object>(); signature.add(structFieldNames); signature.add(structFieldObjectInspectors); @@ -64,15 +86,30 @@ public final class LazyObjectInspectorFa signature.add(Boolean.valueOf(lastColumnTakesRest)); signature.add(Boolean.valueOf(escaped)); signature.add(Byte.valueOf(escapeChar)); + signature.add(option); if(structFieldComments != null) { signature.add(structFieldComments); } LazySimpleStructObjectInspector result = cachedLazySimpleStructObjectInspector .get(signature); if (result == null) { - result = new LazySimpleStructObjectInspector(structFieldNames, - structFieldObjectInspectors, structFieldComments, separator, - nullSequence, lastColumnTakesRest, escaped, escapeChar); + switch (option) { + case JAVA: + result = + new LazySimpleStructObjectInspector(structFieldNames, structFieldObjectInspectors, + structFieldComments, separator, nullSequence, lastColumnTakesRest, escaped, + escapeChar); + break; + case AVRO: + result = + new AvroLazyObjectInspector(structFieldNames, structFieldObjectInspectors, + structFieldComments, separator, nullSequence, lastColumnTakesRest, escaped, + escapeChar); + break; + default: + throw new IllegalArgumentException("Illegal ObjectInspector type [" + option + "]"); + } + cachedLazySimpleStructObjectInspector.put(signature, result); } return result; Modified: hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazySimpleStructObjectInspector.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazySimpleStructObjectInspector.java?rev=1624140&r1=1624139&r2=1624140&view=diff ============================================================================== --- hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazySimpleStructObjectInspector.java (original) +++ hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazySimpleStructObjectInspector.java Wed Sep 10 21:41:16 2014 @@ -18,10 +18,13 @@ package org.apache.hadoop.hive.serde2.lazy.objectinspector; +import java.util.ArrayList; import java.util.List; import org.apache.hadoop.hive.serde2.BaseStructObjectInspector; import org.apache.hadoop.hive.serde2.StructObject; +import org.apache.hadoop.hive.serde2.avro.AvroLazyObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.io.Text; @@ -100,6 +103,20 @@ public class LazySimpleStructObjectInspe int fieldID = f.getFieldID(); assert (fieldID >= 0 && fieldID < fields.size()); + ObjectInspector oi = f.getFieldObjectInspector(); + + if (oi instanceof AvroLazyObjectInspector) { + return ((AvroLazyObjectInspector) oi).getStructFieldData(data, fieldRef); + } + + if (oi instanceof MapObjectInspector) { + ObjectInspector valueOI = ((MapObjectInspector) oi).getMapValueObjectInspector(); + + if (valueOI instanceof AvroLazyObjectInspector) { + return ((AvroLazyObjectInspector) valueOI).getStructFieldData(data, fieldRef); + } + } + return struct.getField(fieldID); } @@ -108,8 +125,15 @@ public class LazySimpleStructObjectInspe if (data == null) { return null; } - StructObject struct = (StructObject) data; - return struct.getFieldsAsList(); + + // Iterate over all the fields picking up the nested structs within them + List<Object> result = new ArrayList<Object>(fields.size()); + + for (MyField myField : fields) { + result.add(getStructFieldData(data, myField)); + } + + return result; } // For LazyStruct Modified: hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java?rev=1624140&r1=1624139&r2=1624140&view=diff ============================================================================== --- hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java (original) +++ hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java Wed Sep 10 21:41:16 2014 @@ -23,6 +23,7 @@ import java.util.List; import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryListObjectInspector; import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryMapObjectInspector; import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector; +import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryUnionObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; @@ -106,6 +107,8 @@ public final class LazyBinaryFactory { return new LazyBinaryArray((LazyBinaryListObjectInspector) oi); case STRUCT: return new LazyBinaryStruct((LazyBinaryStructObjectInspector) oi); + case UNION: + return new LazyBinaryUnion((LazyBinaryUnionObjectInspector) oi); } throw new RuntimeException("Hive LazyBinarySerDe Internal error."); Modified: hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java?rev=1624140&r1=1624139&r2=1624140&view=diff ============================================================================== --- hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java (original) +++ hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java Wed Sep 10 21:41:16 2014 @@ -43,8 +43,8 @@ import org.apache.hadoop.hive.serde2.laz import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; @@ -281,6 +281,13 @@ public class LazyBinarySerDe extends Abs } } + private static void serializeUnion(RandomAccessOutput byteStream, Object obj, + UnionObjectInspector uoi, BooleanRef warnedOnceNullMapKey) throws SerDeException { + byte tag = uoi.getTag(obj); + byteStream.write(tag); + serialize(byteStream, uoi.getField(obj), uoi.getObjectInspectors().get(tag), false, warnedOnceNullMapKey); + } + private static void serializeText( RandomAccessOutput byteStream, Text t, boolean skipLengthPrefix) { /* write byte size of the string which is a vint */ @@ -544,24 +551,31 @@ public class LazyBinarySerDe extends Abs } return; } - case STRUCT: { + case STRUCT: + case UNION:{ int byteSizeStart = 0; - int structStart = 0; + int typeStart = 0; if (!skipLengthPrefix) { // 1/ reserve spaces for the byte size of the struct // which is a integer and takes four bytes byteSizeStart = byteStream.getLength(); byteStream.reserve(4); - structStart = byteStream.getLength(); + typeStart = byteStream.getLength(); + } + + if (ObjectInspector.Category.STRUCT.equals(objInspector.getCategory()) ) { + // 2/ serialize the struct + serializeStruct(byteStream, obj, (StructObjectInspector) objInspector, warnedOnceNullMapKey); + } else { + // 2/ serialize the union + serializeUnion(byteStream, obj, (UnionObjectInspector) objInspector, warnedOnceNullMapKey); } - // 2/ serialize the struct - serializeStruct(byteStream, obj, (StructObjectInspector) objInspector, warnedOnceNullMapKey); if (!skipLengthPrefix) { // 3/ update the byte size of the struct - int structEnd = byteStream.getLength(); - int structSize = structEnd - structStart; - writeSizeAtOffset(byteStream, byteSizeStart, structSize); + int typeEnd = byteStream.getLength(); + int typeSize = typeEnd - typeStart; + writeSizeAtOffset(byteStream, byteSizeStart, typeSize); } return; } Modified: hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java?rev=1624140&r1=1624139&r2=1624140&view=diff ============================================================================== --- hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java (original) +++ hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java Wed Sep 10 21:41:16 2014 @@ -37,6 +37,7 @@ import org.apache.hadoop.hive.serde2.typ import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.io.WritableUtils; @@ -226,6 +227,7 @@ public final class LazyBinaryUtils { case LIST: case MAP: case STRUCT: + case UNION: recordInfo.elementOffset = 4; recordInfo.elementSize = LazyBinaryUtils.byteArrayToInt(bytes, offset); break; @@ -474,6 +476,20 @@ public final class LazyBinaryUtils { fieldObjectInspectors); break; } + case UNION: { + UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo; + final List<TypeInfo> fieldTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos(); + List<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>( + fieldTypeInfos.size()); + for (int i = 0; i < fieldTypeInfos.size(); i++) { + fieldObjectInspectors + .add(getLazyBinaryObjectInspectorFromTypeInfo(fieldTypeInfos + .get(i))); + } + result = LazyBinaryObjectInspectorFactory + .getLazyBinaryUnionObjectInspector(fieldObjectInspectors); + break; + } default: { result = null; } Modified: hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryObjectInspectorFactory.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryObjectInspectorFactory.java?rev=1624140&r1=1624139&r2=1624140&view=diff ============================================================================== --- hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryObjectInspectorFactory.java (original) +++ hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryObjectInspectorFactory.java Wed Sep 10 21:41:16 2014 @@ -40,6 +40,9 @@ public final class LazyBinaryObjectInspe static ConcurrentHashMap<ArrayList<Object>, LazyBinaryStructObjectInspector> cachedLazyBinaryStructObjectInspector = new ConcurrentHashMap<ArrayList<Object>, LazyBinaryStructObjectInspector>(); + static ConcurrentHashMap<ArrayList<Object>, LazyBinaryUnionObjectInspector> cachedLazyBinaryUnionObjectInspector = + new ConcurrentHashMap<ArrayList<Object>, LazyBinaryUnionObjectInspector>(); + public static LazyBinaryStructObjectInspector getLazyBinaryStructObjectInspector( List<String> structFieldNames, List<ObjectInspector> structFieldObjectInspectors) { @@ -66,6 +69,20 @@ public final class LazyBinaryObjectInspe return result; } + public static LazyBinaryUnionObjectInspector getLazyBinaryUnionObjectInspector( + List<ObjectInspector> unionFieldObjectInspectors) { + ArrayList<Object> signature = new ArrayList<Object>(1); + signature.add(unionFieldObjectInspectors); + + LazyBinaryUnionObjectInspector result = cachedLazyBinaryUnionObjectInspector + .get(signature); + if (result == null) { + result = new LazyBinaryUnionObjectInspector(unionFieldObjectInspectors); + cachedLazyBinaryUnionObjectInspector.put(signature, result); + } + return result; + } + static ConcurrentHashMap<ArrayList<Object>, LazyBinaryListObjectInspector> cachedLazyBinaryListObjectInspector = new ConcurrentHashMap<ArrayList<Object>, LazyBinaryListObjectInspector>(); Modified: hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java?rev=1624140&r1=1624139&r2=1624140&view=diff ============================================================================== --- hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java (original) +++ hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java Wed Sep 10 21:41:16 2014 @@ -57,7 +57,7 @@ public final class ObjectInspectorFactor * for the same Java type. */ public enum ObjectInspectorOptions { - JAVA, THRIFT, PROTOCOL_BUFFERS + JAVA, THRIFT, PROTOCOL_BUFFERS, AVRO }; private static ConcurrentHashMap<Type, ObjectInspector> objectInspectorCache = new ConcurrentHashMap<Type, ObjectInspector>();
