HIVE-14493: Partitioning support for materialized views (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/163e0de9 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/163e0de9 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/163e0de9 Branch: refs/heads/master Commit: 163e0de98d982fb2188e64bf5e11660359dc1d28 Parents: 4d43695 Author: Jesus Camacho Rodriguez <[email protected]> Authored: Mon Jul 30 08:27:42 2018 -0700 Committer: Jesus Camacho Rodriguez <[email protected]> Committed: Wed Aug 1 12:59:24 2018 -0700 ---------------------------------------------------------------------- .../test/resources/testconfiguration.properties | 3 + .../metadata/HiveMaterializedViewsRegistry.java | 36 +- .../ql/optimizer/calcite/RelOptHiveTable.java | 13 +- .../hadoop/hive/ql/parse/CalcitePlanner.java | 11 +- .../apache/hadoop/hive/ql/parse/HiveParser.g | 29 +- .../materialized_view_partitioned.q | 62 + .../materialized_view_rewrite_part_1.q | 145 +++ .../materialized_view_rewrite_part_2.q | 164 +++ .../llap/materialized_view_partitioned.q.out | 1075 +++++++++++++++++ .../llap/materialized_view_rewrite_part_1.q.out | 1121 ++++++++++++++++++ .../llap/materialized_view_rewrite_part_2.q.out | 829 +++++++++++++ 11 files changed, 3448 insertions(+), 40 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/163e0de9/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 4d2eacb..a13ad28 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -564,6 +564,7 @@ minillaplocal.query.files=\ materialized_view_create_rewrite_5.q,\ materialized_view_describe.q,\ materialized_view_drop.q,\ + materialized_view_partitioned.q,\ materialized_view_rebuild.q,\ materialized_view_rewrite_empty.q,\ materialized_view_rewrite_1.q,\ @@ -575,6 +576,8 @@ minillaplocal.query.files=\ materialized_view_rewrite_7.q,\ materialized_view_rewrite_8.q,\ materialized_view_rewrite_9.q,\ + materialized_view_rewrite_part_1.q,\ + materialized_view_rewrite_part_2.q,\ materialized_view_rewrite_ssb.q,\ materialized_view_rewrite_ssb_2.q,\ mapjoin_decimal.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/163e0de9/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java index 5c9162f..696227b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java @@ -96,9 +96,6 @@ public final class HiveMaterializedViewsRegistry { private final ConcurrentMap<String, ConcurrentMap<String, RelOptMaterialization>> materializedViews = new ConcurrentHashMap<String, ConcurrentMap<String, RelOptMaterialization>>(); - /* If this boolean is true, we bypass the cache. */ - private boolean dummy; - /* Whether the cache has been initialized or not. */ private AtomicBoolean initialized = new AtomicBoolean(false); @@ -137,7 +134,7 @@ public final class HiveMaterializedViewsRegistry { } public void init(Hive db) { - dummy = db.getConf().get(HiveConf.ConfVars.HIVE_SERVER2_MATERIALIZED_VIEWS_REGISTRY_IMPL.varname) + final boolean dummy = db.getConf().get(HiveConf.ConfVars.HIVE_SERVER2_MATERIALIZED_VIEWS_REGISTRY_IMPL.varname) .equals("DUMMY"); if (dummy) { // Dummy registry does not cache information and forwards all requests to metastore @@ -162,9 +159,11 @@ public final class HiveMaterializedViewsRegistry { public void run() { try { SessionState.start(db.getConf()); + final boolean cache = !db.getConf() + .get(HiveConf.ConfVars.HIVE_SERVER2_MATERIALIZED_VIEWS_REGISTRY_IMPL.varname).equals("DUMMY"); for (String dbName : db.getAllDatabases()) { for (Table mv : db.getAllMaterializedViewObjects(dbName)) { - addMaterializedView(db.getConf(), mv, OpType.LOAD); + addMaterializedView(db.getConf(), mv, OpType.LOAD, cache); } } initialized.set(true); @@ -185,7 +184,9 @@ public final class HiveMaterializedViewsRegistry { * @param materializedViewTable the materialized view */ public RelOptMaterialization createMaterializedView(HiveConf conf, Table materializedViewTable) { - return addMaterializedView(conf, materializedViewTable, OpType.CREATE); + final boolean cache = !conf.get(HiveConf.ConfVars.HIVE_SERVER2_MATERIALIZED_VIEWS_REGISTRY_IMPL.varname) + .equals("DUMMY"); + return addMaterializedView(conf, materializedViewTable, OpType.CREATE, cache); } /** @@ -193,7 +194,8 @@ public final class HiveMaterializedViewsRegistry { * * @param materializedViewTable the materialized view */ - private RelOptMaterialization addMaterializedView(HiveConf conf, Table materializedViewTable, OpType opType) { + private RelOptMaterialization addMaterializedView(HiveConf conf, Table materializedViewTable, + OpType opType, boolean cache) { // Bail out if it is not enabled for rewriting if (!materializedViewTable.isRewriteEnabled()) { LOG.debug("Materialized view " + materializedViewTable.getCompleteName() + @@ -204,7 +206,7 @@ public final class HiveMaterializedViewsRegistry { // We are going to create the map for each view in the given database ConcurrentMap<String, RelOptMaterialization> cq = new ConcurrentHashMap<String, RelOptMaterialization>(); - if (!dummy) { + if (cache) { // If we are caching the MV, we include it in the cache final ConcurrentMap<String, RelOptMaterialization> prevCq = materializedViews.putIfAbsent( materializedViewTable.getDbName(), cq); @@ -219,13 +221,13 @@ public final class HiveMaterializedViewsRegistry { final RelNode viewScan = createMaterializedViewScan(conf, materializedViewTable); if (viewScan == null) { LOG.warn("Materialized view " + materializedViewTable.getCompleteName() + - " ignored; error creating view replacement"); + " ignored; error creating view replacement"); return null; } final RelNode queryRel = parseQuery(conf, viewQuery); if (queryRel == null) { LOG.warn("Materialized view " + materializedViewTable.getCompleteName() + - " ignored; error parsing original query"); + " ignored; error parsing original query"); return null; } @@ -261,10 +263,6 @@ public final class HiveMaterializedViewsRegistry { * @param tableName the name for the materialized view to remove */ public void dropMaterializedView(String dbName, String tableName) { - if (dummy) { - // Nothing to do - return; - } ConcurrentMap<String, RelOptMaterialization> dbMap = materializedViews.get(dbName); if (dbMap != null) { dbMap.remove(tableName); @@ -288,8 +286,8 @@ public final class HiveMaterializedViewsRegistry { // 0. Recreate cluster final RelOptPlanner planner = CalcitePlanner.createPlanner(conf); final RexBuilder rexBuilder = new RexBuilder( - new JavaTypeFactoryImpl( - new HiveTypeSystemImpl())); + new JavaTypeFactoryImpl( + new HiveTypeSystemImpl())); final RelOptCluster cluster = RelOptCluster.create(planner, rexBuilder); // 1. Create column schema @@ -380,7 +378,7 @@ public final class HiveMaterializedViewsRegistry { List<Interval> intervals = Arrays.asList(DruidTable.DEFAULT_INTERVAL); rowType = dtFactory.createStructType(druidColTypes, druidColNames); - RelOptHiveTable optTable = new RelOptHiveTable(null, fullyQualifiedTabName, + RelOptHiveTable optTable = new RelOptHiveTable(null, cluster.getTypeFactory(), fullyQualifiedTabName, rowType, viewTable, nonPartitionColumns, partitionColumns, new ArrayList<>(), conf, new HashMap<>(), new HashMap<>(), new AtomicInteger()); DruidTable druidTable = new DruidTable(new DruidSchema(address, address, false), @@ -392,7 +390,7 @@ public final class HiveMaterializedViewsRegistry { optTable, druidTable, ImmutableList.<RelNode>of(scan), ImmutableMap.of()); } else { // Build Hive Table Scan Rel - RelOptHiveTable optTable = new RelOptHiveTable(null, fullyQualifiedTabName, + RelOptHiveTable optTable = new RelOptHiveTable(null, cluster.getTypeFactory(), fullyQualifiedTabName, rowType, viewTable, nonPartitionColumns, partitionColumns, new ArrayList<>(), conf, new HashMap<>(), new HashMap<>(), new AtomicInteger()); tableRel = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable, @@ -448,4 +446,4 @@ public final class HiveMaterializedViewsRegistry { LOAD // already created view being loaded } -} +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/163e0de9/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java index 943227f..37e6d4c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java @@ -44,6 +44,7 @@ import org.apache.calcite.rel.RelReferentialConstraint; import org.apache.calcite.rel.RelReferentialConstraintImpl; import org.apache.calcite.rel.logical.LogicalTableScan; import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexNode; import org.apache.calcite.schema.ColumnStrategy; @@ -87,6 +88,7 @@ public class RelOptHiveTable implements RelOptTable { //~ Instance fields -------------------------------------------------------- private final RelOptSchema schema; + private final RelDataTypeFactory typeFactory; private final RelDataType rowType; private final List<String> qualifiedTblName; private final String name; @@ -110,12 +112,13 @@ public class RelOptHiveTable implements RelOptTable { protected static final Logger LOG = LoggerFactory.getLogger(RelOptHiveTable.class.getName()); - public RelOptHiveTable(RelOptSchema calciteSchema, List<String> qualifiedTblName, + public RelOptHiveTable(RelOptSchema calciteSchema, RelDataTypeFactory typeFactory, List<String> qualifiedTblName, RelDataType rowType, Table hiveTblMetadata, List<ColumnInfo> hiveNonPartitionCols, List<ColumnInfo> hivePartitionCols, List<VirtualColumn> hiveVirtualCols, HiveConf hconf, Map<String, PrunedPartitionList> partitionCache, Map<String, ColumnStatsList> colStatsCache, AtomicInteger noColsMissingStats) { this.schema = calciteSchema; + this.typeFactory = typeFactory; this.qualifiedTblName = ImmutableList.copyOf(qualifiedTblName); this.name = this.qualifiedTblName.stream().collect(Collectors.joining(".")); this.rowType = rowType; @@ -155,6 +158,10 @@ public class RelOptHiveTable implements RelOptTable { return schema; } + public RelDataTypeFactory getTypeFactory() { + return typeFactory; + } + @Override public Expression getExpression(Class clazz) { throw new UnsupportedOperationException(); @@ -201,7 +208,7 @@ public class RelOptHiveTable implements RelOptTable { } // 3. Build new Table - return new RelOptHiveTable(this.schema, this.qualifiedTblName, newRowType, + return new RelOptHiveTable(this.schema, this.typeFactory, this.qualifiedTblName, newRowType, this.hiveTblMetadata, newHiveNonPartitionCols, newHivePartitionCols, newHiveVirtualCols, this.hiveConf, this.partitionCache, this.colStatsCache, this.noColsMissingStats); } @@ -439,7 +446,7 @@ public class RelOptHiveTable implements RelOptTable { // We have valid pruning expressions, only retrieve qualifying partitions ExprNodeDesc pruneExpr = pruneNode.accept(new ExprNodeConverter(getName(), getRowType(), - partOrVirtualCols, this.getRelOptSchema().getTypeFactory())); + partOrVirtualCols, getTypeFactory())); partitionList = PartitionPruner.prune(hiveTblMetadata, pruneExpr, conf, getName(), partitionCache); http://git-wip-us.apache.org/repos/asf/hive/blob/163e0de9/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 61396e7..e6e0330 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -2152,7 +2152,6 @@ public class CalcitePlanner extends SemanticAnalyzer { final RelOptCluster optCluster = basePlan.getCluster(); final PerfLogger perfLogger = SessionState.getPerfLogger(); - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); final RelNode calcitePreMVRewritingPlan = basePlan; // Add views to planner @@ -2216,6 +2215,8 @@ public class CalcitePlanner extends SemanticAnalyzer { LOG.warn("Exception loading materialized views", e); } if (!materializations.isEmpty()) { + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); + // Use Calcite cost model for view rewriting optCluster.invalidateMetadataQuery(); RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(DefaultRelMetadataProvider.INSTANCE)); @@ -2240,7 +2241,9 @@ public class CalcitePlanner extends SemanticAnalyzer { // Restore default cost model optCluster.invalidateMetadataQuery(); RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(mdProvider)); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: View-based rewriting"); + if (calcitePreMVRewritingPlan != basePlan) { // A rewriting was produced, we will check whether it was part of an incremental rebuild // to try to replace INSERT OVERWRITE by INSERT @@ -2877,7 +2880,7 @@ public class CalcitePlanner extends SemanticAnalyzer { DruidTable druidTable = new DruidTable(new DruidSchema(address, address, false), dataSource, RelDataTypeImpl.proto(rowType), metrics, DruidTable.DEFAULT_TIMESTAMP_COLUMN, intervals, null, null); - RelOptHiveTable optTable = new RelOptHiveTable(relOptSchema, fullyQualifiedTabName, + RelOptHiveTable optTable = new RelOptHiveTable(relOptSchema, relOptSchema.getTypeFactory(), fullyQualifiedTabName, rowType, tabMetaData, nonPartitionColumns, partitionColumns, virtualCols, conf, partitionCache, colStatsCache, noColsMissingStats); final TableScan scan = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), @@ -2888,7 +2891,7 @@ public class CalcitePlanner extends SemanticAnalyzer { tableRel = DruidQuery.create(cluster, cluster.traitSetOf(BindableConvention.INSTANCE), optTable, druidTable, ImmutableList.of(scan), DruidSqlOperatorConverter.getDefaultMap()); } else if (tableType == TableType.JDBC) { - RelOptHiveTable optTable = new RelOptHiveTable(relOptSchema, fullyQualifiedTabName, + RelOptHiveTable optTable = new RelOptHiveTable(relOptSchema, relOptSchema.getTypeFactory(), fullyQualifiedTabName, rowType, tabMetaData, nonPartitionColumns, partitionColumns, virtualCols, conf, partitionCache, colStatsCache, noColsMissingStats); @@ -2929,7 +2932,7 @@ public class CalcitePlanner extends SemanticAnalyzer { fullyQualifiedTabName.add(tabMetaData.getDbName()); } fullyQualifiedTabName.add(tabMetaData.getTableName()); - RelOptHiveTable optTable = new RelOptHiveTable(relOptSchema, fullyQualifiedTabName, + RelOptHiveTable optTable = new RelOptHiveTable(relOptSchema, relOptSchema.getTypeFactory(), fullyQualifiedTabName, rowType, tabMetaData, nonPartitionColumns, partitionColumns, virtualCols, conf, partitionCache, colStatsCache, noColsMissingStats); // Build Hive Table Scan Rel http://git-wip-us.apache.org/repos/asf/hive/blob/163e0de9/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g index 49f5487..d581c60 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g @@ -1928,6 +1928,19 @@ createViewStatement ) ; +viewPartition +@init { pushMsg("view partition specification", state); } +@after { popMsg(state); } + : KW_PARTITIONED KW_ON LPAREN columnNameList RPAREN + -> ^(TOK_VIEWPARTCOLS columnNameList) + ; + +dropViewStatement +@init { pushMsg("drop view statement", state); } +@after { popMsg(state); } + : KW_DROP KW_VIEW ifExists? viewName -> ^(TOK_DROPVIEW viewName ifExists?) + ; + createMaterializedViewStatement @init { pushMsg("create materialized view statement", state); @@ -1935,7 +1948,7 @@ createMaterializedViewStatement @after { popMsg(state); } : KW_CREATE KW_MATERIALIZED KW_VIEW (ifNotExists)? name=tableName rewriteDisabled? tableComment? tableRowFormat? tableFileFormat? tableLocation? - tablePropertiesPrefixed? KW_AS selectStatementWithCTE + viewPartition? tablePropertiesPrefixed? KW_AS selectStatementWithCTE -> ^(TOK_CREATE_MATERIALIZED_VIEW $name ifNotExists? rewriteDisabled? @@ -1943,24 +1956,12 @@ createMaterializedViewStatement tableRowFormat? tableFileFormat? tableLocation? + viewPartition? tablePropertiesPrefixed? selectStatementWithCTE ) ; -viewPartition -@init { pushMsg("view partition specification", state); } -@after { popMsg(state); } - : KW_PARTITIONED KW_ON LPAREN columnNameList RPAREN - -> ^(TOK_VIEWPARTCOLS columnNameList) - ; - -dropViewStatement -@init { pushMsg("drop view statement", state); } -@after { popMsg(state); } - : KW_DROP KW_VIEW ifExists? viewName -> ^(TOK_DROPVIEW viewName ifExists?) - ; - dropMaterializedViewStatement @init { pushMsg("drop materialized view statement", state); } @after { popMsg(state); } http://git-wip-us.apache.org/repos/asf/hive/blob/163e0de9/ql/src/test/queries/clientpositive/materialized_view_partitioned.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/materialized_view_partitioned.q b/ql/src/test/queries/clientpositive/materialized_view_partitioned.q new file mode 100644 index 0000000..fcc7c1f --- /dev/null +++ b/ql/src/test/queries/clientpositive/materialized_view_partitioned.q @@ -0,0 +1,62 @@ +--! qt:dataset:src + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +CREATE TABLE src_txn stored as orc TBLPROPERTIES ('transactional' = 'true') +AS SELECT * FROM src; + +EXPLAIN +CREATE MATERIALIZED VIEW partition_mv_1 PARTITIONED ON (key) AS +SELECT value, key FROM src_txn where key > 200 and key < 250; + +CREATE MATERIALIZED VIEW partition_mv_1 PARTITIONED ON (key) AS +SELECT value, key FROM src_txn where key > 200 and key < 250; + +DESCRIBE FORMATTED partition_mv_1; + +EXPLAIN +SELECT * FROM partition_mv_1 where key = 238; + +SELECT * FROM partition_mv_1 where key = 238; + +CREATE MATERIALIZED VIEW partition_mv_2 PARTITIONED ON (value) AS +SELECT key, value FROM src_txn where key > 200 and key < 250; + +EXPLAIN +SELECT * FROM partition_mv_2 where value = 'val_238'; + +SELECT * FROM partition_mv_2 where value = 'val_238'; + +EXPLAIN +SELECT value FROM partition_mv_2 where key = 238; + +SELECT value FROM partition_mv_2 where key = 238; + +INSERT INTO src_txn VALUES (238, 'val_238_n'); + +EXPLAIN +ALTER MATERIALIZED VIEW partition_mv_1 REBUILD; + +ALTER MATERIALIZED VIEW partition_mv_1 REBUILD; + +SELECT * FROM partition_mv_1 where key = 238; + +SELECT * FROM partition_mv_2 where key = 238; + +CREATE TABLE src_txn_2 stored as orc TBLPROPERTIES ('transactional' = 'true') +AS SELECT * FROM src; + +CREATE MATERIALIZED VIEW partition_mv_3 PARTITIONED ON (key) AS +SELECT src_txn.value, src_txn.key FROM src_txn, src_txn_2 +WHERE src_txn.key = src_txn_2.key + AND src_txn.key > 200 AND src_txn.key < 250; + +INSERT INTO src_txn VALUES (238, 'val_238_n2'); + +EXPLAIN +ALTER MATERIALIZED VIEW partition_mv_3 REBUILD; + +ALTER MATERIALIZED VIEW partition_mv_3 REBUILD; + +SELECT * FROM partition_mv_3 where key = 238; http://git-wip-us.apache.org/repos/asf/hive/blob/163e0de9/ql/src/test/queries/clientpositive/materialized_view_rewrite_part_1.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/materialized_view_rewrite_part_1.q b/ql/src/test/queries/clientpositive/materialized_view_rewrite_part_1.q new file mode 100644 index 0000000..5a2e74c --- /dev/null +++ b/ql/src/test/queries/clientpositive/materialized_view_rewrite_part_1.q @@ -0,0 +1,145 @@ +-- SORT_QUERY_RESULTS + +set hive.vectorized.execution.enabled=false; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.strict.checks.cartesian.product=false; +set hive.stats.fetch.column.stats=true; +set hive.materializedview.rewriting=true; + +create table emps_n30 ( + empid int, + deptno int, + name varchar(256), + salary float, + commission int) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into emps_n30 values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (110, 10, 'Bill', 10000, 250); +analyze table emps_n30 compute statistics for columns; + +create table depts_n20 ( + deptno int, + name varchar(256), + locationid int) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into depts_n20 values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20); +analyze table depts_n20 compute statistics for columns; + +create table dependents_n20 ( + empid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into dependents_n20 values (10, 'Michael'), (10, 'Jane'); +analyze table dependents_n20 compute statistics for columns; + +create table locations_n20 ( + locationid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into locations_n20 values (10, 'San Francisco'), (10, 'San Diego'); +analyze table locations_n20 compute statistics for columns; + +alter table emps_n30 add constraint pk1 primary key (empid) disable novalidate rely; +alter table depts_n20 add constraint pk2 primary key (deptno) disable novalidate rely; +alter table dependents_n20 add constraint pk3 primary key (empid) disable novalidate rely; +alter table locations_n20 add constraint pk4 primary key (locationid) disable novalidate rely; + +alter table emps_n30 add constraint fk1 foreign key (deptno) references depts_n20(deptno) disable novalidate rely; +alter table depts_n20 add constraint fk2 foreign key (locationid) references locations_n20(locationid) disable novalidate rely; + +-- EXAMPLE 1 +create materialized view mv1_part_n2 partitioned on (deptno) as +select * from emps_n30 where empid < 150; +analyze table mv1_part_n2 compute statistics for columns; + +explain +select * +from (select * from emps_n30 where empid < 120) t +join depts_n20 using (deptno); + +select * +from (select * from emps_n30 where empid < 120) t +join depts_n20 using (deptno); + +drop materialized view mv1_part_n2; + +-- EXAMPLE 2 +create materialized view mv1_part_n2 partitioned on (deptno) as +select deptno, name, salary, commission +from emps_n30; +analyze table mv1_part_n2 compute statistics for columns; + +explain +select emps_n30.name, emps_n30.salary, emps_n30.commission +from emps_n30 +join depts_n20 using (deptno); + +select emps_n30.name, emps_n30.salary, emps_n30.commission +from emps_n30 +join depts_n20 using (deptno); + +drop materialized view mv1_part_n2; + +-- EXAMPLE 4 +create materialized view mv1_part_n2 partitioned on (deptno) as +select * from emps_n30 where empid < 200; +analyze table mv1_part_n2 compute statistics for columns; + +explain +select * from emps_n30 where empid > 120 +union all select * from emps_n30 where empid < 150; + +select * from emps_n30 where empid > 120 +union all select * from emps_n30 where empid < 150; + +drop materialized view mv1_part_n2; + +-- EXAMPLE 5 +create materialized view mv1_part_n2 partitioned on (name) as +select name, salary from emps_n30 group by name, salary; +analyze table mv1_part_n2 compute statistics for columns; + +explain +select name, salary from emps_n30 group by name, salary; + +select name, salary from emps_n30 group by name, salary; + +drop materialized view mv1_part_n2; + +-- EXAMPLE 6 +create materialized view mv1_part_n2 partitioned on (name) as +select name, salary from emps_n30 group by name, salary; +analyze table mv1_part_n2 compute statistics for columns; + +explain +select name from emps_n30 group by name; + +select name from emps_n30 group by name; + +drop materialized view mv1_part_n2; + +-- EXAMPLE 7 +create materialized view mv1_part_n2 partitioned on (name) as +select name, salary from emps_n30 where deptno = 10 group by name, salary; +analyze table mv1_part_n2 compute statistics for columns; + +explain +select name from emps_n30 where deptno = 10 group by name; + +select name from emps_n30 where deptno = 10 group by name; + +drop materialized view mv1_part_n2; + +-- EXAMPLE 9 +create materialized view mv1_part_n2 partitioned on (name) as +select name, salary, count(*) as c, sum(empid) as s +from emps_n30 group by name, salary; +analyze table mv1_part_n2 compute statistics for columns; + +explain +select name from emps_n30 group by name; + +select name from emps_n30 group by name; + +drop materialized view mv1_part_n2; http://git-wip-us.apache.org/repos/asf/hive/blob/163e0de9/ql/src/test/queries/clientpositive/materialized_view_rewrite_part_2.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/materialized_view_rewrite_part_2.q b/ql/src/test/queries/clientpositive/materialized_view_rewrite_part_2.q new file mode 100644 index 0000000..505f750 --- /dev/null +++ b/ql/src/test/queries/clientpositive/materialized_view_rewrite_part_2.q @@ -0,0 +1,164 @@ +-- SORT_QUERY_RESULTS + +set hive.vectorized.execution.enabled=false; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.strict.checks.cartesian.product=false; +set hive.stats.fetch.column.stats=true; +set hive.materializedview.rewriting=true; + +create table emps_n00 ( + empid int, + deptno int, + name varchar(256), + salary float, + commission int) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into emps_n00 values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (110, 10, 'Bill', 10000, 250); +analyze table emps_n00 compute statistics for columns; + +create table depts_n00 ( + deptno int, + name varchar(256), + locationid int) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into depts_n00 values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20); +analyze table depts_n00 compute statistics for columns; + +create table dependents_n00 ( + empid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into dependents_n00 values (10, 'Michael'), (10, 'Jane'); +analyze table dependents_n00 compute statistics for columns; + +create table locations_n00 ( + locationid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into locations_n00 values (10, 'San Francisco'), (10, 'San Diego'); +analyze table locations_n00 compute statistics for columns; + +alter table emps_n00 add constraint pk1 primary key (empid) disable novalidate rely; +alter table depts_n00 add constraint pk2 primary key (deptno) disable novalidate rely; +alter table dependents_n00 add constraint pk3 primary key (empid) disable novalidate rely; +alter table locations_n00 add constraint pk4 primary key (locationid) disable novalidate rely; + +alter table emps_n00 add constraint fk1 foreign key (deptno) references depts_n00(deptno) disable novalidate rely; +alter table depts_n00 add constraint fk2 foreign key (locationid) references locations_n00(locationid) disable novalidate rely; + +-- EXAMPLE 16 +create materialized view mv1_part_n0 partitioned on (deptno) as +select empid, depts_n00.deptno as deptno from emps_n00 +join depts_n00 using (deptno) where depts_n00.deptno > 10 +group by empid, depts_n00.deptno; +analyze table mv1_part_n0 compute statistics for columns; + +explain +select empid from emps_n00 +join depts_n00 using (deptno) where depts_n00.deptno > 20 +group by empid, depts_n00.deptno; + +select empid from emps_n00 +join depts_n00 using (deptno) where depts_n00.deptno > 20 +group by empid, depts_n00.deptno; + +drop materialized view mv1_part_n0; + +-- EXAMPLE 17 +create materialized view mv1_part_n0 partitioned on (deptno) as +select depts_n00.deptno as deptno, empid from depts_n00 +join emps_n00 using (deptno) where depts_n00.deptno > 10 +group by empid, depts_n00.deptno; +analyze table mv1_part_n0 compute statistics for columns; + +explain +select empid from emps_n00 +join depts_n00 using (deptno) where depts_n00.deptno > 20 +group by empid, depts_n00.deptno; + +select empid from emps_n00 +join depts_n00 using (deptno) where depts_n00.deptno > 20 +group by empid, depts_n00.deptno; + +drop materialized view mv1_part_n0; + +-- EXAMPLE 18 +create materialized view mv1_part_n0 partitioned on (deptno) as +select empid, depts_n00.deptno as deptno from emps_n00 +join depts_n00 using (deptno) where emps_n00.deptno > 10 +group by empid, depts_n00.deptno; +analyze table mv1_part_n0 compute statistics for columns; + +explain +select empid from emps_n00 +join depts_n00 using (deptno) where depts_n00.deptno > 20 +group by empid, depts_n00.deptno; + +select empid from emps_n00 +join depts_n00 using (deptno) where depts_n00.deptno > 20 +group by empid, depts_n00.deptno; + +drop materialized view mv1_part_n0; + +-- EXAMPLE 19 +create materialized view mv1_part_n0 partitioned on (deptno) as +select depts_n00.deptno as deptno, emps_n00.empid from depts_n00 +join emps_n00 using (deptno) where emps_n00.empid > 10 +group by depts_n00.deptno, emps_n00.empid; +analyze table mv1_part_n0 compute statistics for columns; + +explain +select depts_n00.deptno from depts_n00 +join emps_n00 using (deptno) where emps_n00.empid > 15 +group by depts_n00.deptno, emps_n00.empid; + +select depts_n00.deptno from depts_n00 +join emps_n00 using (deptno) where emps_n00.empid > 15 +group by depts_n00.deptno, emps_n00.empid; + +drop materialized view mv1_part_n0; + +-- EXAMPLE 20 +create materialized view mv1_part_n0 partitioned on (deptno) as +select depts_n00.deptno as deptno, emps_n00.empid from depts_n00 +join emps_n00 using (deptno) where emps_n00.empid > 10 +group by depts_n00.deptno, emps_n00.empid; +analyze table mv1_part_n0 compute statistics for columns; + +explain +select depts_n00.deptno from depts_n00 +join emps_n00 using (deptno) where emps_n00.empid > 15 +group by depts_n00.deptno; + +select depts_n00.deptno from depts_n00 +join emps_n00 using (deptno) where emps_n00.empid > 15 +group by depts_n00.deptno; + +drop materialized view mv1_part_n0; + +-- EXAMPLE 23 +create materialized view mv1_part_n0 partitioned on (deptno2) as +select depts_n00.name, dependents_n00.name as name2, emps_n00.deptno, depts_n00.deptno as deptno2, dependents_n00.empid +from depts_n00, dependents_n00, emps_n00 +where depts_n00.deptno > 10 +group by depts_n00.name, dependents_n00.name, emps_n00.deptno, depts_n00.deptno, dependents_n00.empid; +analyze table mv1_part_n0 compute statistics for columns; + +explain +select dependents_n00.empid +from depts_n00 +join dependents_n00 on (depts_n00.name = dependents_n00.name) +join emps_n00 on (emps_n00.deptno = depts_n00.deptno) +where depts_n00.deptno > 10 +group by dependents_n00.empid; + +select dependents_n00.empid +from depts_n00 +join dependents_n00 on (depts_n00.name = dependents_n00.name) +join emps_n00 on (emps_n00.deptno = depts_n00.deptno) +where depts_n00.deptno > 10 +group by dependents_n00.empid; + +drop materialized view mv1_part_n0;
