Repository: hive Updated Branches: refs/heads/master abcadab7c -> f1de6daf2
HIVE-17054: Expose SQL database constraints to Calcite (Jesus Camacho Rodriguez reviewed by Ashutosh Chauhan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f1de6daf Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f1de6daf Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f1de6daf Branch: refs/heads/master Commit: f1de6daf2bdd8460979e7356571e465ce4f57181 Parents: abcadab Author: Jesus Camacho Rodriguez <[email protected]> Authored: Wed Oct 18 17:19:45 2017 -0700 Committer: Jesus Camacho Rodriguez <[email protected]> Committed: Thu Oct 19 11:19:55 2017 -0700 ---------------------------------------------------------------------- .../ql/optimizer/calcite/RelOptHiveTable.java | 71 +++++- .../clientpositive/groupby_join_pushdown.q | 17 ++ .../clientpositive/groupby_join_pushdown.q.out | 220 +++++++++++++++++++ 3 files changed, 307 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/f1de6daf/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java index 044bba1..fb01087 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java @@ -52,7 +52,10 @@ import org.apache.hadoop.hive.ql.metadata.ForeignKeyInfo.ForeignKeyCol; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.PartitionIterable; +import org.apache.hadoop.hive.ql.metadata.PrimaryKeyInfo; import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.metadata.UniqueConstraint; +import org.apache.hadoop.hive.ql.metadata.UniqueConstraint.UniqueConstraintCol; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ExprNodeConverter; import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; @@ -81,6 +84,7 @@ public class RelOptHiveTable extends RelOptAbstractTable { private final ImmutableMap<Integer, ColumnInfo> hivePartitionColsMap; private final ImmutableList<VirtualColumn> hiveVirtualCols; private final int noOfNonVirtualCols; + private final List<ImmutableBitSet> keys; private final List<RelReferentialConstraint> referentialConstraints; final HiveConf hiveConf; @@ -112,6 +116,7 @@ public class RelOptHiveTable extends RelOptAbstractTable { this.partitionCache = partitionCache; this.colStatsCache = colStatsCache; this.noColsMissingStats = noColsMissingStats; + this.keys = generateKeys(); this.referentialConstraints = generateReferentialConstraints(); } @@ -152,7 +157,12 @@ public class RelOptHiveTable extends RelOptAbstractTable { } @Override - public boolean isKey(ImmutableBitSet arg0) { + public boolean isKey(ImmutableBitSet columns) { + for (ImmutableBitSet key : keys) { + if (columns.contains(key)) { + return true; + } + } return false; } @@ -161,6 +171,65 @@ public class RelOptHiveTable extends RelOptAbstractTable { return referentialConstraints; } + private List<ImmutableBitSet> generateKeys() { + // First PK + final PrimaryKeyInfo pki; + try { + pki = Hive.get().getReliablePrimaryKeys( + hiveTblMetadata.getDbName(), hiveTblMetadata.getTableName()); + } catch (HiveException e) { + throw new RuntimeException(e); + } + ImmutableList.Builder<ImmutableBitSet> builder = ImmutableList.builder(); + if (!pki.getColNames().isEmpty()) { + ImmutableBitSet.Builder keys = ImmutableBitSet.builder(); + for (String pkColName : pki.getColNames().values()) { + int pkPos; + for (pkPos = 0; pkPos < rowType.getFieldNames().size(); pkPos++) { + String colName = rowType.getFieldNames().get(pkPos); + if (pkColName.equals(colName)) { + break; + } + } + if (pkPos == rowType.getFieldNames().size() + || pkPos == rowType.getFieldNames().size()) { + LOG.error("Column for primary key definition " + pkColName + " not found"); + return ImmutableList.of(); + } + keys.set(pkPos); + } + builder.add(keys.build()); + } + // Then UKs + final UniqueConstraint uki; + try { + uki = Hive.get().getReliableUniqueConstraints( + hiveTblMetadata.getDbName(), hiveTblMetadata.getTableName()); + } catch (HiveException e) { + throw new RuntimeException(e); + } + for (List<UniqueConstraintCol> ukCols : uki.getUniqueConstraints().values()) { + ImmutableBitSet.Builder keys = ImmutableBitSet.builder(); + for (UniqueConstraintCol ukCol : ukCols) { + int ukPos; + for (ukPos = 0; ukPos < rowType.getFieldNames().size(); ukPos++) { + String colName = rowType.getFieldNames().get(ukPos); + if (ukCol.colName.equals(colName)) { + break; + } + } + if (ukPos == rowType.getFieldNames().size() + || ukPos == rowType.getFieldNames().size()) { + LOG.error("Column for unique constraint definition " + ukCol.colName + " not found"); + return ImmutableList.of(); + } + keys.set(ukPos); + } + builder.add(keys.build()); + } + return builder.build(); + } + private List<RelReferentialConstraint> generateReferentialConstraints() { final ForeignKeyInfo fki; try { http://git-wip-us.apache.org/repos/asf/hive/blob/f1de6daf/ql/src/test/queries/clientpositive/groupby_join_pushdown.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/groupby_join_pushdown.q b/ql/src/test/queries/clientpositive/groupby_join_pushdown.q index a6e2568..d0bf0fb 100644 --- a/ql/src/test/queries/clientpositive/groupby_join_pushdown.q +++ b/ql/src/test/queries/clientpositive/groupby_join_pushdown.q @@ -54,3 +54,20 @@ SELECT sum(f.cint), f.ctinyint FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint) GROUP BY f.ctinyint, g.ctinyint; +ALTER TABLE alltypesorc ADD CONSTRAINT pk_alltypesorc_1 PRIMARY KEY (ctinyint) DISABLE RELY; + +-- COLUMNS ARE UNIQUE, OPTIMIZATION IS NOT TRIGGERED +explain +SELECT sum(f.cint), f.ctinyint +FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint) +GROUP BY f.ctinyint, g.ctinyint; + +ALTER TABLE alltypesorc DROP CONSTRAINT pk_alltypesorc_1; + +ALTER TABLE alltypesorc ADD CONSTRAINT uk_alltypesorc_1 UNIQUE (ctinyint) DISABLE RELY; + +-- COLUMNS ARE UNIQUE, OPTIMIZATION IS NOT TRIGGERED +explain +SELECT sum(f.cint), f.ctinyint +FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint) +GROUP BY f.ctinyint, g.ctinyint; http://git-wip-us.apache.org/repos/asf/hive/blob/f1de6daf/ql/src/test/results/clientpositive/groupby_join_pushdown.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/groupby_join_pushdown.q.out b/ql/src/test/results/clientpositive/groupby_join_pushdown.q.out index 95d499a..2a2fdd7 100644 --- a/ql/src/test/results/clientpositive/groupby_join_pushdown.q.out +++ b/ql/src/test/results/clientpositive/groupby_join_pushdown.q.out @@ -1454,3 +1454,223 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: ALTER TABLE alltypesorc ADD CONSTRAINT pk_alltypesorc_1 PRIMARY KEY (ctinyint) DISABLE RELY +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: ALTER TABLE alltypesorc ADD CONSTRAINT pk_alltypesorc_1 PRIMARY KEY (ctinyint) DISABLE RELY +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: explain +SELECT sum(f.cint), f.ctinyint +FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint) +GROUP BY f.ctinyint, g.ctinyint +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT sum(f.cint), f.ctinyint +FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint) +GROUP BY f.ctinyint, g.ctinyint +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), cint (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + TableScan + alias: g + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint) + outputColumnNames: _col0 + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: tinyint) + 1 _col0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: tinyint), _col2 (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: tinyint) + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: tinyint), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: bigint), _col0 (type: tinyint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: ALTER TABLE alltypesorc DROP CONSTRAINT pk_alltypesorc_1 +PREHOOK: type: ALTERTABLE_DROPCONSTRAINT +POSTHOOK: query: ALTER TABLE alltypesorc DROP CONSTRAINT pk_alltypesorc_1 +POSTHOOK: type: ALTERTABLE_DROPCONSTRAINT +PREHOOK: query: ALTER TABLE alltypesorc ADD CONSTRAINT uk_alltypesorc_1 UNIQUE (ctinyint) DISABLE RELY +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: ALTER TABLE alltypesorc ADD CONSTRAINT uk_alltypesorc_1 UNIQUE (ctinyint) DISABLE RELY +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: explain +SELECT sum(f.cint), f.ctinyint +FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint) +GROUP BY f.ctinyint, g.ctinyint +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT sum(f.cint), f.ctinyint +FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint) +GROUP BY f.ctinyint, g.ctinyint +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ctinyint is not null (type: boolean) + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), cint (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + TableScan + alias: g + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ctinyint is not null (type: boolean) + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint) + outputColumnNames: _col0 + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: tinyint) + 1 _col0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: tinyint), _col2 (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: tinyint) + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: tinyint), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: bigint), _col0 (type: tinyint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +
