This is an automated email from the ASF dual-hosted git repository.
vgarg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new b7650eb HIVE-22366: Multiple metastore calls for same table and constraints during planning (Vineet Garg, reviewed by Jesus Camacho Rodriguez)
b7650eb is described below
commit b7650eb782f3cf6fb3d8dd46356dd8d230412332
Author: Vineet Garg <[email protected]>
AuthorDate: Mon Jan 27 10:45:14 2020 -0800
HIVE-22366: Multiple metastore calls for same table and constraints during planning (Vineet Garg, reviewed by Jesus Camacho Rodriguez)
---
.../org/apache/hadoop/hive/ql/metadata/Table.java | 249 +++++++++++++++------
.../hadoop/hive/ql/parse/CalcitePlanner.java | 23 +-
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 10 +-
.../apache/hadoop/hive/ql/stats/StatsUtils.java | 2 +-
.../hadoop/hive/metastore/HiveMetaStoreClient.java | 6 +
5 files changed, 201 insertions(+), 89 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
index 33a4505..b9bb3ab 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
@@ -121,6 +121,16 @@ public class Table implements Serializable {
private transient DefaultConstraint dc;
private transient CheckConstraint cc;
+ /** Constraint-related flags,
+ * used to track whether constraints have been retrieved from the metastore.
+ */
+ private transient boolean isPKFetched=false;
+ private transient boolean isFKFetched=false;
+ private transient boolean isUniqueFetched=false;
+ private transient boolean isNotNullFetched=false;
+ private transient boolean isDefaultFetched=false;
+ private transient boolean isCheckFetched=false;
+
/**
* Used only for serialization.
*/
@@ -147,6 +157,19 @@ public class Table implements Serializable {
this(getEmptyTable(databaseName, tableName));
}
+ /** This API is used by getMetaData, which requires a deep copy of metastore.api.table
+ * and a copy of the constraints.
+ */
+ public Table makeCopy() {
+
+ // make deep copy of metastore.api.table
+ Table newTab = new Table(this.getTTable().deepCopy());
+
+ // copy constraints
+ newTab.copyConstraints(this);
+ return newTab;
+ }
+
public boolean isDummyTable() {
return tTable.getTableName().equals(SemanticAnalyzer.DUMMY_TABLE);
}
@@ -171,7 +194,7 @@ public class Table implements Serializable {
* Initialize an empty table.
*/
public static org.apache.hadoop.hive.metastore.api.Table
- getEmptyTable(String databaseName, String tableName) {
+ getEmptyTable(String databaseName, String tableName) {
StorageDescriptor sd = new StorageDescriptor();
{
sd.setSerdeInfo(new SerDeInfo());
@@ -316,8 +339,8 @@ public class Table implements Serializable {
try {
storageHandler = HiveUtils.getStorageHandler(
SessionState.getSessionConf(),
- getProperty(
- org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE));
+ getProperty(
+ org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE));
} catch (Exception e) {
throw new RuntimeException(e);
}
@@ -343,7 +366,7 @@ public class Table implements Serializable {
inputFormatClass = getStorageHandler().getInputFormatClass();
} else {
inputFormatClass = (Class<? extends InputFormat>)
- Class.forName(className, true, Utilities.getSessionSpecifiedClassLoader());
+ Class.forName(className, true, Utilities.getSessionSpecifiedClassLoader());
}
} catch (ClassNotFoundException e) {
throw new RuntimeException(e);
@@ -449,12 +472,12 @@ public class Table implements Serializable {
}
public void setTableType(TableType tableType) {
- tTable.setTableType(tableType.toString());
- }
+ tTable.setTableType(tableType.toString());
+ }
public TableType getTableType() {
- return Enum.valueOf(TableType.class, tTable.getTableType());
- }
+ return Enum.valueOf(TableType.class, tTable.getTableType());
+ }
public ArrayList<StructField> getFields() {
@@ -491,46 +514,46 @@ public class Table implements Serializable {
getProperty(hive_metastoreConstants.TABLE_BUCKETING_VERSION));
}
- @Override
+ @Override
public String toString() {
return tTable.getTableName();
}
- /* (non-Javadoc)
- * @see java.lang.Object#hashCode()
- */
- @Override
- public int hashCode() {
- final int prime = 31;
- int result = 1;
- result = prime * result + ((tTable == null) ? 0 : tTable.hashCode());
- return result;
- }
-
- /* (non-Javadoc)
- * @see java.lang.Object#equals(java.lang.Object)
- */
- @Override
- public boolean equals(Object obj) {
- if (this == obj) {
- return true;
- }
- if (obj == null) {
- return false;
- }
- if (getClass() != obj.getClass()) {
- return false;
- }
- Table other = (Table) obj;
- if (tTable == null) {
- if (other.tTable != null) {
- return false;
- }
- } else if (!tTable.equals(other.tTable)) {
- return false;
- }
- return true;
- }
+ /* (non-Javadoc)
+ * @see java.lang.Object#hashCode()
+ */
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + ((tTable == null) ? 0 : tTable.hashCode());
+ return result;
+ }
+
+ /* (non-Javadoc)
+ * @see java.lang.Object#equals(java.lang.Object)
+ */
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj) {
+ return true;
+ }
+ if (obj == null) {
+ return false;
+ }
+ if (getClass() != obj.getClass()) {
+ return false;
+ }
+ Table other = (Table) obj;
+ if (tTable == null) {
+ if (other.tTable != null) {
+ return false;
+ }
+ } else if (!tTable.equals(other.tTable)) {
+ return false;
+ }
+ return true;
+ }
public List<FieldSchema> getPartCols() {
@@ -596,7 +619,7 @@ public class Table implements Serializable {
for (String col : bucketCols) {
if (!isField(col)) {
throw new HiveException("Bucket columns " + col
- + " is not part of the table columns (" + getCols() );
+ + " is not part of the table columns (" + getCols());
}
}
tTable.getSd().setBucketCols(bucketCols);
@@ -619,7 +642,7 @@ public class Table implements Serializable {
mappings.put(valList, dirName);
}
- public Map<List<String>,String> getSkewedColValueLocationMaps() {
+ public Map<List<String>, String> getSkewedColValueLocationMaps() {
return (tTable.getSd().getSkewedInfo() != null) ?
tTable.getSd().getSkewedInfo()
.getSkewedColValueLocationMaps() : new HashMap<List<String>, String>();
}
@@ -977,8 +1000,8 @@ public class Table implements Serializable {
public boolean isNonNative() {
return getProperty(
- org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE)
- != null;
+ org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE)
+ != null;
}
public String getFullyQualifiedName() {
@@ -1057,7 +1080,7 @@ public class Table implements Serializable {
try {
Class<?> clazz = conf.getClassByName(serdeLib);
if (!AbstractSerDe.class.isAssignableFrom(clazz))
- {
+ {
return true; // The default.
}
deserializer = ReflectionUtil.newInstance(
@@ -1126,71 +1149,167 @@ public class Table implements Serializable {
return outdatedForRewritingMaterializedView;
}
- /* These are only populated during optimization and describing */
+ public ColumnStatistics getColStats() {
+ return tTable.isSetColStats() ? tTable.getColStats() : null;
+ }
+
+ /**
+ * Setup the table level stats as if the table is new. Used when setting up Table for a new
+ * table or during replication.
+ */
+ public void setStatsStateLikeNewTable() {
+ if (isPartitioned()) {
+ StatsSetupConst.setStatsStateForCreateTable(getParameters(), null,
+ StatsSetupConst.FALSE);
+ } else {
+ StatsSetupConst.setStatsStateForCreateTable(getParameters(),
+ MetaStoreUtils.getColumnNames(getCols()), StatsSetupConst.TRUE);
+ }
+ }
+
+ /** Constraint-related methods.
+ * Note that the set APIs are used by DESCRIBE only; although the get APIs return only RELY or ENABLE
+ * constraints, DESCRIBE may set all types of constraints.
+ */
+
+ /* This only returns PKs created with RELY */
public PrimaryKeyInfo getPrimaryKeyInfo() {
+ if(!this.isPKFetched) {
+ try {
+ pki = Hive.get().getReliablePrimaryKeys(this.getDbName(), this.getTableName());
+ this.isPKFetched = true;
+ } catch (HiveException e) {
+ LOG.warn("Cannot retrieve PK info for table : " + this.getTableName()
+ + " ignoring exception: " + e);
+ }
+ }
return pki;
}
public void setPrimaryKeyInfo(PrimaryKeyInfo pki) {
this.pki = pki;
+ this.isPKFetched = true;
}
+ /* This only returns FK constraints created with RELY */
public ForeignKeyInfo getForeignKeyInfo() {
+ if(!isFKFetched) {
+ try {
+ fki = Hive.get().getReliableForeignKeys(this.getDbName(), this.getTableName());
+ this.isFKFetched = true;
+ } catch (HiveException e) {
+ LOG.warn(
+ "Cannot retrieve FK info for table : " + this.getTableName()
+ + " ignoring exception: " + e);
+ }
+ }
return fki;
}
public void setForeignKeyInfo(ForeignKeyInfo fki) {
this.fki = fki;
+ this.isFKFetched = true;
}
+ /* This only returns UNIQUE constraints defined with RELY */
public UniqueConstraint getUniqueKeyInfo() {
+ if(!isUniqueFetched) {
+ try {
+ uki = Hive.get().getReliableUniqueConstraints(this.getDbName(), this.getTableName());
+ this.isUniqueFetched = true;
+ } catch (HiveException e) {
+ LOG.warn(
+ "Cannot retrieve Unique Key info for table : " +
this.getTableName()
+ + " ignoring exception: " + e);
+ }
+ }
return uki;
}
public void setUniqueKeyInfo(UniqueConstraint uki) {
this.uki = uki;
+ this.isUniqueFetched = true;
}
+ /* This only returns NOT NULL constraints defined with RELY */
public NotNullConstraint getNotNullConstraint() {
+ if(!isNotNullFetched) {
+ try {
+ nnc = Hive.get().getReliableNotNullConstraints(this.getDbName(), this.getTableName());
+ this.isNotNullFetched = true;
+ } catch (HiveException e) {
+ LOG.warn("Cannot retrieve Not Null constraint info for table : "
+ + this.getTableName() + " ignoring exception: " + e);
+ }
+ }
return nnc;
}
public void setNotNullConstraint(NotNullConstraint nnc) {
this.nnc = nnc;
+ this.isNotNullFetched = true;
}
+ /* This only returns DEFAULT constraints defined with ENABLE */
public DefaultConstraint getDefaultConstraint() {
+ if(!isDefaultFetched) {
+ try {
+ dc = Hive.get().getEnabledDefaultConstraints(this.getDbName(), this.getTableName());
+ this.isDefaultFetched = true;
+ } catch (HiveException e) {
+ LOG.warn("Cannot retrieve Default constraint info for table : "
+ + this.getTableName() + " ignoring exception: " + e);
+ }
+ }
return dc;
}
public void setDefaultConstraint(DefaultConstraint dc) {
this.dc = dc;
+ this.isDefaultFetched = true;
}
+ /* This only returns CHECK constraints defined with ENABLE */
public CheckConstraint getCheckConstraint() {
+ if(!isCheckFetched) {
+ try {
+ cc = Hive.get().getEnabledCheckConstraints(this.getDbName(), this.getTableName());
+ this.isCheckFetched = true;
+ } catch (HiveException e) {
+ LOG.warn("Cannot retrieve Check constraint info for table : "
+ + this.getTableName() + " ignoring exception: " + e);
+ }
+ }
return cc;
}
public void setCheckConstraint(CheckConstraint cc) {
this.cc = cc;
+ this.isCheckFetched = true;
}
+ /** This shouldn't use the get APIs because those call the metastore
+ * to fetch constraints.
+ * getMetaData only needs to copy the existing constraints, even if they have not been fetched.
+ */
+ public void copyConstraints(final Table tbl) {
+ this.pki = tbl.pki;
+ this.isPKFetched = tbl.isPKFetched;
- public ColumnStatistics getColStats() {
- return tTable.isSetColStats() ? tTable.getColStats() : null;
- }
+ this.fki = tbl.fki;
+ this.isFKFetched = tbl.isFKFetched;
- /**
- * Setup the table level stats as if the table is new. Used when setting up Table for a new
- * table or during replication.
- */
- public void setStatsStateLikeNewTable() {
- if (isPartitioned()) {
- StatsSetupConst.setStatsStateForCreateTable(getParameters(), null,
- StatsSetupConst.FALSE);
- } else {
- StatsSetupConst.setStatsStateForCreateTable(getParameters(),
- MetaStoreUtils.getColumnNames(getCols()), StatsSetupConst.TRUE);
- }
+ this.uki = tbl.uki;
+ this.isUniqueFetched = tbl.isUniqueFetched;
+
+ this.nnc = tbl.nnc;
+ this.isNotNullFetched = tbl.isNotNullFetched;
+
+ this.dc = tbl.dc;
+ this.isDefaultFetched = tbl.isDefaultFetched;
+
+ this.cc = tbl.cc;
+ this.isCheckFetched = tbl.isCheckFetched;
}
+
};
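For reference, a minimal sketch of the fetch-once pattern that the new constraint getters above follow; the class, interface, and method names below are hypothetical stand-ins, not the Hive API:

    import java.util.List;

    // Fetch-once pattern: call the remote service the first time a getter is
    // used, remember that the fetch happened, and reuse the cached result.
    class LazyConstraints {
      interface ConstraintClient {
        List<String> primaryKeys(String db, String table) throws Exception;
      }

      private final ConstraintClient client;
      private final String db;
      private final String table;

      private List<String> primaryKeys;
      private boolean pkFetched = false;

      LazyConstraints(ConstraintClient client, String db, String table) {
        this.client = client;
        this.db = db;
        this.table = table;
      }

      List<String> getPrimaryKeys() {
        if (!pkFetched) {
          try {
            primaryKeys = client.primaryKeys(db, table);
            pkFetched = true;
          } catch (Exception e) {
            // As in the patch: warn and fall through; the flag stays false so a later call may retry.
            System.err.println("Cannot retrieve PK info for " + table + ": " + e);
          }
        }
        return primaryKeys;
      }
    }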
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 773c5ca..7680ede 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -5225,24 +5225,19 @@ public class CalcitePlanner extends SemanticAnalyzer {
}
@Override
- protected Table getTableObjectByName(String tableName, boolean throwException) throws HiveException {
- if (!tabNameToTabObject.containsKey(tableName)) {
- // TODO: The code below should be a single HMS call and possibly unified with method in SemanticAnalyzer
- Table table = db.getTable(tableName, throwException);
+ protected Table getTableObjectByName(String tabName, boolean throwException) throws HiveException {
+ String[] names = Utilities.getDbTableName(tabName);
+ final String tableName = names[1];
+ final String dbName = names[0];
+ final String fullyQualName = dbName + "." + tableName;
+ if (!tabNameToTabObject.containsKey(fullyQualName)) {
+ Table table = db.getTable(dbName, tableName, throwException);
if (table != null) {
- table.setPrimaryKeyInfo(db.getReliablePrimaryKeys(
- table.getDbName(), table.getTableName()));
- table.setForeignKeyInfo(db.getReliableForeignKeys(
- table.getDbName(), table.getTableName()));
- table.setUniqueKeyInfo(db.getReliableUniqueConstraints(
- table.getDbName(), table.getTableName()));
- table.setNotNullConstraint(db.getReliableNotNullConstraints(
- table.getDbName(), table.getTableName()));
- tabNameToTabObject.put(tableName, table);
+ tabNameToTabObject.put(fullyQualName, table);
}
return table;
}
- return tabNameToTabObject.get(tableName);
+ return tabNameToTabObject.get(fullyQualName);
}
/**
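For reference, a small sketch of the cache-key idea in the new getTableObjectByName: the per-query cache is keyed by the fully qualified "db.table" name so qualified and unqualified references share one entry. The class and name resolution below are simplified, hypothetical stand-ins (the real code uses Utilities.getDbTableName):

    import java.util.HashMap;
    import java.util.Map;
    import java.util.function.Function;

    // Cache keyed by the fully qualified "db.table" name, so "t1" and
    // "current_db.t1" resolve to the same entry.
    class TableCacheSketch {
      private final Map<String, Object> tabNameToTabObject = new HashMap<>();

      Object getOrLoad(String tabName, String currentDb, Function<String, Object> loader) {
        String[] parts = tabName.split("\\.", 2);                 // simplified name resolution
        String fullyQualName = parts.length == 2 ? tabName : currentDb + "." + tabName;
        return tabNameToTabObject.computeIfAbsent(fullyQualName, loader);
      }
    }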
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index dd23a90..4afd454 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -490,7 +490,6 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
} else {
mergeIsDirect = false;
}
- tabNameToTabObject.clear();
loadTableWork.clear();
loadFileWork.clear();
columnStatsAutoGatherContexts.clear();
@@ -2142,14 +2141,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
// Get table details from tabNameToTabObject cache
Table tab = getTableObjectByName(tabName, false);
if (tab != null) {
- // copy table object in case downstream changes it
- Table newTab = new Table(tab.getTTable().deepCopy());
- // copy constraints, we do not need deep copy as
- // they should not be changed
- newTab.setPrimaryKeyInfo(tab.getPrimaryKeyInfo());
- newTab.setForeignKeyInfo(tab.getForeignKeyInfo());
- newTab.setUniqueKeyInfo(tab.getUniqueKeyInfo());
- newTab.setNotNullConstraint(tab.getNotNullConstraint());
+ Table newTab = tab.makeCopy();
tab = newTab;
}
if (tab == null ||
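A rough sketch, with hypothetical types, of the copy semantics that makeCopy() is meant to provide here: deep-copy the mutable Thrift payload while sharing the constraint objects and their fetched flags, so the copy neither mutates the cached table nor triggers fresh metastore calls:

    // Copy semantics: deep-copy the mutable payload, share constraints and
    // their fetched flags so the copy does not refetch from the metastore.
    class CopySketch {
      static final class Payload {
        String name;
        Payload deepCopy() { Payload p = new Payload(); p.name = this.name; return p; }
      }

      Payload payload = new Payload();   // stands in for the Thrift table object
      Object primaryKeyInfo;             // shared; constraints are not changed downstream
      boolean pkFetched;

      CopySketch makeCopy() {
        CopySketch copy = new CopySketch();
        copy.payload = this.payload.deepCopy();   // safe for downstream mutation
        copy.primaryKeyInfo = this.primaryKeyInfo;
        copy.pkFetched = this.pkFetched;          // preserve "already fetched" state
        return copy;
      }
    }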
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 8084dcd..cb2d0a7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -1024,7 +1024,7 @@ public class StatsUtils {
// This table is dummy and has no stats
return stats;
}
- if (fetchColStats) {
+ if (fetchColStats && !colStatsToRetrieve.isEmpty()) {
try {
List<ColumnStatisticsObj> colStat = Hive.get().getTableColumnStatistics(
dbName, tabName, colStatsToRetrieve, false);
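For reference, a hedged sketch of the guard added above: when there are no columns to retrieve, skip the statistics call entirely. The client interface and names are hypothetical:

    import java.util.Collections;
    import java.util.List;

    // Guard: no columns requested means no statistics call at all.
    class ColStatsGuard {
      interface StatsClient {
        List<String> columnStats(String db, String table, List<String> cols) throws Exception;
      }

      static List<String> fetch(StatsClient client, String db, String table,
          boolean fetchColStats, List<String> cols) throws Exception {
        if (!fetchColStats || cols.isEmpty()) {
          return Collections.emptyList();   // skip the RPC for an empty column list
        }
        return client.columnStats(db, table, cols);
      }
    }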
diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
index 8220f10..c7e1044 100644
--- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
+++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
@@ -2565,6 +2565,9 @@ public class HiveMetaStoreClient implements IMetaStoreClient, AutoCloseable {
@Override
public List<ColumnStatisticsObj> getTableColumnStatistics(String catName, String dbName,
String tableName, List<String> colNames, String engine) throws TException {
+ if(colNames.isEmpty()) {
+ return Collections.emptyList();
+ }
TableStatsRequest rqst = new TableStatsRequest(dbName, tableName, colNames, engine);
rqst.setCatName(catName);
rqst.setEngine(engine);
@@ -2581,6 +2584,9 @@ public class HiveMetaStoreClient implements IMetaStoreClient, AutoCloseable {
@Override
public List<ColumnStatisticsObj> getTableColumnStatistics(String catName, String dbName,
String tableName, List<String> colNames, String engine, String validWriteIdList) throws TException {
+ if(colNames.isEmpty()) {
+ return Collections.emptyList();
+ }
TableStatsRequest rqst = new TableStatsRequest(dbName, tableName, colNames, engine);
rqst.setCatName(catName);
rqst.setEngine(engine);
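Finally, a small sketch of the client-side short-circuit added to both getTableColumnStatistics overloads: an empty colNames list returns immediately instead of building a TableStatsRequest (the hunks assume java.util.Collections is already available in HiveMetaStoreClient, which is not shown here). The types below are hypothetical stand-ins:

    import java.util.Collections;
    import java.util.List;

    // Client-side short-circuit: an empty column list never becomes a request.
    class ClientShortCircuit {
      interface Rpc {
        List<String> getStats(String db, String table, List<String> cols) throws Exception;
      }

      static List<String> getTableColumnStatistics(Rpc rpc, String db, String table,
          List<String> colNames) throws Exception {
        if (colNames.isEmpty()) {
          return Collections.emptyList();   // avoid a metastore round trip
        }
        return rpc.getStats(db, table, colNames);
      }
    }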