[
https://issues.apache.org/jira/browse/HIVE-24397?focusedWorklogId=519254&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-519254
]
ASF GitHub Bot logged work on HIVE-24397:
-----------------------------------------
Author: ASF GitHub Bot
Created on: 02/Dec/20 21:24
Start Date: 02/Dec/20 21:24
Worklog Time Spent: 10m
Work Description: vnhive commented on a change in pull request #1681:
URL: https://github.com/apache/hive/pull/1681#discussion_r530816743
##########
File path:
standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/client/TestTablesGetExists.java
##########
@@ -402,6 +393,134 @@ public void testGetTableObjectsByName() throws Exception {
}
+ @Test
+ public void testGetTableObjectsWithProjectionOfSingleField() throws
Exception {
+ List<String> tableNames = new ArrayList<>();
+ tableNames.add(testTables[0].getTableName());
+ tableNames.add(testTables[1].getTableName());
+
+ GetTablesRequest request = new GetTablesRequest();
+ request.setProjectionSpec(new GetProjectionsSpec());
+ request.setTblNames(tableNames);
+ request.setDbName(DEFAULT_DATABASE);
+
+ GetProjectionsSpec projectSpec = request.getProjectionSpec();
+ List<String> projectedFields = Collections.singletonList("sd.location");
+ projectSpec.setFieldList(projectedFields);
+
+ List<Table> tables = client.getTableObjectsByRequest(request);
+
+ Assert.assertEquals("Found tables", 2, tables.size());
+
+ for(Table table : tables) {
+ Assert.assertFalse(table.isSetDbName());
+ Assert.assertFalse(table.isSetCatName());
+ Assert.assertFalse(table.isSetTableName());
+ Assert.assertTrue(table.isSetSd());
+ }
+ }
+
+ @Test
+ public void testGetTableObjectsWithNullProjectionSpec() throws Exception {
+ List<String> tableNames = new ArrayList<>();
+ tableNames.add(testTables[0].getTableName());
+ tableNames.add(testTables[1].getTableName());
+
+ GetTablesRequest request = new GetTablesRequest();
+ request.setProjectionSpec(null);
+ request.setTblNames(tableNames);
+ request.setDbName(DEFAULT_DATABASE);
+
+ List<Table> tables = client.getTableObjectsByRequest(request);
+
+ Assert.assertEquals("Found tables", 2, tables.size());
+ }
+
+ @Test
+ public void testGetTableObjectsWithNonExistentColumn() throws Exception {
+ List<String> tableNames = new ArrayList<>();
+ tableNames.add(testTables[0].getTableName());
+ tableNames.add(testTables[1].getTableName());
+
+ GetTablesRequest request = new GetTablesRequest();
+ request.setProjectionSpec(new GetProjectionsSpec());
+ request.setTblNames(tableNames);
+ request.setDbName(DEFAULT_DATABASE);
+
+ GetProjectionsSpec projectSpec = request.getProjectionSpec();
+ List<String> projectedFields = Arrays.asList("Invalid1");
+ projectSpec.setFieldList(projectedFields);
+
+ Assert.assertThrows(Exception.class,
()->client.getTableObjectsByRequest(request));
+ }
+
+
+ @Test
+ public void testGetTableObjectsWithNonExistentColumns() throws Exception {
+ List<String> tableNames = new ArrayList<>();
+ tableNames.add(testTables[0].getTableName());
+ tableNames.add(testTables[1].getTableName());
+
+ GetTablesRequest request = new GetTablesRequest();
+ request.setProjectionSpec(new GetProjectionsSpec());
+ request.setTblNames(tableNames);
+ request.setDbName(DEFAULT_DATABASE);
+
+ GetProjectionsSpec projectSpec = request.getProjectionSpec();
+ List<String> projectedFields = Arrays.asList("Invalid1", "Invalid2");
+ projectSpec.setFieldList(projectedFields);
+
+ Assert.assertThrows(Exception.class,
()->client.getTableObjectsByRequest(request));
+ }
+
+ @Test
+ public void testGetTableObjectsWithEmptyProjection() throws Exception {
+ List<String> tableNames = new ArrayList<>();
+ tableNames.add(testTables[0].getTableName());
+ tableNames.add(testTables[1].getTableName());
+
+ GetTablesRequest request = new GetTablesRequest();
+ request.setProjectionSpec(new GetProjectionsSpec());
+ request.setTblNames(tableNames);
+ request.setDbName(DEFAULT_DATABASE);
+
+ GetProjectionsSpec projectSpec = request.getProjectionSpec();
+ List<String> projectedFields = Arrays.asList();
+ projectSpec.setFieldList(projectedFields);
+
+ List<Table> tables = client.getTableObjectsByRequest(request);
+
+ Assert.assertEquals("Found tables", 0, tables.size());
+ }
+
+ @Test
+ public void testGetTableObjectsWithProjectionOfMultipleField() throws
Exception {
+ List<String> tableNames = new ArrayList<>();
+ tableNames.add(testTables[0].getTableName());
+ tableNames.add(testTables[1].getTableName());
+
+ GetTablesRequest request = new GetTablesRequest();
+ request.setProjectionSpec(new GetProjectionsSpec());
+ request.setTblNames(tableNames);
+ request.setDbName(DEFAULT_DATABASE);
+
+ GetProjectionsSpec projectSpec = request.getProjectionSpec();
+ List<String> projectedFields = Arrays.asList("database", "tableName",
"createTime", "lastAccessTime");
Review comment:
Thank you for this comment. This is an excellent catch from you.
> can we add some nested single valued fields here too. Something other than
sd.location?
> like for example. sd.serdeInfo.serializationLib?
Done.
> Also, what is the expected behavior if the user adds a multi-valued field
like sd.parameters.
Currently JDO does not support multi-valued fields and we end up returning
all the columns. This is the same behaviour as partitions.
> Can we have some tests around that too.
Done !
##########
File path:
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
##########
@@ -1855,43 +1869,81 @@ private MTable getMTable(String catName, String db,
String table) {
for (String t : tbl_names) {
lowered_tbl_names.add(normalizeIdentifier(t));
}
+
query = pm.newQuery(MTable.class);
query.setFilter("database.name == db && database.catalogName == cat &&
tbl_names.contains(tableName)");
query.declareParameters("java.lang.String db, java.lang.String cat,
java.util.Collection tbl_names");
- Collection mtables = (Collection) query.execute(db, catName,
lowered_tbl_names);
- if (mtables == null || mtables.isEmpty()) {
- // Need to differentiate between an unmatched pattern and a
non-existent database
- dbExistsQuery = pm.newQuery(MDatabase.class, "name == db &&
catalogName == cat");
- dbExistsQuery.declareParameters("java.lang.String db, java.lang.String
cat");
- dbExistsQuery.setUnique(true);
- dbExistsQuery.setResult("name");
- String dbNameIfExists = (String) dbExistsQuery.execute(db, catName);
- if (org.apache.commons.lang3.StringUtils.isEmpty(dbNameIfExists)) {
- throw new UnknownDBException("Could not find database " +
- DatabaseName.getQualified(catName, db));
+
+ if (projectionSpec == null) {
+ mtables = (List<MTable>) query.execute(db, catName, lowered_tbl_names);
+ }
+ else if(projectionSpec.getFieldList() != null &&
projectionSpec.getFieldList().size() > 1) {
Review comment:
one returns a multiple columns in each row List<Object[]> and the other
returns only one column for each row, List<Object>. The parsing logic does vary
in the former case no ? Are you suggesting typecasting to List<Object[]> in
both the cases and unifying the logic ?
##########
File path:
standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
##########
@@ -2360,6 +2360,21 @@ public Table getTable(String catName, String dbName,
String tableName, String va
return
deepCopyTables(FilterUtils.filterTablesIfEnabled(isClientFilterEnabled,
filterHook, tabs));
}
+ @Override
+ public GetTablesResult getTables(GetTablesRequest req) throws TException {
+ if (processorCapabilities != null)
+ req.setProcessorCapabilities(new
ArrayList<String>(Arrays.asList(processorCapabilities)));
+ if (processorIdentifier != null)
+ req.setProcessorIdentifier(processorIdentifier);
+ if (req.getCapabilities() == null) {
Review comment:
Thank you for pointing it out. Fixed!
##########
File path:
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
##########
@@ -1855,43 +1869,89 @@ private MTable getMTable(String catName, String db,
String table) {
for (String t : tbl_names) {
lowered_tbl_names.add(normalizeIdentifier(t));
}
+
query = pm.newQuery(MTable.class);
query.setFilter("database.name == db && database.catalogName == cat &&
tbl_names.contains(tableName)");
query.declareParameters("java.lang.String db, java.lang.String cat,
java.util.Collection tbl_names");
- Collection mtables = (Collection) query.execute(db, catName,
lowered_tbl_names);
- if (mtables == null || mtables.isEmpty()) {
- // Need to differentiate between an unmatched pattern and a
non-existent database
- dbExistsQuery = pm.newQuery(MDatabase.class, "name == db &&
catalogName == cat");
- dbExistsQuery.declareParameters("java.lang.String db, java.lang.String
cat");
- dbExistsQuery.setUnique(true);
- dbExistsQuery.setResult("name");
- String dbNameIfExists = (String) dbExistsQuery.execute(db, catName);
- if (org.apache.commons.lang3.StringUtils.isEmpty(dbNameIfExists)) {
- throw new UnknownDBException("Could not find database " +
- DatabaseName.getQualified(catName, db));
+
+ List<String> projectionFields = null;
+
+ // If a projection specification has been set, validate it and translate
it to JDO columns.
+ if (projectionSpec != null) {
+ //Validate the projection fields for multi-valued fields.
+ projectionFields =
TableFields.getMFieldNames(projectionSpec.getFieldList());
+ }
+
+ // If the JDO translation resulted in valid JDO columns names, use it to
create a projection for the JDO query.
+ if (projectionFields != null) {
+ // fetch partially filled tables using result clause
+ query.setResult(Joiner.on(',').join(projectionFields));
+ }
+
+ if (projectionFields == null) {
+ mtables = (List<MTable>) query.execute(db, catName, lowered_tbl_names);
+ } else {
+ if (projectionFields.size() > 1) {
+ // Execute the query to fetch the partial results.
+ List<Object[]> results = (List<Object[]>) query.execute(db, catName,
lowered_tbl_names);
+ // Declare the tables array to return the list of tables
+ mtables = new ArrayList<>(results.size());
+ // Iterate through each row of the result and create the MTable
object.
+ for (Object[] row : results) {
+ MTable mtable = new MTable();
+ int i = 0;
+ for (Object val : row) {
+ MetaStoreServerUtils.setNestedProperty(mtable,
projectionFields.get(i), val, true);
+ i++;
+ }
+ mtables.add(mtable);
+ }
+ } else if (projectionFields.size() == 1) {
+ // Execute the query to fetch the partial results.
+ List<Object> results = (List<Object>) query.execute(db, catName,
lowered_tbl_names);
+ // Iterate through each row of the result and create the MTable
object.
+ mtables = new ArrayList<>(results.size());
+ for (Object row : results) {
+ MTable mtable = new MTable();
+ MetaStoreServerUtils.setNestedProperty(mtable,
projectionFields.get(0), row, true);
+ mtables.add(mtable);
+ }
}
+ }
+
+ if (mtables == null || mtables.isEmpty()) {
+ verifyDBExists(catName, db);
Review comment:
Done !
##########
File path:
standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java
##########
@@ -733,6 +733,27 @@ Table getTable(String catName, String dbName, String
tableName,
List<Table> getTableObjectsByName(String dbName, List<String> tableNames)
throws MetaException, InvalidOperationException, UnknownDBException,
TException;
+
+ /**
+ * Get tables as objects (rather than just fetching their names). This is
more expensive and
+ * should only be used if you actually need all the information about the
tables.
+ * @param request GetTablesRequest Object.
+ * @return A list of objects representing the tables.
+ * Only the tables that can be retrieved from the database are
returned. For example,
+ * if none of the requested tables could be retrieved, an empty
list is returned.
+ * There is no guarantee of ordering of the returned tables.
+ * @throws InvalidOperationException
+ * The input to this operation is invalid (e.g., the list of tables
names is null)
+ * @throws UnknownDBException
+ * The requested database could not be fetched.
+ * @throws TException
+ * A thrift communication error occurred
+ * @throws MetaException
+ * Any other errors
+ */
+ List<Table> getTableObjectsByRequest(GetTablesRequest request)
Review comment:
> Does the method name have to include byRequest? Its self-evident by
the signature right? How about
> getTables(GetTablesRequest request).
I have added getTables.
> Also, can we keep the return type as GetTablesResponse here so that the
interface is more extendable
> in the future?
There is no GetTablesReponse object. There is a TablesResult object. I have
changed the method to return this.
##########
File path:
standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift
##########
@@ -1454,7 +1454,8 @@ struct GetTablesRequest {
3: optional ClientCapabilities capabilities,
4: optional string catName,
5: optional list<string> processorCapabilities,
- 6: optional string processorIdentifier
+ 6: optional string processorIdentifier,
+ 7: optional GetProjectionsSpec projectionSpec
Review comment:
Done !
1. I added a comment inside the definition of GetProjectionSpec, saying that
the include and exclude patterns are not supported
2. I throw an Unsupported operation exception from the table API.
3. I have written a test case to verify this.
##########
File path:
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
##########
@@ -1839,13 +1839,27 @@ private MTable getMTable(String catName, String db,
String table) {
return nmtbl.mtbl;
}
- @Override
- public List<Table> getTableObjectsByName(String catName, String db,
List<String> tbl_names)
- throws MetaException, UnknownDBException {
+ private void verifyDBExists(String catName, String db) throws
UnknownDBException {
+ // Need to differentiate between an unmatched pattern and a non-existent
database
+ Query dbExistsQuery = pm.newQuery(MDatabase.class, "name == db &&
catalogName == cat");
+ dbExistsQuery.declareParameters("java.lang.String db, java.lang.String
cat");
+ dbExistsQuery.setUnique(true);
+ dbExistsQuery.setResult("name");
+ String dbNameIfExists = (String) dbExistsQuery.execute(db, catName);
+ if (org.apache.commons.lang3.StringUtils.isEmpty(dbNameIfExists)) {
+ throw new UnknownDBException("Could not find database " +
+ DatabaseName.getQualified(catName, db));
+ }
+ dbExistsQuery.closeAll();
+ }
+
+ private List<Table> getTableObjectsByNameUtil(String catName, String db,
List<String> tbl_names,
Review comment:
I agree, this reduces code. Done !
##########
File path:
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
##########
@@ -1855,43 +1869,81 @@ private MTable getMTable(String catName, String db,
String table) {
for (String t : tbl_names) {
lowered_tbl_names.add(normalizeIdentifier(t));
}
+
query = pm.newQuery(MTable.class);
query.setFilter("database.name == db && database.catalogName == cat &&
tbl_names.contains(tableName)");
query.declareParameters("java.lang.String db, java.lang.String cat,
java.util.Collection tbl_names");
- Collection mtables = (Collection) query.execute(db, catName,
lowered_tbl_names);
- if (mtables == null || mtables.isEmpty()) {
- // Need to differentiate between an unmatched pattern and a
non-existent database
- dbExistsQuery = pm.newQuery(MDatabase.class, "name == db &&
catalogName == cat");
- dbExistsQuery.declareParameters("java.lang.String db, java.lang.String
cat");
- dbExistsQuery.setUnique(true);
- dbExistsQuery.setResult("name");
- String dbNameIfExists = (String) dbExistsQuery.execute(db, catName);
- if (org.apache.commons.lang3.StringUtils.isEmpty(dbNameIfExists)) {
- throw new UnknownDBException("Could not find database " +
- DatabaseName.getQualified(catName, db));
+
+ if (projectionSpec == null) {
+ mtables = (List<MTable>) query.execute(db, catName, lowered_tbl_names);
+ }
+ else if(projectionSpec.getFieldList() != null &&
projectionSpec.getFieldList().size() > 1) {
+ // fetch partially filled tables using result clause
+ query.setResult(Joiner.on(',').join(projectionSpec.getFieldList()));
Review comment:
Done !
##########
File path:
standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java
##########
@@ -733,6 +733,27 @@ Table getTable(String catName, String dbName, String
tableName,
List<Table> getTableObjectsByName(String dbName, List<String> tableNames)
throws MetaException, InvalidOperationException, UnknownDBException,
TException;
+
+ /**
+ * Get tables as objects (rather than just fetching their names). This is
more expensive and
+ * should only be used if you actually need all the information about the
tables.
+ * @param request GetTablesRequest Object.
+ * @return A list of objects representing the tables.
+ * Only the tables that can be retrieved from the database are
returned. For example,
+ * if none of the requested tables could be retrieved, an empty
list is returned.
+ * There is no guarantee of ordering of the returned tables.
+ * @throws InvalidOperationException
+ * The input to this operation is invalid (e.g., the list of tables
names is null)
+ * @throws UnknownDBException
+ * The requested database could not be fetched.
+ * @throws TException
+ * A thrift communication error occurred
+ * @throws MetaException
+ * Any other errors
+ */
+ GetTablesResult getTables(GetTablesRequest request)
Review comment:
I agree to this.
Request/Response objects are thrift serialization objects and are not meant
for use outside the HiveMetaStore (client and server).
I also agree to your argument that if more fields are added to them
tomorrow, these will need to be accounted for across all its usages.
I have changed this and updated the pull request.
##########
File path:
standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/client/TestTablesGetExists.java
##########
@@ -505,10 +524,69 @@ public void
testGetTableObjectsWithProjectionOfMultipleField() throws Exception
request.setDbName(DEFAULT_DATABASE);
GetProjectionsSpec projectSpec = request.getProjectionSpec();
- List<String> projectedFields = Arrays.asList("database", "tableName",
"createTime", "lastAccessTime");
+ List<String> projectedFields = Arrays.asList("dbName", "tableName",
"createTime", "lastAccessTime");
+ projectSpec.setFieldList(projectedFields);
+
+ GetTablesResult result = client.getTables(request);
+ List<Table> tables = result.getTables();
+
+ Assert.assertEquals("Found tables", 2, tables.size());
+
+ for(Table table : tables) {
+ Assert.assertTrue(table.isSetDbName());
+ Assert.assertTrue(table.isSetTableName());
+ Assert.assertTrue(table.isSetCreateTime());
+ Assert.assertFalse(table.isSetSd());
+ }
+ }
+
+ @Test
+ public void testGetTableObjectsWithProjectionOfSerDeInfoSingleValuedFields()
throws Exception {
+ List<String> tableNames = new ArrayList<>();
+ tableNames.add(testTables[0].getTableName());
+ tableNames.add(testTables[1].getTableName());
+
+ GetTablesRequest request = new GetTablesRequest();
+ request.setProjectionSpec(new GetProjectionsSpec());
+ request.setTblNames(tableNames);
+ request.setDbName(DEFAULT_DATABASE);
+
+ GetProjectionsSpec projectSpec = request.getProjectionSpec();
+ List<String> projectedFields = Arrays.asList("sd.serdeInfo.name",
"sd.serdeInfo.serializationLib", "sd.serdeInfo.description");
+ projectSpec.setFieldList(projectedFields);
+
+ GetTablesResult result = client.getTables(request);
+ List<Table> tables = result.getTables();
+
+ Assert.assertEquals("Found tables", 2, tables.size());
+
+ for(Table table : tables) {
+ Assert.assertFalse(table.isSetDbName());
+ Assert.assertTrue(table.isSetSd());
+ StorageDescriptor sd = table.getSd();
+ Assert.assertFalse(sd.isSetCols());
+ Assert.assertTrue(sd.isSetSerdeInfo());
+ SerDeInfo serDeInfo = sd.getSerdeInfo();
Review comment:
Done !
##########
File path:
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
##########
@@ -1912,19 +1972,19 @@ private Table convertToTable(MTable mtbl) throws
MetaException {
// for backwards compatibility with old metastore persistence
if (mtbl.getViewOriginalText() != null) {
tableType = TableType.VIRTUAL_VIEW.toString();
- } else if (Boolean.parseBoolean(mtbl.getParameters().get("EXTERNAL"))) {
+ } else if (mtbl.getParameters() != null &&
Boolean.parseBoolean(mtbl.getParameters().get("EXTERNAL"))) {
tableType = TableType.EXTERNAL_TABLE.toString();
} else {
tableType = TableType.MANAGED_TABLE.toString();
}
}
Map<String, String> parameters = convertMap(mtbl.getParameters());
boolean isAcidTable = TxnUtils.isAcidTable(parameters);
- final Table t = new Table(mtbl.getTableName(),
mtbl.getDatabase().getName(), mtbl
- .getOwner(), mtbl.getCreateTime(), mtbl.getLastAccessTime(), mtbl
- .getRetention(), convertToStorageDescriptor(mtbl.getSd(), false,
isAcidTable),
- convertToFieldSchemas(mtbl.getPartitionKeys()), parameters,
- mtbl.getViewOriginalText(), mtbl.getViewExpandedText(), tableType);
+ final Table t = new Table(mtbl.getTableName(), mtbl.getDatabase() != null
? mtbl.getDatabase().getName() : null,
+ mtbl.getOwner(), mtbl.getCreateTime(), mtbl.getLastAccessTime(),
mtbl.getRetention(),
Review comment:
Done !
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
Issue Time Tracking
-------------------
Worklog Id: (was: 519254)
Time Spent: 1h 50m (was: 1h 40m)
> Add the projection specification to the table request object and add
> placeholders in ObjectStore.java
> -----------------------------------------------------------------------------------------------------
>
> Key: HIVE-24397
> URL: https://issues.apache.org/jira/browse/HIVE-24397
> Project: Hive
> Issue Type: Sub-task
> Components: Hive
> Reporter: Narayanan Venkateswaran
> Assignee: Narayanan Venkateswaran
> Priority: Minor
> Labels: pull-request-available
> Time Spent: 1h 50m
> Remaining Estimate: 0h
>
--
This message was sent by Atlassian Jira
(v8.3.4#803005)