vnhive commented on a change in pull request #2037:
URL: https://github.com/apache/hive/pull/2037#discussion_r596729081
##########
File path: common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
##########
@@ -471,6 +471,9 @@
AMBIGUOUS_STRUCT_ATTRIBUTE(10423, "Attribute \"{0}\" specified more than
once in structured type.", true),
OFFSET_NOT_SUPPORTED_IN_SUBQUERY(10424, "OFFSET is not supported in subquery
of exists", true),
WITH_COL_LIST_NUM_OVERFLOW(10425, "WITH-clause query {0} returns {1}
columns, but {2} labels were specified. The number of column labels must be
smaller or equal to the number of expressions returned by the query.", true),
+ DATACONNECTOR_ALREADY_EXISTS(10426, "Dataconnector {0} already exists",
true),
+ DATACONNECTOR_NOT_EXISTS(10427, "Dataconnector does not exist:"),
Review comment:
This should be Dataconnector {0} does not exist
since the error message is being used with the connector name
throw new HiveException(ErrorMsg.DATACONNECTOR_NOT_EXISTS,
desc.getConnectorName());
##########
File path:
itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java
##########
@@ -244,6 +245,32 @@ public boolean alterDatabase(String catName, String
dbName, Database db)
return objectStore.getAllDatabases(catName);
}
+ @Override
+ public List<String> getAllDataConnectors() throws MetaException {
Review comment:
I don't have a strong opinion on this, but just wanted to bring it up
that,
if a method returns just the list of all the names of connectors, it is
better to name it getAllDataConnectorNames().
See for example there is a method getDataConnector() that returns a
DataConnector object. Whenever possible it would be nice to make the semantics
evident from the nomenclature.
##########
File path:
ql/src/java/org/apache/hadoop/hive/ql/ddl/database/desc/DescDatabaseFormatter.java
##########
@@ -104,6 +113,14 @@ void showDatabaseDescription(DataOutputStream out, String
database, String comme
out.write(ownerType.name().getBytes(StandardCharsets.UTF_8));
}
out.write(Utilities.tabCode);
+ if (connectorName != null) {
Review comment:
nit picking : but why <variable> != null here and null != <variable>
previously ? Can you please consider sticking to one style ?
Also java.util.Objects.isNull can used to check for null
##########
File path: parser/src/java/org/apache/hadoop/hive/ql/parse/CreateDDLParser.g
##########
@@ -108,3 +108,45 @@ createTableStatement
selectStatementWithCTE?
)
;
+
+createDataConnectorStatement
+@init { gParent.pushMsg("create connector statement", state); }
+@after { gParent.popMsg(state); }
+ : KW_CREATE KW_DATACONNECTOR ifNotExists? name=identifier
dataConnectorType dataConnectorUrl dataConnectorComment? ( KW_WITH
KW_DCPROPERTIES dcprops=dcProperties)?
+ -> ^(TOK_CREATEDATACONNECTOR $name ifNotExists? dataConnectorType
dataConnectorUrl dataConnectorComment? $dcprops?)
+ ;
+
+dataConnectorComment
+@init { gParent.pushMsg("dataconnector comment", state); }
+@after { gParent.popMsg(state); }
+ : KW_COMMENT comment=StringLiteral
+ -> ^(TOK_DATACONNECTORCOMMENT $comment)
+ ;
+
+dataConnectorUrl
+@init { gParent.pushMsg("dataconnector URL", state); }
+@after { gParent.popMsg(state); }
+ : KW_URL url=StringLiteral
+ -> ^(TOK_DATACONNECTORURL $url)
+ ;
+
+dataConnectorType
+@init { gParent.pushMsg("dataconnector type", state); }
+@after { gParent.popMsg(state); }
+ : KW_TYPE dcType=StringLiteral
+ -> ^(TOK_DATACONNECTORTYPE $dcType)
+ ;
+
+dcProperties
+@init { gParent.pushMsg("dcproperties", state); }
+@after { gParent.popMsg(state); }
+ :
+ LPAREN dbPropertiesList RPAREN -> ^(TOK_DATACONNECTORPROPERTIES
dbPropertiesList)
+ ;
+
+dropDataConnectorStatement
+@init { gParent.pushMsg("drop connector statement", state); }
+@after { gParent.popMsg(state); }
+ : KW_DROP (KW_DATACONNECTOR) ifExists? identifier
Review comment:
Is the parenthesis surrounding KW_DATACONNECTOR required?
##########
File path: parser/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
##########
@@ -1104,14 +1125,16 @@ orReplace
createDatabaseStatement
@init { pushMsg("create database statement", state); }
@after { popMsg(state); }
- : KW_CREATE (KW_DATABASE|KW_SCHEMA)
+ : KW_CREATE (remote=KW_REMOTE)? (KW_DATABASE|KW_SCHEMA)
ifNotExists?
name=identifier
databaseComment?
dbLocation?
dbManagedLocation?
+ dbConnectorName?
(KW_WITH KW_DBPROPERTIES dbprops=dbProperties)?
Review comment:
I thought if we have dbConnectorName, we won't need dbprops
##########
File path:
ql/src/java/org/apache/hadoop/hive/ql/ddl/database/create/CreateDatabaseAnalyzer.java
##########
@@ -70,19 +73,43 @@ public void analyzeInternal(ASTNode root) throws
SemanticException {
managedLocationUri =
unescapeSQLString(childNode.getChild(0).getText());
outputs.add(toWriteEntity(managedLocationUri));
break;
+ case HiveParser.TOK_DATACONNECTOR:
+ type = "REMOTE";
+ // locationUri = "REMOTE_DATABASE"; // TODO
Review comment:
Could you please leave a comment on what needs to be for the TODO ?
##########
File path: parser/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
##########
@@ -1041,6 +1060,8 @@ ddlStatement
| abortTransactionStatement
| killQueryStatement
| resourcePlanDdlStatements
+ | createDataConnectorStatement
Review comment:
Does the alterDataConnectorStatementSuffix need to be listed here
##########
File path:
ql/src/java/org/apache/hadoop/hive/ql/ddl/database/create/CreateDatabaseAnalyzer.java
##########
@@ -70,19 +73,43 @@ public void analyzeInternal(ASTNode root) throws
SemanticException {
managedLocationUri =
unescapeSQLString(childNode.getChild(0).getText());
outputs.add(toWriteEntity(managedLocationUri));
break;
+ case HiveParser.TOK_DATACONNECTOR:
+ type = "REMOTE";
+ // locationUri = "REMOTE_DATABASE"; // TODO
+ ASTNode nextNode = (ASTNode) root.getChild(i);
+ connectorName = ((ASTNode)nextNode).getChild(0).getText();
+ outputs.add(toWriteEntity(connectorName));
+ // outputs.remove(toWriteEntity(locationUri));
+ if (managedLocationUri != null) {
+ outputs.remove(toWriteEntity(managedLocationUri));
+ managedLocationUri = null;
+ }
+ break;
default:
throw new SemanticException("Unrecognized token in CREATE DATABASE
statement");
}
}
- CreateDatabaseDesc desc = new CreateDatabaseDesc(databaseName, comment,
locationUri, managedLocationUri,
- ifNotExists, props);
- rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(),
desc)));
-
+ CreateDatabaseDesc desc = null;
Database database = new Database(databaseName, comment, locationUri,
props);
- if (managedLocationUri != null) {
- database.setManagedLocationUri(managedLocationUri);
+ if (type.equalsIgnoreCase("NATIVE")) {
+ desc = new CreateDatabaseDesc(databaseName, comment, locationUri,
managedLocationUri, ifNotExists, props);
+ database.setType(DatabaseType.NATIVE);
+ // database = new Database(databaseName, comment, locationUri, props);
+ if (managedLocationUri != null) {
+ database.setManagedLocationUri(managedLocationUri);
+ }
+ } else {
+ String remoteDbName = databaseName;
+ if (props != null && props.get("connector.remoteDbName") != null) //
TODO finalize the property name
+ remoteDbName = props.get("connector.remoteDbName");
+ desc = new CreateDatabaseDesc(databaseName, comment, locationUri, null,
ifNotExists, props, type,
Review comment:
remoteDbName and managedLocationUri are basically the same right ?
or is it that
connector url + remoteDbName = managedLocationUri ?
##########
File path: parser/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
##########
@@ -1104,14 +1125,16 @@ orReplace
createDatabaseStatement
@init { pushMsg("create database statement", state); }
@after { popMsg(state); }
- : KW_CREATE (KW_DATABASE|KW_SCHEMA)
+ : KW_CREATE (remote=KW_REMOTE)? (KW_DATABASE|KW_SCHEMA)
ifNotExists?
name=identifier
databaseComment?
dbLocation?
dbManagedLocation?
+ dbConnectorName?
(KW_WITH KW_DBPROPERTIES dbprops=dbProperties)?
- -> ^(TOK_CREATEDATABASE $name ifNotExists? dbLocation? dbManagedLocation?
databaseComment? $dbprops?)
+ -> {$remote != null}? ^(TOK_CREATEDATABASE $name ifNotExists?
databaseComment? $dbprops? dbConnectorName?)
Review comment:
won't the dbLocation and dbManagedLocation be needed even if the
connector name existed. I thought it the connector encapsulated the connection
information in the database properties.
##########
File path: parser/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
##########
@@ -355,6 +364,11 @@ TOK_ALTERDATABASE_PROPERTIES;
TOK_ALTERDATABASE_OWNER;
TOK_ALTERDATABASE_LOCATION;
TOK_ALTERDATABASE_MANAGEDLOCATION;
+TOK_DATACONNECTORPROPERTIES;
Review comment:
This seems like a redundant definition. Defined in line no 169 and 367.
##########
File path:
ql/src/java/org/apache/hadoop/hive/ql/ddl/database/create/CreateDatabaseAnalyzer.java
##########
@@ -70,19 +73,43 @@ public void analyzeInternal(ASTNode root) throws
SemanticException {
managedLocationUri =
unescapeSQLString(childNode.getChild(0).getText());
outputs.add(toWriteEntity(managedLocationUri));
break;
+ case HiveParser.TOK_DATACONNECTOR:
+ type = "REMOTE";
+ // locationUri = "REMOTE_DATABASE"; // TODO
+ ASTNode nextNode = (ASTNode) root.getChild(i);
+ connectorName = ((ASTNode)nextNode).getChild(0).getText();
+ outputs.add(toWriteEntity(connectorName));
+ // outputs.remove(toWriteEntity(locationUri));
Review comment:
Can you please explain why you have commented out this code ? What
should be done here?
##########
File path:
ql/src/java/org/apache/hadoop/hive/ql/ddl/database/desc/DescDatabaseFormatter.java
##########
@@ -69,6 +71,12 @@ void showDatabaseDescription(DataOutputStream out, String
database, String comme
if (ownerType != null) {
builder.put("ownerType", ownerType.name());
}
+ if (null != connectorName) {
Review comment:
Should there be a field that says whether the database is native or
remote?
##########
File path:
ql/src/java/org/apache/hadoop/hive/ql/ddl/database/desc/DescDatabaseFormatter.java
##########
@@ -104,6 +113,14 @@ void showDatabaseDescription(DataOutputStream out, String
database, String comme
out.write(ownerType.name().getBytes(StandardCharsets.UTF_8));
}
out.write(Utilities.tabCode);
+ if (connectorName != null) {
+ out.write(connectorName.getBytes(StandardCharsets.UTF_8));
+ }
+ out.write(Utilities.tabCode);
+ if (remoteDbName != null) {
+ out.write(remoteDbName.getBytes(StandardCharsets.UTF_8));
+ }
+ out.write(Utilities.tabCode);
Review comment:
is out.write(Utilities .tabCode) deliberately repeated twice in your
changes ?
##########
File path:
ql/src/java/org/apache/hadoop/hive/ql/ddl/database/create/CreateDatabaseOperation.java
##########
@@ -46,14 +48,20 @@ public int execute() throws HiveException {
desc.getDatabaseProperties());
database.setOwnerName(SessionState.getUserFromAuthenticator());
database.setOwnerType(PrincipalType.USER);
- if (desc.getManagedLocationUri() != null) {
- database.setManagedLocationUri(desc.getManagedLocationUri());
- }
-
+ database.setType(desc.getDatabaseType());
try {
- makeLocationQualified(database);
- if
(database.getLocationUri().equalsIgnoreCase(database.getManagedLocationUri())) {
- throw new HiveException("Managed and external locations for database
cannot be the same");
+ if (desc.getDatabaseType() == DatabaseType.NATIVE) {
+ if (desc.getManagedLocationUri() != null) {
+ database.setManagedLocationUri(desc.getManagedLocationUri());
+ }
+ makeLocationQualified(database);
+ if
(database.getLocationUri().equalsIgnoreCase(database.getManagedLocationUri())) {
+ throw new HiveException("Managed and external locations for database
cannot be the same");
+ }
+ } else {
Review comment:
Isn't the following probably better ?
else if(desc.getDatabaseType() == DatabaseType.REMOTE) {
}
else {
<throw an unrecognized database type exception>
}
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]