Github user mattyb149 commented on a diff in the pull request:
https://github.com/apache/nifi/pull/2239#discussion_r148636452
--- Diff:
nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/processors/hive/AbstractHiveQLProcessor.java
---
@@ -216,4 +225,104 @@ protected void setParameter(final PreparedStatement
stmt, final String attrName,
}
}
+ protected static class TableName {
+ private final String database;
+ private final String table;
+ private boolean input = true;
+
+ public TableName(String database, String table) {
+ this.database = database;
+ this.table = table;
+ }
+
+ public void setInput(boolean input) {
+ this.input = input;
+ }
+
+ public boolean isInput() {
+ return input;
+ }
+
+ @Override
+ public String toString() {
+ return database == null || database.isEmpty() ? table :
database + '.' + table;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+
+ TableName tableName = (TableName) o;
+
+ if (database != null ? !database.equals(tableName.database) :
tableName.database != null) return false;
+ return table.equals(tableName.table);
+ }
+
+ @Override
+ public int hashCode() {
+ int result = database != null ? database.hashCode() : 0;
+ result = 31 * result + table.hashCode();
+ return result;
+ }
+ }
+
+ protected Set<TableName> findTableNames(final String query) throws
ParseException {
+ final ASTNode node = new ParseDriver().parse(normalize(query));
+ final HashSet<TableName> tableNames = new HashSet<>();
+ findTableNames(node, tableNames);
+ return tableNames;
+ }
+
+ /**
+ * Normalize query.
+ * Hive resolves prepared statement parameters before executing a
query,
+ * see {@link
org.apache.hive.jdbc.HivePreparedStatement#updateSql(String, HashMap)} for
detail.
+ * HiveParser does not expect '?' to be in a query string, and throws
an Exception if there is one.
+ * In this normalize method, '?' is replaced to 'x' to avoid that.
+ */
+ private String normalize(String query) {
+ return query.replace('?', 'x');
+ }
+
+ private void findTableNames(final Object obj, final Set<TableName>
tableNames) {
+ if (!(obj instanceof CommonTree)) {
+ return;
+ }
+ final CommonTree tree = (CommonTree) obj;
+ final int childCount = tree.getChildCount();
+ if ("TOK_TABNAME".equals(tree.getText())) {
+ final TableName tableName;
+ switch (childCount) {
+ case 1 :
+ tableName = new TableName(null,
tree.getChild(0).getText());
+ break;
+ case 2:
+ tableName = new TableName(tree.getChild(0).getText(),
tree.getChild(1).getText());
+ break;
+ default:
+ throw new IllegalStateException("TOK_TABNAME does not
have expected children, childCount=" + childCount);
+ }
+ // If parent is TOK_TABREF, then it is an input table.
+
tableName.setInput("TOK_TABREF".equals(tree.getParent().getText()));
--- End diff --
We may need setInput and setOutput (and the corresponding getters) here, since a
table could technically be both an input and an output, either in the same statement
or across multiple statements. See my other comment for an example. Also, the
following statements generate nothing in query.input_tables:
```
insert into t values (8,"eight");
create table if not exists t3 (id int, name string);
insert overwrite table t3 select distinct id,name from t;
```
---