Copilot commented on code in PR #16480: URL: https://github.com/apache/pinot/pull/16480#discussion_r2246643220
########## pinot-clients/pinot-java-client/src/main/java/org/apache/pinot/client/TableNameExtractor.java: ########## @@ -0,0 +1,410 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.client; +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; +import javax.annotation.Nullable; +import org.apache.calcite.sql.SqlBasicCall; +import org.apache.calcite.sql.SqlIdentifier; +import org.apache.calcite.sql.SqlJoin; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlNodeList; +import org.apache.calcite.sql.SqlOrderBy; +import org.apache.calcite.sql.SqlSelect; +import org.apache.calcite.sql.SqlWith; +import org.apache.calcite.sql.SqlWithItem; +import org.apache.pinot.sql.parsers.CalciteSqlParser; +import org.apache.pinot.sql.parsers.SqlNodeAndOptions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.util.HashMap; +import java.util.Map; +import java.lang.reflect.Field; Review Comment: The import statements are not properly organized. The `java.lang.reflect.Field` import (like the `java.util.HashMap` and `java.util.Map` imports just before it) is appended after the `org.slf4j` imports; it should be sorted in with the other `java.*` imports at the top of the import block rather than trailing the third-party imports. 
```suggestion ``` ########## pinot-clients/pinot-java-client/src/main/java/org/apache/pinot/client/TableNameExtractor.java: ########## @@ -0,0 +1,410 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.client; +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; +import javax.annotation.Nullable; +import org.apache.calcite.sql.SqlBasicCall; +import org.apache.calcite.sql.SqlIdentifier; +import org.apache.calcite.sql.SqlJoin; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlNodeList; +import org.apache.calcite.sql.SqlOrderBy; +import org.apache.calcite.sql.SqlSelect; +import org.apache.calcite.sql.SqlWith; +import org.apache.calcite.sql.SqlWithItem; +import org.apache.pinot.sql.parsers.CalciteSqlParser; +import org.apache.pinot.sql.parsers.SqlNodeAndOptions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.util.HashMap; +import java.util.Map; +import java.lang.reflect.Field; +/** + * Helper class to extract table names from Calcite SqlNode tree. 
+ */ +public class TableNameExtractor { + private static final Logger LOGGER = LoggerFactory.getLogger(TableNameExtractor.class); + // Static map of reserved SQL keywords loaded from config file + private static final Map<String, Boolean> RESERVED_KEYWORDS = loadReservedKeywords(); + /** + * Returns the name of all the tables used in a sql query. + * + * @param query The SQL query string to analyze + * @return name of all the tables used in a sql query, or null if parsing fails + */ + @Nullable + public static String[] resolveTableName(String query) { + SqlNodeAndOptions sqlNodeAndOptions; + try { + sqlNodeAndOptions = CalciteSqlParser.compileToSqlNodeAndOptions(query); + } catch (Exception e) { + LOGGER.error("Cannot parse table name from query: {}. Fallback to broker selector default.", query, e); + return null; + } + try { + Set<String> tableNames = extractTableNamesFromMultiStageQuery(sqlNodeAndOptions.getSqlNode()); + if (tableNames != null) { + return tableNames.toArray(new String[0]); + } + } catch (Exception e) { + LOGGER.error("Cannot extract table name from query: {}. Fallback to broker selector default.", query, e); + } + return null; + } + /** + * Extracts table names from a multi-stage query using Calcite SQL AST traversal. 
+ * + * @param sqlNode The root SqlNode of the parsed query + * @return Set of table names found in the query + */ + private static Set<String> extractTableNamesFromMultiStageQuery(SqlNode sqlNode) { + TableNameExtractor extractor = new TableNameExtractor(); + try { + extractor.extractTableNames(sqlNode); + return extractor.getTableNames(); + } catch (Exception e) { + LOGGER.debug("Failed to extract table names from multi-stage query", e); + return Collections.emptySet(); + } + } + private final Set<String> _tableNames = new HashSet<>(); + private final Set<String> _cteNames = new HashSet<>(); + private boolean _inFromClause = false; + public Set<String> getTableNames() { + return _tableNames; + } + public void extractTableNames(SqlNode node) { + if (node == null) { + return; + } + if (node instanceof SqlWith) { + visitWith((SqlWith) node); + } else if (node instanceof SqlOrderBy) { + visitOrderBy((SqlOrderBy) node); + } else if (node instanceof SqlWithItem) { + visitWithItem((SqlWithItem) node); + } else if (node instanceof SqlSelect) { + visitSelect((SqlSelect) node); + } else if (node instanceof SqlJoin) { + visitJoin((SqlJoin) node); + } else if (node instanceof SqlBasicCall) { + visitBasicCall((SqlBasicCall) node); + } else if (node instanceof SqlIdentifier) { + visitIdentifier((SqlIdentifier) node); + } else if (node instanceof SqlNodeList) { + visitNodeList((SqlNodeList) node); + } else { + // Handle unknown node types by trying to access operands + visitUnknownNode(node); + } + } + private void visitWith(SqlWith with) { + // Visit the WITH list (CTE definitions) + if (with.withList != null) { + visitNodeList(with.withList); + } + // Visit the main query body + if (with.body != null) { + extractTableNames(with.body); + } + } + private void visitOrderBy(SqlOrderBy orderBy) { + // Visit the main query - this is the most important part + if (orderBy.query != null) { + extractTableNames(orderBy.query); + } + // Visit ORDER BY expressions for potential subqueries 
+ if (orderBy.orderList != null) { + // Don't set inFromClause=true for ORDER BY expressions + // as they typically contain column references, not table names + visitNodeList(orderBy.orderList); + } + // Visit OFFSET clause if it contains subqueries (rare but possible) + if (orderBy.offset != null) { + extractTableNames(orderBy.offset); + } + // Visit FETCH/LIMIT clause if it contains subqueries (rare but possible) + if (orderBy.fetch != null) { + extractTableNames(orderBy.fetch); + } + } + private void visitWithItem(SqlWithItem withItem) { + // Track the CTE name so we don't treat it as a table later + if (withItem.name != null) { + String cteName = withItem.name.getSimple(); + _cteNames.add(cteName); + } + // Extract table names from the CTE query definition, not the CTE alias + if (withItem.query != null) { + extractTableNames(withItem.query); + } + } + private void visitSelect(SqlSelect select) { + // Visit FROM clause - this is where we expect to find table names + if (select.getFrom() != null) { + _inFromClause = true; + extractTableNames(select.getFrom()); + _inFromClause = false; + } + // Visit other clauses for subqueries + if (select.getWhere() != null) { + extractTableNames(select.getWhere()); + } + if (select.getGroup() != null) { + visitNodeList(select.getGroup()); + } + if (select.getHaving() != null) { + extractTableNames(select.getHaving()); + } + if (select.getOrderList() != null) { + visitNodeList(select.getOrderList()); + } + if (select.getSelectList() != null) { + visitNodeList(select.getSelectList()); + } + } + private void visitJoin(SqlJoin join) { + // Visit both sides of the join - ensure they're processed as FROM clause items + boolean wasInFromClause = _inFromClause; + if (join.getLeft() != null) { + _inFromClause = true; + extractTableNames(join.getLeft()); + } + if (join.getRight() != null) { + _inFromClause = true; + extractTableNames(join.getRight()); + } + // Visit join condition but not as part of FROM clause context + // This 
handles potential subqueries in join conditions while avoiding + // incorrectly extracting column references as table names + if (join.getCondition() != null) { + _inFromClause = false; + extractTableNames(join.getCondition()); + } + // Restore original context + _inFromClause = wasInFromClause; + } + private void visitBasicCall(SqlBasicCall call) { + String operatorName = call.getOperator().getName().toUpperCase(); + if (operatorName.equals("AS")) { + // Handle table aliases like "tableA AS a" + // For AS operations, the first operand is the actual table name + if (call.getOperandList().size() > 0 && call.getOperandList().get(0) != null) { + extractTableNames(call.getOperandList().get(0)); + } + } else if (operatorName.equals("WITH")) { + // Handle CTE (Common Table Expression) + visitWithClause(call); + } else if (operatorName.equals("VALUES")) { + // Handle VALUES clause - usually doesn't contain table references + // Skip this to avoid false positives + } else { + // For other basic calls, visit all operands + for (SqlNode operand : call.getOperandList()) { + if (operand != null) { + extractTableNames(operand); + } + } + } + } + private void visitIdentifier(SqlIdentifier identifier) { + // Only extract table names when we're in a FROM clause + if (_inFromClause && identifier.names.size() >= 1) { + String tableName = identifier.names.get(identifier.names.size() - 1); + // Filter out SQL keywords, system identifiers, and CTE names + if (!isReservedKeyword(tableName) && !tableName.startsWith("$") && !_cteNames.contains(tableName)) { + _tableNames.add(tableName); + } + } + } + /** + * Visit a SqlNodeList by visiting each node in the list. + */ + private void visitNodeList(SqlNodeList nodeList) { + if (nodeList != null) { + for (SqlNode node : nodeList) { + if (node != null) { + extractTableNames(node); + } + } + } + } + /** + * Handle unknown node types by attempting to visit their operands. 
+ */ + private void visitUnknownNode(SqlNode node) { + try { + // Try to get operands list using reflection or common methods + if (node.getKind() != null) { + switch (node.getKind().name()) { + case "WITH": + visitWithClause(node); + break; + case "ORDER_BY": + visitOrderByCall(node); + break; + default: + // For other unknown nodes, try to visit operands if they exist + visitNodeOperands(node); + break; + } + } else { + visitNodeOperands(node); + } + } catch (Exception e) { + // Ignore reflection errors and continue + } + } + /** + * Handle WITH clause (CTE - Common Table Expression). + */ + private void visitWithClause(SqlNode node) { + try { + // WITH clause typically has operands: [with_list, query] + if (node instanceof SqlBasicCall) { + SqlBasicCall withCall = (SqlBasicCall) node; + for (SqlNode operand : withCall.getOperandList()) { + if (operand != null) { + extractTableNames(operand); + } + } + } + } catch (Exception e) { + // Fallback to generic operand handling + visitNodeOperands(node); + } + } + /** + * Handle ORDER BY clause - this method is now replaced by visitOrderBy(SqlOrderBy). + * Keeping for backward compatibility with visitUnknownNode. + */ + private void visitOrderByCall(SqlNode node) { + try { + if (node instanceof SqlBasicCall) { + SqlBasicCall orderByCall = (SqlBasicCall) node; + // ORDER BY typically has [query, order_list] + for (SqlNode operand : orderByCall.getOperandList()) { + if (operand != null) { + extractTableNames(operand); + } + } + } + } catch (Exception e) { + visitNodeOperands(node); + } + } + /** + * Generic method to visit node operands when specific handling is not available. 
+ */ + private void visitNodeOperands(SqlNode node) { + try { + // Try to access operands through common interface + if (node instanceof SqlBasicCall) { + SqlBasicCall call = (SqlBasicCall) node; + for (SqlNode operand : call.getOperandList()) { + if (operand != null) { + extractTableNames(operand); + } + } + } + } catch (Exception e) { + // Nothing more we can do + } + } + /** + * Check if the given name is a reserved SQL keyword that shouldn't be treated as a table name. + */ + private boolean isReservedKeyword(String name) { + if (name == null) { + return true; + } + String upperName = name.toUpperCase(); + return RESERVED_KEYWORDS.containsKey(upperName); + } + /** + * Load reserved SQL keywords from the SqlParserImplConstants. + * This method uses the generated constants from the parser to get all reserved keywords. + */ + private static Map<String, Boolean> loadReservedKeywords() { + Map<String, Boolean> reservedKeywords = new HashMap<>(); + try { + // Use reflection to access SqlParserImplConstants.tokenImage + Class<?> constantsClass = Class.forName("org.apache.pinot.sql.parsers.parser.SqlParserImplConstants"); + Field tokenImageField = constantsClass.getField("tokenImage"); + String[] tokenImage = (String[]) tokenImageField.get(null); + + // Process each token to extract reserved keywords + for (String token : tokenImage) { + // Skip tokens that are not keywords (like literals, operators, etc.) 
+ if (token.startsWith("\"") && token.endsWith("\"") && !token.startsWith("\"<")) { + // Extract the keyword without quotes + String keyword = token.substring(1, token.length() - 1); + // Skip single character tokens and operators + if (keyword.length() > 1 && !isOperator(keyword)) { + reservedKeywords.put(keyword, true); + } + } + } + LOGGER.debug("Loaded {} reserved keywords from SqlParserImplConstants", reservedKeywords.size()); + } catch (Exception e) { + LOGGER.warn("Failed to load reserved keywords from SqlParserImplConstants, using fallback set", e); + // Fall back to essential reserved keywords + addFallbackReservedKeywords(reservedKeywords); + } Review Comment: Using reflection to access parser constants creates a fragile dependency. Consider using a static list of keywords or finding a more robust API-based approach to avoid potential ClassNotFoundException if the parser implementation changes. ```suggestion // Define a static list of reserved SQL keywords String[] keywords = { "SELECT", "FROM", "WHERE", "GROUP", "BY", "HAVING", "ORDER", "LIMIT", "JOIN", "ON", "AS", "AND", "OR", "NOT", "INSERT", "UPDATE", "DELETE", "CREATE", "DROP", "ALTER", "TABLE", "INDEX", "VIEW", "UNION", "ALL", "DISTINCT" }; // Populate the reserved keywords map for (String keyword : keywords) { reservedKeywords.put(keyword, true); } LOGGER.debug("Loaded {} reserved keywords from static list", reservedKeywords.size()); ``` ########## pinot-clients/pinot-java-client/src/main/java/org/apache/pinot/client/TableNameExtractor.java: ########## @@ -0,0 +1,410 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.client; +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; +import javax.annotation.Nullable; +import org.apache.calcite.sql.SqlBasicCall; +import org.apache.calcite.sql.SqlIdentifier; +import org.apache.calcite.sql.SqlJoin; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlNodeList; +import org.apache.calcite.sql.SqlOrderBy; +import org.apache.calcite.sql.SqlSelect; +import org.apache.calcite.sql.SqlWith; +import org.apache.calcite.sql.SqlWithItem; +import org.apache.pinot.sql.parsers.CalciteSqlParser; +import org.apache.pinot.sql.parsers.SqlNodeAndOptions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.util.HashMap; +import java.util.Map; +import java.lang.reflect.Field; +/** + * Helper class to extract table names from Calcite SqlNode tree. + */ +public class TableNameExtractor { + private static final Logger LOGGER = LoggerFactory.getLogger(TableNameExtractor.class); + // Static map of reserved SQL keywords loaded from config file + private static final Map<String, Boolean> RESERVED_KEYWORDS = loadReservedKeywords(); + /** + * Returns the name of all the tables used in a sql query. 
+ * + * @param query The SQL query string to analyze + * @return name of all the tables used in a sql query, or null if parsing fails + */ + @Nullable + public static String[] resolveTableName(String query) { + SqlNodeAndOptions sqlNodeAndOptions; + try { + sqlNodeAndOptions = CalciteSqlParser.compileToSqlNodeAndOptions(query); + } catch (Exception e) { + LOGGER.error("Cannot parse table name from query: {}. Fallback to broker selector default.", query, e); + return null; + } + try { + Set<String> tableNames = extractTableNamesFromMultiStageQuery(sqlNodeAndOptions.getSqlNode()); + if (tableNames != null) { + return tableNames.toArray(new String[0]); + } + } catch (Exception e) { + LOGGER.error("Cannot extract table name from query: {}. Fallback to broker selector default.", query, e); + } + return null; + } + /** + * Extracts table names from a multi-stage query using Calcite SQL AST traversal. + * + * @param sqlNode The root SqlNode of the parsed query + * @return Set of table names found in the query + */ + private static Set<String> extractTableNamesFromMultiStageQuery(SqlNode sqlNode) { + TableNameExtractor extractor = new TableNameExtractor(); + try { + extractor.extractTableNames(sqlNode); + return extractor.getTableNames(); + } catch (Exception e) { + LOGGER.debug("Failed to extract table names from multi-stage query", e); + return Collections.emptySet(); + } + } + private final Set<String> _tableNames = new HashSet<>(); + private final Set<String> _cteNames = new HashSet<>(); + private boolean _inFromClause = false; + public Set<String> getTableNames() { + return _tableNames; + } + public void extractTableNames(SqlNode node) { + if (node == null) { + return; + } + if (node instanceof SqlWith) { + visitWith((SqlWith) node); + } else if (node instanceof SqlOrderBy) { + visitOrderBy((SqlOrderBy) node); + } else if (node instanceof SqlWithItem) { + visitWithItem((SqlWithItem) node); + } else if (node instanceof SqlSelect) { + visitSelect((SqlSelect) node); + } 
else if (node instanceof SqlJoin) { + visitJoin((SqlJoin) node); + } else if (node instanceof SqlBasicCall) { + visitBasicCall((SqlBasicCall) node); + } else if (node instanceof SqlIdentifier) { + visitIdentifier((SqlIdentifier) node); + } else if (node instanceof SqlNodeList) { + visitNodeList((SqlNodeList) node); + } else { + // Handle unknown node types by trying to access operands + visitUnknownNode(node); + } + } + private void visitWith(SqlWith with) { + // Visit the WITH list (CTE definitions) + if (with.withList != null) { + visitNodeList(with.withList); + } + // Visit the main query body + if (with.body != null) { + extractTableNames(with.body); + } + } + private void visitOrderBy(SqlOrderBy orderBy) { + // Visit the main query - this is the most important part + if (orderBy.query != null) { + extractTableNames(orderBy.query); + } + // Visit ORDER BY expressions for potential subqueries + if (orderBy.orderList != null) { + // Don't set inFromClause=true for ORDER BY expressions + // as they typically contain column references, not table names + visitNodeList(orderBy.orderList); + } + // Visit OFFSET clause if it contains subqueries (rare but possible) + if (orderBy.offset != null) { + extractTableNames(orderBy.offset); + } + // Visit FETCH/LIMIT clause if it contains subqueries (rare but possible) + if (orderBy.fetch != null) { + extractTableNames(orderBy.fetch); + } + } + private void visitWithItem(SqlWithItem withItem) { + // Track the CTE name so we don't treat it as a table later + if (withItem.name != null) { + String cteName = withItem.name.getSimple(); + _cteNames.add(cteName); + } + // Extract table names from the CTE query definition, not the CTE alias + if (withItem.query != null) { + extractTableNames(withItem.query); + } + } + private void visitSelect(SqlSelect select) { + // Visit FROM clause - this is where we expect to find table names + if (select.getFrom() != null) { + _inFromClause = true; + extractTableNames(select.getFrom()); + 
_inFromClause = false; + } + // Visit other clauses for subqueries + if (select.getWhere() != null) { + extractTableNames(select.getWhere()); + } + if (select.getGroup() != null) { + visitNodeList(select.getGroup()); + } + if (select.getHaving() != null) { + extractTableNames(select.getHaving()); + } + if (select.getOrderList() != null) { + visitNodeList(select.getOrderList()); + } + if (select.getSelectList() != null) { + visitNodeList(select.getSelectList()); + } + } + private void visitJoin(SqlJoin join) { + // Visit both sides of the join - ensure they're processed as FROM clause items + boolean wasInFromClause = _inFromClause; + if (join.getLeft() != null) { + _inFromClause = true; + extractTableNames(join.getLeft()); + } + if (join.getRight() != null) { + _inFromClause = true; + extractTableNames(join.getRight()); + } + // Visit join condition but not as part of FROM clause context + // This handles potential subqueries in join conditions while avoiding + // incorrectly extracting column references as table names + if (join.getCondition() != null) { + _inFromClause = false; + extractTableNames(join.getCondition()); + } + // Restore original context + _inFromClause = wasInFromClause; + } + private void visitBasicCall(SqlBasicCall call) { + String operatorName = call.getOperator().getName().toUpperCase(); + if (operatorName.equals("AS")) { + // Handle table aliases like "tableA AS a" + // For AS operations, the first operand is the actual table name + if (call.getOperandList().size() > 0 && call.getOperandList().get(0) != null) { + extractTableNames(call.getOperandList().get(0)); + } + } else if (operatorName.equals("WITH")) { + // Handle CTE (Common Table Expression) + visitWithClause(call); + } else if (operatorName.equals("VALUES")) { + // Handle VALUES clause - usually doesn't contain table references + // Skip this to avoid false positives + } else { + // For other basic calls, visit all operands + for (SqlNode operand : call.getOperandList()) { + if 
(operand != null) { + extractTableNames(operand); + } + } + } + } + private void visitIdentifier(SqlIdentifier identifier) { + // Only extract table names when we're in a FROM clause + if (_inFromClause && identifier.names.size() >= 1) { + String tableName = identifier.names.get(identifier.names.size() - 1); + // Filter out SQL keywords, system identifiers, and CTE names + if (!isReservedKeyword(tableName) && !tableName.startsWith("$") && !_cteNames.contains(tableName)) { + _tableNames.add(tableName); + } + } + } + /** + * Visit a SqlNodeList by visiting each node in the list. + */ + private void visitNodeList(SqlNodeList nodeList) { + if (nodeList != null) { + for (SqlNode node : nodeList) { + if (node != null) { + extractTableNames(node); + } + } + } + } + /** + * Handle unknown node types by attempting to visit their operands. + */ + private void visitUnknownNode(SqlNode node) { + try { + // Try to get operands list using reflection or common methods + if (node.getKind() != null) { + switch (node.getKind().name()) { + case "WITH": + visitWithClause(node); + break; + case "ORDER_BY": + visitOrderByCall(node); + break; + default: + // For other unknown nodes, try to visit operands if they exist + visitNodeOperands(node); + break; + } + } else { + visitNodeOperands(node); + } + } catch (Exception e) { Review Comment: Catching generic `Exception` is too broad. Consider catching more specific exceptions like `ReflectiveOperationException` or the specific exceptions that reflection operations can throw. ```suggestion } catch (IllegalAccessException | InvocationTargetException | NoSuchMethodException e) { ``` ########## pinot-clients/pinot-java-client/src/main/java/org/apache/pinot/client/TableNameExtractor.java: ########## @@ -0,0 +1,410 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.client; +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; +import javax.annotation.Nullable; +import org.apache.calcite.sql.SqlBasicCall; +import org.apache.calcite.sql.SqlIdentifier; +import org.apache.calcite.sql.SqlJoin; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlNodeList; +import org.apache.calcite.sql.SqlOrderBy; +import org.apache.calcite.sql.SqlSelect; +import org.apache.calcite.sql.SqlWith; +import org.apache.calcite.sql.SqlWithItem; +import org.apache.pinot.sql.parsers.CalciteSqlParser; +import org.apache.pinot.sql.parsers.SqlNodeAndOptions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.util.HashMap; +import java.util.Map; +import java.lang.reflect.Field; +/** + * Helper class to extract table names from Calcite SqlNode tree. + */ +public class TableNameExtractor { + private static final Logger LOGGER = LoggerFactory.getLogger(TableNameExtractor.class); + // Static map of reserved SQL keywords loaded from config file + private static final Map<String, Boolean> RESERVED_KEYWORDS = loadReservedKeywords(); + /** + * Returns the name of all the tables used in a sql query. 
+ * + * @param query The SQL query string to analyze + * @return name of all the tables used in a sql query, or null if parsing fails + */ + @Nullable + public static String[] resolveTableName(String query) { + SqlNodeAndOptions sqlNodeAndOptions; + try { + sqlNodeAndOptions = CalciteSqlParser.compileToSqlNodeAndOptions(query); + } catch (Exception e) { + LOGGER.error("Cannot parse table name from query: {}. Fallback to broker selector default.", query, e); + return null; + } + try { + Set<String> tableNames = extractTableNamesFromMultiStageQuery(sqlNodeAndOptions.getSqlNode()); + if (tableNames != null) { + return tableNames.toArray(new String[0]); + } + } catch (Exception e) { + LOGGER.error("Cannot extract table name from query: {}. Fallback to broker selector default.", query, e); + } + return null; + } + /** + * Extracts table names from a multi-stage query using Calcite SQL AST traversal. + * + * @param sqlNode The root SqlNode of the parsed query + * @return Set of table names found in the query + */ + private static Set<String> extractTableNamesFromMultiStageQuery(SqlNode sqlNode) { + TableNameExtractor extractor = new TableNameExtractor(); + try { + extractor.extractTableNames(sqlNode); + return extractor.getTableNames(); + } catch (Exception e) { + LOGGER.debug("Failed to extract table names from multi-stage query", e); + return Collections.emptySet(); + } + } + private final Set<String> _tableNames = new HashSet<>(); + private final Set<String> _cteNames = new HashSet<>(); + private boolean _inFromClause = false; + public Set<String> getTableNames() { + return _tableNames; + } + public void extractTableNames(SqlNode node) { + if (node == null) { + return; + } + if (node instanceof SqlWith) { + visitWith((SqlWith) node); + } else if (node instanceof SqlOrderBy) { + visitOrderBy((SqlOrderBy) node); + } else if (node instanceof SqlWithItem) { + visitWithItem((SqlWithItem) node); + } else if (node instanceof SqlSelect) { + visitSelect((SqlSelect) node); + } 
else if (node instanceof SqlJoin) { + visitJoin((SqlJoin) node); + } else if (node instanceof SqlBasicCall) { + visitBasicCall((SqlBasicCall) node); + } else if (node instanceof SqlIdentifier) { + visitIdentifier((SqlIdentifier) node); + } else if (node instanceof SqlNodeList) { + visitNodeList((SqlNodeList) node); + } else { + // Handle unknown node types by trying to access operands + visitUnknownNode(node); + } + } + private void visitWith(SqlWith with) { + // Visit the WITH list (CTE definitions) + if (with.withList != null) { + visitNodeList(with.withList); + } + // Visit the main query body + if (with.body != null) { + extractTableNames(with.body); + } + } + private void visitOrderBy(SqlOrderBy orderBy) { + // Visit the main query - this is the most important part + if (orderBy.query != null) { + extractTableNames(orderBy.query); + } + // Visit ORDER BY expressions for potential subqueries + if (orderBy.orderList != null) { + // Don't set inFromClause=true for ORDER BY expressions + // as they typically contain column references, not table names + visitNodeList(orderBy.orderList); + } + // Visit OFFSET clause if it contains subqueries (rare but possible) + if (orderBy.offset != null) { + extractTableNames(orderBy.offset); + } + // Visit FETCH/LIMIT clause if it contains subqueries (rare but possible) + if (orderBy.fetch != null) { + extractTableNames(orderBy.fetch); + } + } + private void visitWithItem(SqlWithItem withItem) { + // Track the CTE name so we don't treat it as a table later + if (withItem.name != null) { + String cteName = withItem.name.getSimple(); + _cteNames.add(cteName); + } + // Extract table names from the CTE query definition, not the CTE alias + if (withItem.query != null) { + extractTableNames(withItem.query); + } + } + private void visitSelect(SqlSelect select) { + // Visit FROM clause - this is where we expect to find table names + if (select.getFrom() != null) { + _inFromClause = true; + extractTableNames(select.getFrom()); + 
_inFromClause = false; + } + // Visit other clauses for subqueries + if (select.getWhere() != null) { + extractTableNames(select.getWhere()); + } + if (select.getGroup() != null) { + visitNodeList(select.getGroup()); + } + if (select.getHaving() != null) { + extractTableNames(select.getHaving()); + } + if (select.getOrderList() != null) { + visitNodeList(select.getOrderList()); + } + if (select.getSelectList() != null) { + visitNodeList(select.getSelectList()); + } + } + private void visitJoin(SqlJoin join) { + // Visit both sides of the join - ensure they're processed as FROM clause items + boolean wasInFromClause = _inFromClause; + if (join.getLeft() != null) { + _inFromClause = true; + extractTableNames(join.getLeft()); + } + if (join.getRight() != null) { + _inFromClause = true; + extractTableNames(join.getRight()); + } + // Visit join condition but not as part of FROM clause context + // This handles potential subqueries in join conditions while avoiding + // incorrectly extracting column references as table names + if (join.getCondition() != null) { + _inFromClause = false; + extractTableNames(join.getCondition()); + } + // Restore original context + _inFromClause = wasInFromClause; + } + private void visitBasicCall(SqlBasicCall call) { + String operatorName = call.getOperator().getName().toUpperCase(); + if (operatorName.equals("AS")) { + // Handle table aliases like "tableA AS a" + // For AS operations, the first operand is the actual table name + if (call.getOperandList().size() > 0 && call.getOperandList().get(0) != null) { + extractTableNames(call.getOperandList().get(0)); + } + } else if (operatorName.equals("WITH")) { + // Handle CTE (Common Table Expression) + visitWithClause(call); + } else if (operatorName.equals("VALUES")) { + // Handle VALUES clause - usually doesn't contain table references + // Skip this to avoid false positives + } else { + // For other basic calls, visit all operands + for (SqlNode operand : call.getOperandList()) { + if 
(operand != null) { + extractTableNames(operand); + } + } + } + } + private void visitIdentifier(SqlIdentifier identifier) { + // Only extract table names when we're in a FROM clause + if (_inFromClause && identifier.names.size() >= 1) { + String tableName = identifier.names.get(identifier.names.size() - 1); + // Filter out SQL keywords, system identifiers, and CTE names + if (!isReservedKeyword(tableName) && !tableName.startsWith("$") && !_cteNames.contains(tableName)) { + _tableNames.add(tableName); + } + } + } + /** + * Visit a SqlNodeList by visiting each node in the list. + */ + private void visitNodeList(SqlNodeList nodeList) { + if (nodeList != null) { + for (SqlNode node : nodeList) { + if (node != null) { + extractTableNames(node); + } + } + } + } + /** + * Handle unknown node types by attempting to visit their operands. + */ + private void visitUnknownNode(SqlNode node) { Review Comment: The `visitUnknownNode` method uses string comparison on `node.getKind().name()` in a switch statement, which is less efficient than using enum comparison. Consider switching on `node.getKind()` directly if possible. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
