eschutho commented on code in PR #26767: URL: https://github.com/apache/superset/pull/26767#discussion_r1482142625
########## superset/sql_parse.py: ########## @@ -252,6 +253,182 @@ def __eq__(self, __o: object) -> bool: return str(self) == str(__o) +def extract_tables_from_statement( + statement: exp.Expression, + dialect: Optional[Dialects], +) -> set[Table]: + """ + Extract all table references in a single statement. + + Please not that this is not trivial; consider the following queries: + + DESCRIBE some_table; + SHOW PARTITIONS FROM some_table; + WITH masked_name AS (SELECT * FROM some_table) SELECT * FROM masked_name; + + See the unit tests for other tricky cases. + """ + sources: Iterable[exp.Table] + + if isinstance(statement, exp.Describe): + # A `DESCRIBE` query has no sources in sqlglot, so we need to explicitly + # query for all tables. + sources = statement.find_all(exp.Table) + elif isinstance(statement, exp.Command): + # Commands, like `SHOW COLUMNS FROM foo`, have to be converted into a + # `SELECT` statetement in order to extract tables. + literal = statement.find(exp.Literal) + if not literal: + return set() + + pseudo_query = parse_one(f"SELECT {literal.this}", dialect=dialect) + sources = pseudo_query.find_all(exp.Table) + else: + sources = [ + source + for scope in traverse_scope(statement) + for source in scope.sources.values() + if isinstance(source, exp.Table) and not is_cte(source, scope) + ] + + return { + Table( + source.name, + source.db if source.db != "" else None, + source.catalog if source.catalog != "" else None, + ) + for source in sources + } + + +def is_cte(source: exp.Table, scope: Scope) -> bool: + """ + Is the source a CTE? + + CTEs in the parent scope look like tables (and are represented by + exp.Table objects), but should not be considered as such; + otherwise a user with access to table `foo` could access any table + with a query like this: + + WITH foo AS (SELECT * FROM target_table) SELECT * FROM foo + + """ + parent_sources = scope.parent.sources if scope.parent else {} + ctes_in_scope = { + name + for name, parent_scope in parent_sources.items() + if isinstance(parent_scope, Scope) and parent_scope.scope_type == ScopeType.CTE + } + + return source.name in ctes_in_scope + + +class SQLQuery: + """ + A SQL query, with 0+ statements. + """ + + def __init__( + self, + query: str, + engine: Optional[str] = None, + ): + dialect = SQLGLOT_DIALECTS.get(engine) if engine else None + + self.statements = [ + SQLStatement(statement, engine=engine) + for statement in parse(query, dialect=dialect) + if statement + ] + + def format(self, comments: bool = True) -> str: + """ + Pretty-format the SQL query. + """ + return ";\n".join(statement.format(comments) for statement in self.statements) + + def get_settings(self) -> dict[str, str]: + """ + Return the settings for the SQL query. + + >>> statement = SQLQuery("SET foo = 'bar'; SET foo = 'baz'") + >>> statement.get_settings() + {"foo": "'baz'"} + + """ + settings: dict[str, str] = {} + for statement in self.statements: + settings.update(statement.get_settings()) + + return settings + + +class SQLStatement: Review Comment: I had a similar question, reading `SQLQuery` as solely a data retrieval query or a type of statement, rather than a collection of statements. `SQLInstruction` as another option? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: notifications-unsubscr...@superset.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: notifications-unsubscr...@superset.apache.org For additional commands, e-mail: notifications-h...@superset.apache.org