IMPALA-2336: Ignore trailing comments in non-interactive mode This patch trims trailing comments while parsing queries in non-interactive mode. Users often have comments at the end of a script, which should be ignored. Without this patch, such a script fails with an exception because the trailing comment is sent to Impala, which expects a valid SQL statement. The behavior in interactive mode, however, remains unchanged.
Change-Id: I723763ef7eedd03cf22058fadf06e9673a0d94d2 Reviewed-on: http://gerrit.cloudera.org:8080/3169 Reviewed-by: Casey Ching <[email protected]> Tested-by: Internal Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/5ede8eb8 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/5ede8eb8 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/5ede8eb8 Branch: refs/heads/master Commit: 5ede8eb8a7bb0ed455610eec89c3ae49898b778e Parents: 816735a Author: Bharath Vissapragada <[email protected]> Authored: Mon May 23 07:37:02 2016 -0700 Committer: Tim Armstrong <[email protected]> Committed: Tue May 31 23:32:11 2016 -0700 ---------------------------------------------------------------------- shell/impala_shell.py | 18 +++++++++++++++++- tests/shell/test_file_comments.sql | 6 ++++++ 2 files changed, 23 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/5ede8eb8/shell/impala_shell.py ---------------------------------------------------------------------- diff --git a/shell/impala_shell.py b/shell/impala_shell.py index 1f82186..6e37aae 100755 --- a/shell/impala_shell.py +++ b/shell/impala_shell.py @@ -1167,7 +1167,23 @@ def print_to_stderr(message): def parse_query_text(query_text, utf8_encode_policy='strict'): """Parse query file text to extract queries and encode into utf-8""" - return [q.encode('utf-8', utf8_encode_policy) for q in sqlparse.split(query_text)] + query_list = [q.encode('utf-8', utf8_encode_policy) for q in sqlparse.split(query_text)] + # Remove trailing comments in the input, if any. We do this because sqlparse splits the + # input at query boundaries and associates the query only with preceding comments + # (following comments are associated with the next query). This is a problem with + # trailing comments. 
For example, consider the following input: + # ------------- + # -- comment1 + # select 1; + # -- comment2 + # ------------- + # When sqlparse splits the query, "comment1" is associated with the query "select 1" and + # "--comment2" is sent as is. Impala's parser however doesn't consider it a valid SQL + # and throws an exception. We identify such trailing comments and ignore them (do not + # send them to Impala). + if query_list and not sqlparse.format(query_list[-1], strip_comments=True).strip("\n"): + query_list.pop() + return query_list def parse_variables(keyvals): """Parse variable assignments passed as arguments in the command line""" http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/5ede8eb8/tests/shell/test_file_comments.sql ---------------------------------------------------------------------- diff --git a/tests/shell/test_file_comments.sql b/tests/shell/test_file_comments.sql index e732dfc..49b3c0f 100644 --- a/tests/shell/test_file_comments.sql +++ b/tests/shell/test_file_comments.sql @@ -11,3 +11,9 @@ SELECT 1+ 1; SELECT /* This comment block is OK */ 2; +/* Trailing comment -1 */ +-- Trailing comment -2 +/* +-- query inside a comment +select 2; +/* nested comments end */
