IMPALA-2336: Ignore trailing comments in non-interactive mode

This patch trims trailing comments while parsing queries in
non-interactive mode. Users often have comments at the end
of a script, which should be ignored. Without this patch,
the script fails with an exception because the trailing
comment is sent to Impala, which expects a valid SQL
statement. The behavior in interactive mode is unchanged.

Change-Id: I723763ef7eedd03cf22058fadf06e9673a0d94d2
Reviewed-on: http://gerrit.cloudera.org:8080/3169
Reviewed-by: Casey Ching <[email protected]>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/5ede8eb8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/5ede8eb8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/5ede8eb8

Branch: refs/heads/master
Commit: 5ede8eb8a7bb0ed455610eec89c3ae49898b778e
Parents: 816735a
Author: Bharath Vissapragada <[email protected]>
Authored: Mon May 23 07:37:02 2016 -0700
Committer: Tim Armstrong <[email protected]>
Committed: Tue May 31 23:32:11 2016 -0700

----------------------------------------------------------------------
 shell/impala_shell.py              | 18 +++++++++++++++++-
 tests/shell/test_file_comments.sql |  6 ++++++
 2 files changed, 23 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/5ede8eb8/shell/impala_shell.py
----------------------------------------------------------------------
diff --git a/shell/impala_shell.py b/shell/impala_shell.py
index 1f82186..6e37aae 100755
--- a/shell/impala_shell.py
+++ b/shell/impala_shell.py
@@ -1167,7 +1167,23 @@ def print_to_stderr(message):
 
 def parse_query_text(query_text, utf8_encode_policy='strict'):
   """Parse query file text to extract queries and encode into utf-8"""
-  return [q.encode('utf-8', utf8_encode_policy) for q in 
sqlparse.split(query_text)]
+  query_list = [q.encode('utf-8', utf8_encode_policy) for q in 
sqlparse.split(query_text)]
+  # Remove trailing comments in the input, if any. We do this because sqlparse 
splits the
+  # input at query boundaries and associates the query only with preceding 
comments
+  # (following comments are associated with the next query). This is a problem 
with
+  # trailing comments. For example, consider the following input:
+  # -------------
+  # -- comment1
+  # select 1;
+  # -- comment2
+  # -------------
+  # When sqlparse splits the query, "comment1" is associated with the query 
"select 1" and
+  # "--comment2" is sent as is. Impala's parser however doesn't consider it a 
valid SQL
+  # and throws an exception. We identify such trailing comments and ignore 
them (do not
+  # send them to Impala).
+  if query_list and not sqlparse.format(query_list[-1], 
strip_comments=True).strip("\n"):
+    query_list.pop()
+  return query_list
 
 def parse_variables(keyvals):
   """Parse variable assignments passed as arguments in the command line"""

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/5ede8eb8/tests/shell/test_file_comments.sql
----------------------------------------------------------------------
diff --git a/tests/shell/test_file_comments.sql 
b/tests/shell/test_file_comments.sql
index e732dfc..49b3c0f 100644
--- a/tests/shell/test_file_comments.sql
+++ b/tests/shell/test_file_comments.sql
@@ -11,3 +11,9 @@ SELECT 1+
 1;
 SELECT /* This comment block
 is OK */ 2;
+/* Trailing comment -1 */
+-- Trailing comment -2
+/*
+-- query inside a comment
+select 2;
+/* nested comments end */

Reply via email to