http://git-wip-us.apache.org/repos/asf/impala/blob/417bc8c8/shell/ext-py/sqlparse-0.1.19/sqlparse/filters.py ---------------------------------------------------------------------- diff --git a/shell/ext-py/sqlparse-0.1.19/sqlparse/filters.py b/shell/ext-py/sqlparse-0.1.19/sqlparse/filters.py new file mode 100644 index 0000000..676344f --- /dev/null +++ b/shell/ext-py/sqlparse-0.1.19/sqlparse/filters.py @@ -0,0 +1,728 @@ +# -*- coding: utf-8 -*- + +import re + +from os.path import abspath, join + +from sqlparse import sql, tokens as T +from sqlparse.engine import FilterStack +from sqlparse.lexer import tokenize +from sqlparse.pipeline import Pipeline +from sqlparse.tokens import (Comment, Comparison, Keyword, Name, Punctuation, + String, Whitespace) +from sqlparse.utils import memoize_generator +from sqlparse.utils import split_unquoted_newlines + + +# -------------------------- +# token process + +class _CaseFilter: + + ttype = None + + def __init__(self, case=None): + if case is None: + case = 'upper' + assert case in ['lower', 'upper', 'capitalize'] + self.convert = getattr(unicode, case) + + def process(self, stack, stream): + for ttype, value in stream: + if ttype in self.ttype: + value = self.convert(value) + yield ttype, value + + +class KeywordCaseFilter(_CaseFilter): + ttype = T.Keyword + + +class IdentifierCaseFilter(_CaseFilter): + ttype = (T.Name, T.String.Symbol) + + def process(self, stack, stream): + for ttype, value in stream: + if ttype in self.ttype and not value.strip()[0] == '"': + value = self.convert(value) + yield ttype, value + + +class TruncateStringFilter: + + def __init__(self, width, char): + self.width = max(width, 1) + self.char = unicode(char) + + def process(self, stack, stream): + for ttype, value in stream: + if ttype is T.Literal.String.Single: + if value[:2] == '\'\'': + inner = value[2:-2] + quote = u'\'\'' + else: + inner = value[1:-1] + quote = u'\'' + if len(inner) > self.width: + value = u''.join((quote, inner[:self.width], self.char, + quote)) + yield ttype, value + + +class GetComments: + """Get the comments from a stack""" + def process(self, stack, stream): + for token_type, value in stream: + if token_type in Comment: + yield token_type, value + + +class StripComments: + """Strip the comments from a stack""" + def process(self, stack, stream): + for token_type, value in stream: + if token_type not in Comment: + yield token_type, value + + +def StripWhitespace(stream): + "Strip the useless whitespaces from a stream leaving only the minimal ones" + last_type = None + has_space = False + ignore_group = frozenset((Comparison, Punctuation)) + + for token_type, value in stream: + # We got a previous token (not empty first ones) + if last_type: + if token_type in Whitespace: + has_space = True + continue + + # Ignore first empty spaces and dot-commas + elif token_type in (Whitespace, Whitespace.Newline, ignore_group): + continue + + # Yield a whitespace if it can't be ignored + if has_space: + if not ignore_group.intersection((last_type, token_type)): + yield Whitespace, ' ' + has_space = False + + # Yield the token and set its type for checking with the next one + yield token_type, value + last_type = token_type + + +class IncludeStatement: + """Filter that enable a INCLUDE statement""" + + def __init__(self, dirpath=".", maxrecursive=10, raiseexceptions=False): + if maxrecursive <= 0: + raise ValueError('Max recursion limit reached') + + self.dirpath = abspath(dirpath) + self.maxRecursive = maxrecursive + self.raiseexceptions = raiseexceptions + + 
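+        # State flag used by process() below: True from the moment an
+        # INCLUDE keyword is seen until the included file has been handled.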
        self.detected = False

    @memoize_generator
    def process(self, stack, stream):
        # Run over all tokens in the stream
        for token_type, value in stream:
            # INCLUDE statement found, set detected mode
            if token_type in Name and value.upper() == 'INCLUDE':
                self.detected = True
                continue

            # INCLUDE statement was found, parse it
            elif self.detected:
                # Omit whitespaces
                if token_type in Whitespace:
                    continue

                # Found file path to include
                if token_type in String.Symbol:
#                if token_type in tokens.String.Symbol:

                    # Get path of file to include
                    path = join(self.dirpath, value[1:-1])

                    try:
                        f = open(path)
                        raw_sql = f.read()
                        f.close()

                    # There was a problem loading the include file
                    except IOError, err:
                        # Raise the exception to the interpreter
                        if self.raiseexceptions:
                            raise

                        # Put the exception as a comment on the SQL code
                        yield Comment, u'-- IOError: %s\n' % err

                    else:
                        # Create a new FilterStack to parse the read file
                        # and add all its tokens to the main stack recursively
                        try:
                            filtr = IncludeStatement(self.dirpath,
                                                     self.maxRecursive - 1,
                                                     self.raiseexceptions)

                        # Max recursion limit reached
                        except ValueError, err:
                            # Raise the exception to the interpreter
                            if self.raiseexceptions:
                                raise

                            # Put the exception as a comment on the SQL code
                            yield Comment, u'-- ValueError: %s\n' % err

                        stack = FilterStack()
                        stack.preprocess.append(filtr)

                        for tv in stack.run(raw_sql):
                            yield tv

                    # Set normal mode
                    self.detected = False

                # Don't include any token while in detected mode
                continue

            # Normal token
            yield token_type, value


# ----------------------
# statement process

class StripCommentsFilter:

    def _get_next_comment(self, tlist):
        # TODO(andi) Comment types should be unified, see related issue38
        token = tlist.token_next_by_instance(0, sql.Comment)
        if token is None:
            token = tlist.token_next_by_type(0, T.Comment)
        return token

    def _process(self, tlist):
        token = self._get_next_comment(tlist)
        while token:
            tidx = tlist.token_index(token)
            prev = tlist.token_prev(tidx, False)
            next_ = tlist.token_next(tidx, False)
            # Replace by whitespace if prev and next exist and if they're not
            # whitespaces. This doesn't apply if prev or next is a parenthesis.
+ if (prev is not None and next_ is not None + and not prev.is_whitespace() and not next_.is_whitespace() + and not (prev.match(T.Punctuation, '(') + or next_.match(T.Punctuation, ')'))): + tlist.tokens[tidx] = sql.Token(T.Whitespace, ' ') + else: + tlist.tokens.pop(tidx) + token = self._get_next_comment(tlist) + + def process(self, stack, stmt): + [self.process(stack, sgroup) for sgroup in stmt.get_sublists()] + self._process(stmt) + + +class StripWhitespaceFilter: + + def _stripws(self, tlist): + func_name = '_stripws_%s' % tlist.__class__.__name__.lower() + func = getattr(self, func_name, self._stripws_default) + func(tlist) + + def _stripws_default(self, tlist): + last_was_ws = False + for token in tlist.tokens: + if token.is_whitespace(): + if last_was_ws: + token.value = '' + else: + token.value = ' ' + last_was_ws = token.is_whitespace() + + def _stripws_identifierlist(self, tlist): + # Removes newlines before commas, see issue140 + last_nl = None + for token in tlist.tokens[:]: + if (token.ttype is T.Punctuation + and token.value == ',' + and last_nl is not None): + tlist.tokens.remove(last_nl) + if token.is_whitespace(): + last_nl = token + else: + last_nl = None + return self._stripws_default(tlist) + + def _stripws_parenthesis(self, tlist): + if tlist.tokens[1].is_whitespace(): + tlist.tokens.pop(1) + if tlist.tokens[-2].is_whitespace(): + tlist.tokens.pop(-2) + self._stripws_default(tlist) + + def process(self, stack, stmt, depth=0): + [self.process(stack, sgroup, depth + 1) + for sgroup in stmt.get_sublists()] + self._stripws(stmt) + if ( + depth == 0 + and stmt.tokens + and stmt.tokens[-1].is_whitespace() + ): + stmt.tokens.pop(-1) + + +class ReindentFilter: + + def __init__(self, width=2, char=' ', line_width=None): + self.width = width + self.char = char + self.indent = 0 + self.offset = 0 + self.line_width = line_width + self._curr_stmt = None + self._last_stmt = None + + def _flatten_up_to_token(self, token): + """Yields all tokens up to token plus the next one.""" + # helper for _get_offset + iterator = self._curr_stmt.flatten() + for t in iterator: + yield t + if t == token: + raise StopIteration + + def _get_offset(self, token): + raw = ''.join(map(unicode, self._flatten_up_to_token(token))) + line = raw.splitlines()[-1] + # Now take current offset into account and return relative offset. 
+ full_offset = len(line) - len(self.char * (self.width * self.indent)) + return full_offset - self.offset + + def nl(self): + # TODO: newline character should be configurable + space = (self.char * ((self.indent * self.width) + self.offset)) + # Detect runaway indenting due to parsing errors + if len(space) > 200: + # something seems to be wrong, flip back + self.indent = self.offset = 0 + space = (self.char * ((self.indent * self.width) + self.offset)) + ws = '\n' + space + return sql.Token(T.Whitespace, ws) + + def _split_kwds(self, tlist): + split_words = ('FROM', 'STRAIGHT_JOIN$', 'JOIN$', 'AND', 'OR', + 'GROUP', 'ORDER', 'UNION', 'VALUES', + 'SET', 'BETWEEN', 'EXCEPT', 'HAVING') + + def _next_token(i): + t = tlist.token_next_match(i, T.Keyword, split_words, + regex=True) + if t and t.value.upper() == 'BETWEEN': + t = _next_token(tlist.token_index(t) + 1) + if t and t.value.upper() == 'AND': + t = _next_token(tlist.token_index(t) + 1) + return t + + idx = 0 + token = _next_token(idx) + added = set() + while token: + prev = tlist.token_prev(tlist.token_index(token), False) + offset = 1 + if prev and prev.is_whitespace() and prev not in added: + tlist.tokens.pop(tlist.token_index(prev)) + offset += 1 + uprev = unicode(prev) + if (prev and (uprev.endswith('\n') or uprev.endswith('\r'))): + nl = tlist.token_next(token) + else: + nl = self.nl() + added.add(nl) + tlist.insert_before(token, nl) + offset += 1 + token = _next_token(tlist.token_index(nl) + offset) + + def _split_statements(self, tlist): + idx = 0 + token = tlist.token_next_by_type(idx, (T.Keyword.DDL, T.Keyword.DML)) + while token: + prev = tlist.token_prev(tlist.token_index(token), False) + if prev and prev.is_whitespace(): + tlist.tokens.pop(tlist.token_index(prev)) + # only break if it's not the first token + if prev: + nl = self.nl() + tlist.insert_before(token, nl) + token = tlist.token_next_by_type(tlist.token_index(token) + 1, + (T.Keyword.DDL, T.Keyword.DML)) + + def _process(self, tlist): + func_name = '_process_%s' % tlist.__class__.__name__.lower() + func = getattr(self, func_name, self._process_default) + func(tlist) + + def _process_where(self, tlist): + token = tlist.token_next_match(0, T.Keyword, 'WHERE') + try: + tlist.insert_before(token, self.nl()) + except ValueError: # issue121, errors in statement + pass + self.indent += 1 + self._process_default(tlist) + self.indent -= 1 + + def _process_having(self, tlist): + token = tlist.token_next_match(0, T.Keyword, 'HAVING') + try: + tlist.insert_before(token, self.nl()) + except ValueError: # issue121, errors in statement + pass + self.indent += 1 + self._process_default(tlist) + self.indent -= 1 + + def _process_parenthesis(self, tlist): + first = tlist.token_next(0) + indented = False + if first and first.ttype in (T.Keyword.DML, T.Keyword.DDL): + self.indent += 1 + tlist.tokens.insert(0, self.nl()) + indented = True + num_offset = self._get_offset( + tlist.token_next_match(0, T.Punctuation, '(')) + self.offset += num_offset + self._process_default(tlist, stmts=not indented) + if indented: + self.indent -= 1 + self.offset -= num_offset + + def _process_identifierlist(self, tlist): + identifiers = list(tlist.get_identifiers()) + if len(identifiers) > 1 and not tlist.within(sql.Function): + first = list(identifiers[0].flatten())[0] + if self.char == '\t': + # when using tabs we don't count the actual word length + # in spaces. 
+ num_offset = 1 + else: + num_offset = self._get_offset(first) - len(first.value) + self.offset += num_offset + for token in identifiers[1:]: + tlist.insert_before(token, self.nl()) + self.offset -= num_offset + self._process_default(tlist) + + def _process_case(self, tlist): + is_first = True + num_offset = None + case = tlist.tokens[0] + outer_offset = self._get_offset(case) - len(case.value) + self.offset += outer_offset + for cond, value in tlist.get_cases(): + if is_first: + tcond = list(cond[0].flatten())[0] + is_first = False + num_offset = self._get_offset(tcond) - len(tcond.value) + self.offset += num_offset + continue + if cond is None: + token = value[0] + else: + token = cond[0] + tlist.insert_before(token, self.nl()) + # Line breaks on group level are done. Now let's add an offset of + # 5 (=length of "when", "then", "else") and process subgroups. + self.offset += 5 + self._process_default(tlist) + self.offset -= 5 + if num_offset is not None: + self.offset -= num_offset + end = tlist.token_next_match(0, T.Keyword, 'END') + tlist.insert_before(end, self.nl()) + self.offset -= outer_offset + + def _process_default(self, tlist, stmts=True, kwds=True): + if stmts: + self._split_statements(tlist) + if kwds: + self._split_kwds(tlist) + [self._process(sgroup) for sgroup in tlist.get_sublists()] + + def process(self, stack, stmt): + if isinstance(stmt, sql.Statement): + self._curr_stmt = stmt + self._process(stmt) + if isinstance(stmt, sql.Statement): + if self._last_stmt is not None: + if unicode(self._last_stmt).endswith('\n'): + nl = '\n' + else: + nl = '\n\n' + stmt.tokens.insert( + 0, sql.Token(T.Whitespace, nl)) + if self._last_stmt != stmt: + self._last_stmt = stmt + + +# FIXME: Doesn't work ;) +class RightMarginFilter: + + keep_together = ( + # sql.TypeCast, sql.Identifier, sql.Alias, + ) + + def __init__(self, width=79): + self.width = width + self.line = '' + + def _process(self, stack, group, stream): + for token in stream: + if token.is_whitespace() and '\n' in token.value: + if token.value.endswith('\n'): + self.line = '' + else: + self.line = token.value.splitlines()[-1] + elif (token.is_group() + and not token.__class__ in self.keep_together): + token.tokens = self._process(stack, token, token.tokens) + else: + val = unicode(token) + if len(self.line) + len(val) > self.width: + match = re.search('^ +', self.line) + if match is not None: + indent = match.group() + else: + indent = '' + yield sql.Token(T.Whitespace, '\n%s' % indent) + self.line = indent + self.line += val + yield token + + def process(self, stack, group): + return + group.tokens = self._process(stack, group, group.tokens) + + +class ColumnsSelect: + """Get the columns names of a SELECT query""" + def process(self, stack, stream): + mode = 0 + oldValue = "" + parenthesis = 0 + + for token_type, value in stream: + # Ignore comments + if token_type in Comment: + continue + + # We have not detected a SELECT statement + if mode == 0: + if token_type in Keyword and value == 'SELECT': + mode = 1 + + # We have detected a SELECT statement + elif mode == 1: + if value == 'FROM': + if oldValue: + yield oldValue + + mode = 3 # Columns have been checked + + elif value == 'AS': + oldValue = "" + mode = 2 + + elif (token_type == Punctuation + and value == ',' and not parenthesis): + if oldValue: + yield oldValue + oldValue = "" + + elif token_type not in Whitespace: + if value == '(': + parenthesis += 1 + elif value == ')': + parenthesis -= 1 + + oldValue += value + + # We are processing an AS keyword + elif mode == 
2:
                # We also check for Keywords because of a bug in SQLParse
                if token_type == Name or token_type == Keyword:
                    yield value
                    mode = 1


# ---------------------------
# postprocess

class SerializerUnicode:

    def process(self, stack, stmt):
        raw = unicode(stmt)
        lines = split_unquoted_newlines(raw)
        res = '\n'.join(line.rstrip() for line in lines)
        return res


def Tokens2Unicode(stream):
    result = ""

    for _, value in stream:
        result += unicode(value)

    return result


class OutputFilter:
    varname_prefix = ''

    def __init__(self, varname='sql'):
        self.varname = self.varname_prefix + varname
        self.count = 0

    def _process(self, stream, varname, has_nl):
        raise NotImplementedError

    def process(self, stack, stmt):
        self.count += 1
        if self.count > 1:
            varname = '%s%d' % (self.varname, self.count)
        else:
            varname = self.varname

        has_nl = len(unicode(stmt).strip().splitlines()) > 1
        stmt.tokens = self._process(stmt.tokens, varname, has_nl)
        return stmt


class OutputPythonFilter(OutputFilter):
    def _process(self, stream, varname, has_nl):
        # SQL query assignment to varname
        if self.count > 1:
            yield sql.Token(T.Whitespace, '\n')
        yield sql.Token(T.Name, varname)
        yield sql.Token(T.Whitespace, ' ')
        yield sql.Token(T.Operator, '=')
        yield sql.Token(T.Whitespace, ' ')
        if has_nl:
            yield sql.Token(T.Operator, '(')
        yield sql.Token(T.Text, "'")

        # Print the tokens between the quotes
        for token in stream:
            # Token is a new line separator
            if token.is_whitespace() and '\n' in token.value:
                # Close quote and add a new line
                yield sql.Token(T.Text, " '")
                yield sql.Token(T.Whitespace, '\n')

                # Quote header on secondary lines
                yield sql.Token(T.Whitespace, ' ' * (len(varname) + 4))
                yield sql.Token(T.Text, "'")

                # Indentation
                after_lb = token.value.split('\n', 1)[1]
                if after_lb:
                    yield sql.Token(T.Whitespace, after_lb)
                continue

            # Token has escape chars
            elif "'" in token.value:
                token.value = token.value.replace("'", "\\'")

            # Put the token
            yield sql.Token(T.Text, token.value)

        # Close quote
        yield sql.Token(T.Text, "'")
        if has_nl:
            yield sql.Token(T.Operator, ')')


class OutputPHPFilter(OutputFilter):
    varname_prefix = '$'

    def _process(self, stream, varname, has_nl):
        # SQL query assignment to varname (quote header)
        if self.count > 1:
            yield sql.Token(T.Whitespace, '\n')
        yield sql.Token(T.Name, varname)
        yield sql.Token(T.Whitespace, ' ')
        if has_nl:
            yield sql.Token(T.Whitespace, ' ')
        yield sql.Token(T.Operator, '=')
        yield sql.Token(T.Whitespace, ' ')
        yield sql.Token(T.Text, '"')

        # Print the tokens between the quotes
        for token in stream:
            # Token is a new line separator
            if token.is_whitespace() and '\n' in token.value:
                # Close quote and add a new line
                yield sql.Token(T.Text, ' ";')
                yield sql.Token(T.Whitespace, '\n')

                # Quote header on secondary lines
                yield sql.Token(T.Name, varname)
                yield sql.Token(T.Whitespace, ' ')
                yield sql.Token(T.Operator, '.=')
                yield sql.Token(T.Whitespace, ' ')
                yield sql.Token(T.Text, '"')

                # Indentation
                after_lb = token.value.split('\n', 1)[1]
                if after_lb:
                    yield sql.Token(T.Whitespace, after_lb)
                continue

            # Token has escape chars
            elif '"' in token.value:
                token.value = token.value.replace('"', '\\"')

            # Put the token
            yield sql.Token(T.Text, token.value)

        # Close quote
        yield sql.Token(T.Text, '"')
        yield sql.Token(T.Punctuation, ';')


class Limit:
    """Get the LIMIT of a query.
+ + If not defined, return -1 (SQL specification for no LIMIT query) + """ + def process(self, stack, stream): + index = 7 + stream = list(stream) + stream.reverse() + + # Run over all tokens in the stream from the end + for token_type, value in stream: + index -= 1 + +# if index and token_type in Keyword: + if index and token_type in Keyword and value == 'LIMIT': + return stream[4 - index][1] + + return -1 + + +def compact(stream): + """Function that return a compacted version of the stream""" + pipe = Pipeline() + + pipe.append(StripComments()) + pipe.append(StripWhitespace) + + return pipe(stream)
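For reference, a minimal sketch of how these stream filters compose (assuming
the vendored sqlparse-0.1.19 package above is importable; the query string and
the output shown are illustrative, reasoned from StripComments and
StripWhitespace):

    from sqlparse.lexer import tokenize
    from sqlparse.filters import compact, Tokens2Unicode

    # tokenize() yields (ttype, value) pairs; compact() pipes them through
    # StripComments and then StripWhitespace via a Pipeline.
    stream = tokenize("SELECT  a ,  b  -- pick two columns\nFROM tbl LIMIT 10")
    print Tokens2Unicode(compact(stream))
    # -> SELECT a,b FROM tbl LIMIT 10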
http://git-wip-us.apache.org/repos/asf/impala/blob/417bc8c8/shell/ext-py/sqlparse-0.1.19/sqlparse/formatter.py ---------------------------------------------------------------------- diff --git a/shell/ext-py/sqlparse-0.1.19/sqlparse/formatter.py b/shell/ext-py/sqlparse-0.1.19/sqlparse/formatter.py new file mode 100644 index 0000000..811f5af --- /dev/null +++ b/shell/ext-py/sqlparse-0.1.19/sqlparse/formatter.py @@ -0,0 +1,137 @@ +# Copyright (C) 2008 Andi Albrecht, [email protected] +# +# This module is part of python-sqlparse and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php. + +"""SQL formatter""" + +from sqlparse import filters +from sqlparse.exceptions import SQLParseError + + +def validate_options(options): + """Validates options.""" + kwcase = options.get('keyword_case', None) + if kwcase not in [None, 'upper', 'lower', 'capitalize']: + raise SQLParseError('Invalid value for keyword_case: %r' % kwcase) + + idcase = options.get('identifier_case', None) + if idcase not in [None, 'upper', 'lower', 'capitalize']: + raise SQLParseError('Invalid value for identifier_case: %r' % idcase) + + ofrmt = options.get('output_format', None) + if ofrmt not in [None, 'sql', 'python', 'php']: + raise SQLParseError('Unknown output format: %r' % ofrmt) + + strip_comments = options.get('strip_comments', False) + if strip_comments not in [True, False]: + raise SQLParseError('Invalid value for strip_comments: %r' + % strip_comments) + + strip_ws = options.get('strip_whitespace', False) + if strip_ws not in [True, False]: + raise SQLParseError('Invalid value for strip_whitespace: %r' + % strip_ws) + + truncate_strings = options.get('truncate_strings', None) + if truncate_strings is not None: + try: + truncate_strings = int(truncate_strings) + except (ValueError, TypeError): + raise SQLParseError('Invalid value for truncate_strings: %r' + % truncate_strings) + if truncate_strings <= 1: + raise SQLParseError('Invalid value for truncate_strings: %r' + % truncate_strings) + options['truncate_strings'] = truncate_strings + options['truncate_char'] = options.get('truncate_char', '[...]') + + reindent = options.get('reindent', False) + if reindent not in [True, False]: + raise SQLParseError('Invalid value for reindent: %r' + % reindent) + elif reindent: + options['strip_whitespace'] = True + indent_tabs = options.get('indent_tabs', False) + if indent_tabs not in [True, False]: + raise SQLParseError('Invalid value for indent_tabs: %r' % indent_tabs) + elif indent_tabs: + options['indent_char'] = '\t' + else: + options['indent_char'] = ' ' + indent_width = options.get('indent_width', 2) + try: + indent_width = int(indent_width) + except (TypeError, ValueError): + raise SQLParseError('indent_width requires an integer') + if indent_width < 1: + raise SQLParseError('indent_width requires an positive integer') + options['indent_width'] = indent_width + + right_margin = options.get('right_margin', None) + if right_margin is not None: + try: + right_margin = int(right_margin) + except (TypeError, ValueError): + raise SQLParseError('right_margin requires an integer') + if right_margin < 10: + raise SQLParseError('right_margin requires an integer > 10') + options['right_margin'] = right_margin + + return options + + +def build_filter_stack(stack, options): + """Setup and return a filter stack. + + Args: + stack: :class:`~sqlparse.filters.FilterStack` instance + options: Dictionary with options validated by validate_options. 
+    """
+    # Token filter
+    if options.get('keyword_case', None):
+        stack.preprocess.append(
+            filters.KeywordCaseFilter(options['keyword_case']))
+
+    if options.get('identifier_case', None):
+        stack.preprocess.append(
+            filters.IdentifierCaseFilter(options['identifier_case']))
+
+    if options.get('truncate_strings', None) is not None:
+        stack.preprocess.append(filters.TruncateStringFilter(
+            width=options['truncate_strings'], char=options['truncate_char']))
+
+    # After grouping
+    if options.get('strip_comments', False):
+        stack.enable_grouping()
+        stack.stmtprocess.append(filters.StripCommentsFilter())
+
+    if (options.get('strip_whitespace', False)
+            or options.get('reindent', False)):
+        stack.enable_grouping()
+        stack.stmtprocess.append(filters.StripWhitespaceFilter())
+
+    if options.get('reindent', False):
+        stack.enable_grouping()
+        stack.stmtprocess.append(
+            filters.ReindentFilter(char=options['indent_char'],
+                                   width=options['indent_width']))
+
+    if options.get('right_margin', False):
+        stack.enable_grouping()
+        stack.stmtprocess.append(
+            filters.RightMarginFilter(width=options['right_margin']))
+
+    # Serializer
+    if options.get('output_format'):
+        frmt = options['output_format']
+        if frmt.lower() == 'php':
+            fltr = filters.OutputPHPFilter()
+        elif frmt.lower() == 'python':
+            fltr = filters.OutputPythonFilter()
+        else:
+            fltr = None
+        if fltr is not None:
+            stack.postprocess.append(fltr)
+
+    return stack

http://git-wip-us.apache.org/repos/asf/impala/blob/417bc8c8/shell/ext-py/sqlparse-0.1.19/sqlparse/functions.py
----------------------------------------------------------------------
diff --git a/shell/ext-py/sqlparse-0.1.19/sqlparse/functions.py b/shell/ext-py/sqlparse-0.1.19/sqlparse/functions.py
new file mode 100644
index 0000000..e54457e
--- /dev/null
+++ b/shell/ext-py/sqlparse-0.1.19/sqlparse/functions.py
@@ -0,0 +1,44 @@
+'''
+Created on 17/05/2012
+
+@author: piranna
+
+Several utility functions to extract info from SQL statements
+'''
+
+from sqlparse.filters import ColumnsSelect, Limit
+from sqlparse.pipeline import Pipeline
+from sqlparse.tokens import Keyword, Whitespace
+
+
+def getlimit(stream):
+    """Return the LIMIT of an input SQL statement"""
+    pipe = Pipeline()
+
+    pipe.append(Limit())
+
+    result = pipe(stream)
+    try:
+        return int(result)
+    except ValueError:
+        return result
+
+
+def getcolumns(stream):
+    """Return the columns of a SELECT query"""
+    pipe = Pipeline()
+
+    pipe.append(ColumnsSelect())
+
+    return pipe(stream)
+
+
+class IsType(object):
+    """Functor that returns whether a statement is of a specific type"""
+    def __init__(self, type):
+        self.type = type
+
+    def __call__(self, stream):
+        for token_type, value in stream:
+            if token_type not in Whitespace:
+                return token_type in Keyword and value == self.type

http://git-wip-us.apache.org/repos/asf/impala/blob/417bc8c8/shell/ext-py/sqlparse-0.1.19/sqlparse/keywords.py
----------------------------------------------------------------------
diff --git a/shell/ext-py/sqlparse-0.1.19/sqlparse/keywords.py b/shell/ext-py/sqlparse-0.1.19/sqlparse/keywords.py
new file mode 100644
index 0000000..1595aa8
--- /dev/null
+++ b/shell/ext-py/sqlparse-0.1.19/sqlparse/keywords.py
@@ -0,0 +1,574 @@
+from sqlparse import tokens
+
+KEYWORDS = {
+    'ABORT': tokens.Keyword,
+    'ABS': tokens.Keyword,
+    'ABSOLUTE': tokens.Keyword,
+    'ACCESS': tokens.Keyword,
+    'ADA': tokens.Keyword,
+    'ADD': tokens.Keyword,
+    'ADMIN': tokens.Keyword,
+    'AFTER': tokens.Keyword,
+    'AGGREGATE': tokens.Keyword,
+    'ALIAS': tokens.Keyword,
+
'ALL': tokens.Keyword, + 'ALLOCATE': tokens.Keyword, + 'ANALYSE': tokens.Keyword, + 'ANALYZE': tokens.Keyword, + 'ANY': tokens.Keyword, + 'ARE': tokens.Keyword, + 'ASC': tokens.Keyword.Order, + 'ASENSITIVE': tokens.Keyword, + 'ASSERTION': tokens.Keyword, + 'ASSIGNMENT': tokens.Keyword, + 'ASYMMETRIC': tokens.Keyword, + 'AT': tokens.Keyword, + 'ATOMIC': tokens.Keyword, + 'AUTHORIZATION': tokens.Keyword, + 'AVG': tokens.Keyword, + + 'BACKWARD': tokens.Keyword, + 'BEFORE': tokens.Keyword, + 'BEGIN': tokens.Keyword, + 'BETWEEN': tokens.Keyword, + 'BITVAR': tokens.Keyword, + 'BIT_LENGTH': tokens.Keyword, + 'BOTH': tokens.Keyword, + 'BREADTH': tokens.Keyword, + + # 'C': tokens.Keyword, # most likely this is an alias + 'CACHE': tokens.Keyword, + 'CALL': tokens.Keyword, + 'CALLED': tokens.Keyword, + 'CARDINALITY': tokens.Keyword, + 'CASCADE': tokens.Keyword, + 'CASCADED': tokens.Keyword, + 'CAST': tokens.Keyword, + 'CATALOG': tokens.Keyword, + 'CATALOG_NAME': tokens.Keyword, + 'CHAIN': tokens.Keyword, + 'CHARACTERISTICS': tokens.Keyword, + 'CHARACTER_LENGTH': tokens.Keyword, + 'CHARACTER_SET_CATALOG': tokens.Keyword, + 'CHARACTER_SET_NAME': tokens.Keyword, + 'CHARACTER_SET_SCHEMA': tokens.Keyword, + 'CHAR_LENGTH': tokens.Keyword, + 'CHECK': tokens.Keyword, + 'CHECKED': tokens.Keyword, + 'CHECKPOINT': tokens.Keyword, + 'CLASS': tokens.Keyword, + 'CLASS_ORIGIN': tokens.Keyword, + 'CLOB': tokens.Keyword, + 'CLOSE': tokens.Keyword, + 'CLUSTER': tokens.Keyword, + 'COALESCE': tokens.Keyword, + 'COBOL': tokens.Keyword, + 'COLLATE': tokens.Keyword, + 'COLLATION': tokens.Keyword, + 'COLLATION_CATALOG': tokens.Keyword, + 'COLLATION_NAME': tokens.Keyword, + 'COLLATION_SCHEMA': tokens.Keyword, + 'COLLECT': tokens.Keyword, + 'COLUMN': tokens.Keyword, + 'COLUMN_NAME': tokens.Keyword, + 'COMMAND_FUNCTION': tokens.Keyword, + 'COMMAND_FUNCTION_CODE': tokens.Keyword, + 'COMMENT': tokens.Keyword, + 'COMMIT': tokens.Keyword.DML, + 'COMMITTED': tokens.Keyword, + 'COMPLETION': tokens.Keyword, + 'CONDITION_NUMBER': tokens.Keyword, + 'CONNECT': tokens.Keyword, + 'CONNECTION': tokens.Keyword, + 'CONNECTION_NAME': tokens.Keyword, + 'CONSTRAINT': tokens.Keyword, + 'CONSTRAINTS': tokens.Keyword, + 'CONSTRAINT_CATALOG': tokens.Keyword, + 'CONSTRAINT_NAME': tokens.Keyword, + 'CONSTRAINT_SCHEMA': tokens.Keyword, + 'CONSTRUCTOR': tokens.Keyword, + 'CONTAINS': tokens.Keyword, + 'CONTINUE': tokens.Keyword, + 'CONVERSION': tokens.Keyword, + 'CONVERT': tokens.Keyword, + 'COPY': tokens.Keyword, + 'CORRESPONTING': tokens.Keyword, + 'COUNT': tokens.Keyword, + 'CREATEDB': tokens.Keyword, + 'CREATEUSER': tokens.Keyword, + 'CROSS': tokens.Keyword, + 'CUBE': tokens.Keyword, + 'CURRENT': tokens.Keyword, + 'CURRENT_DATE': tokens.Keyword, + 'CURRENT_PATH': tokens.Keyword, + 'CURRENT_ROLE': tokens.Keyword, + 'CURRENT_TIME': tokens.Keyword, + 'CURRENT_TIMESTAMP': tokens.Keyword, + 'CURRENT_USER': tokens.Keyword, + 'CURSOR': tokens.Keyword, + 'CURSOR_NAME': tokens.Keyword, + 'CYCLE': tokens.Keyword, + + 'DATA': tokens.Keyword, + 'DATABASE': tokens.Keyword, + 'DATETIME_INTERVAL_CODE': tokens.Keyword, + 'DATETIME_INTERVAL_PRECISION': tokens.Keyword, + 'DAY': tokens.Keyword, + 'DEALLOCATE': tokens.Keyword, + 'DECLARE': tokens.Keyword, + 'DEFAULT': tokens.Keyword, + 'DEFAULTS': tokens.Keyword, + 'DEFERRABLE': tokens.Keyword, + 'DEFERRED': tokens.Keyword, + 'DEFINED': tokens.Keyword, + 'DEFINER': tokens.Keyword, + 'DELIMITER': tokens.Keyword, + 'DELIMITERS': tokens.Keyword, + 'DEREF': tokens.Keyword, + 'DESC': tokens.Keyword.Order, + 'DESCRIBE': 
tokens.Keyword, + 'DESCRIPTOR': tokens.Keyword, + 'DESTROY': tokens.Keyword, + 'DESTRUCTOR': tokens.Keyword, + 'DETERMINISTIC': tokens.Keyword, + 'DIAGNOSTICS': tokens.Keyword, + 'DICTIONARY': tokens.Keyword, + 'DISCONNECT': tokens.Keyword, + 'DISPATCH': tokens.Keyword, + 'DO': tokens.Keyword, + 'DOMAIN': tokens.Keyword, + 'DYNAMIC': tokens.Keyword, + 'DYNAMIC_FUNCTION': tokens.Keyword, + 'DYNAMIC_FUNCTION_CODE': tokens.Keyword, + + 'EACH': tokens.Keyword, + 'ENCODING': tokens.Keyword, + 'ENCRYPTED': tokens.Keyword, + 'END-EXEC': tokens.Keyword, + 'EQUALS': tokens.Keyword, + 'ESCAPE': tokens.Keyword, + 'EVERY': tokens.Keyword, + 'EXCEPT': tokens.Keyword, + 'ESCEPTION': tokens.Keyword, + 'EXCLUDING': tokens.Keyword, + 'EXCLUSIVE': tokens.Keyword, + 'EXEC': tokens.Keyword, + 'EXECUTE': tokens.Keyword, + 'EXISTING': tokens.Keyword, + 'EXISTS': tokens.Keyword, + 'EXTERNAL': tokens.Keyword, + 'EXTRACT': tokens.Keyword, + + 'FALSE': tokens.Keyword, + 'FETCH': tokens.Keyword, + 'FINAL': tokens.Keyword, + 'FIRST': tokens.Keyword, + 'FORCE': tokens.Keyword, + 'FOREACH': tokens.Keyword, + 'FOREIGN': tokens.Keyword, + 'FORTRAN': tokens.Keyword, + 'FORWARD': tokens.Keyword, + 'FOUND': tokens.Keyword, + 'FREE': tokens.Keyword, + 'FREEZE': tokens.Keyword, + 'FULL': tokens.Keyword, + 'FUNCTION': tokens.Keyword, + + # 'G': tokens.Keyword, + 'GENERAL': tokens.Keyword, + 'GENERATED': tokens.Keyword, + 'GET': tokens.Keyword, + 'GLOBAL': tokens.Keyword, + 'GO': tokens.Keyword, + 'GOTO': tokens.Keyword, + 'GRANT': tokens.Keyword, + 'GRANTED': tokens.Keyword, + 'GROUPING': tokens.Keyword, + + 'HANDLER': tokens.Keyword, + 'HAVING': tokens.Keyword, + 'HIERARCHY': tokens.Keyword, + 'HOLD': tokens.Keyword, + 'HOST': tokens.Keyword, + + 'IDENTITY': tokens.Keyword, + 'IGNORE': tokens.Keyword, + 'ILIKE': tokens.Keyword, + 'IMMEDIATE': tokens.Keyword, + 'IMMUTABLE': tokens.Keyword, + + 'IMPLEMENTATION': tokens.Keyword, + 'IMPLICIT': tokens.Keyword, + 'INCLUDING': tokens.Keyword, + 'INCREMENT': tokens.Keyword, + 'INDEX': tokens.Keyword, + + 'INDITCATOR': tokens.Keyword, + 'INFIX': tokens.Keyword, + 'INHERITS': tokens.Keyword, + 'INITIALIZE': tokens.Keyword, + 'INITIALLY': tokens.Keyword, + 'INOUT': tokens.Keyword, + 'INPUT': tokens.Keyword, + 'INSENSITIVE': tokens.Keyword, + 'INSTANTIABLE': tokens.Keyword, + 'INSTEAD': tokens.Keyword, + 'INTERSECT': tokens.Keyword, + 'INTO': tokens.Keyword, + 'INVOKER': tokens.Keyword, + 'IS': tokens.Keyword, + 'ISNULL': tokens.Keyword, + 'ISOLATION': tokens.Keyword, + 'ITERATE': tokens.Keyword, + + # 'K': tokens.Keyword, + 'KEY': tokens.Keyword, + 'KEY_MEMBER': tokens.Keyword, + 'KEY_TYPE': tokens.Keyword, + + 'LANCOMPILER': tokens.Keyword, + 'LANGUAGE': tokens.Keyword, + 'LARGE': tokens.Keyword, + 'LAST': tokens.Keyword, + 'LATERAL': tokens.Keyword, + 'LEADING': tokens.Keyword, + 'LENGTH': tokens.Keyword, + 'LESS': tokens.Keyword, + 'LEVEL': tokens.Keyword, + 'LIMIT': tokens.Keyword, + 'LISTEN': tokens.Keyword, + 'LOAD': tokens.Keyword, + 'LOCAL': tokens.Keyword, + 'LOCALTIME': tokens.Keyword, + 'LOCALTIMESTAMP': tokens.Keyword, + 'LOCATION': tokens.Keyword, + 'LOCATOR': tokens.Keyword, + 'LOCK': tokens.Keyword, + 'LOWER': tokens.Keyword, + + # 'M': tokens.Keyword, + 'MAP': tokens.Keyword, + 'MATCH': tokens.Keyword, + 'MAXVALUE': tokens.Keyword, + 'MESSAGE_LENGTH': tokens.Keyword, + 'MESSAGE_OCTET_LENGTH': tokens.Keyword, + 'MESSAGE_TEXT': tokens.Keyword, + 'METHOD': tokens.Keyword, + 'MINUTE': tokens.Keyword, + 'MINVALUE': tokens.Keyword, + 'MOD': tokens.Keyword, + 'MODE': 
tokens.Keyword, + 'MODIFIES': tokens.Keyword, + 'MODIFY': tokens.Keyword, + 'MONTH': tokens.Keyword, + 'MORE': tokens.Keyword, + 'MOVE': tokens.Keyword, + 'MUMPS': tokens.Keyword, + + 'NAMES': tokens.Keyword, + 'NATIONAL': tokens.Keyword, + 'NATURAL': tokens.Keyword, + 'NCHAR': tokens.Keyword, + 'NCLOB': tokens.Keyword, + 'NEW': tokens.Keyword, + 'NEXT': tokens.Keyword, + 'NO': tokens.Keyword, + 'NOCREATEDB': tokens.Keyword, + 'NOCREATEUSER': tokens.Keyword, + 'NONE': tokens.Keyword, + 'NOT': tokens.Keyword, + 'NOTHING': tokens.Keyword, + 'NOTIFY': tokens.Keyword, + 'NOTNULL': tokens.Keyword, + 'NULL': tokens.Keyword, + 'NULLABLE': tokens.Keyword, + 'NULLIF': tokens.Keyword, + + 'OBJECT': tokens.Keyword, + 'OCTET_LENGTH': tokens.Keyword, + 'OF': tokens.Keyword, + 'OFF': tokens.Keyword, + 'OFFSET': tokens.Keyword, + 'OIDS': tokens.Keyword, + 'OLD': tokens.Keyword, + 'ONLY': tokens.Keyword, + 'OPEN': tokens.Keyword, + 'OPERATION': tokens.Keyword, + 'OPERATOR': tokens.Keyword, + 'OPTION': tokens.Keyword, + 'OPTIONS': tokens.Keyword, + 'ORDINALITY': tokens.Keyword, + 'OUT': tokens.Keyword, + 'OUTPUT': tokens.Keyword, + 'OVERLAPS': tokens.Keyword, + 'OVERLAY': tokens.Keyword, + 'OVERRIDING': tokens.Keyword, + 'OWNER': tokens.Keyword, + + 'PAD': tokens.Keyword, + 'PARAMETER': tokens.Keyword, + 'PARAMETERS': tokens.Keyword, + 'PARAMETER_MODE': tokens.Keyword, + 'PARAMATER_NAME': tokens.Keyword, + 'PARAMATER_ORDINAL_POSITION': tokens.Keyword, + 'PARAMETER_SPECIFIC_CATALOG': tokens.Keyword, + 'PARAMETER_SPECIFIC_NAME': tokens.Keyword, + 'PARAMATER_SPECIFIC_SCHEMA': tokens.Keyword, + 'PARTIAL': tokens.Keyword, + 'PASCAL': tokens.Keyword, + 'PENDANT': tokens.Keyword, + 'PLACING': tokens.Keyword, + 'PLI': tokens.Keyword, + 'POSITION': tokens.Keyword, + 'POSTFIX': tokens.Keyword, + 'PRECISION': tokens.Keyword, + 'PREFIX': tokens.Keyword, + 'PREORDER': tokens.Keyword, + 'PREPARE': tokens.Keyword, + 'PRESERVE': tokens.Keyword, + 'PRIMARY': tokens.Keyword, + 'PRIOR': tokens.Keyword, + 'PRIVILEGES': tokens.Keyword, + 'PROCEDURAL': tokens.Keyword, + 'PROCEDURE': tokens.Keyword, + 'PUBLIC': tokens.Keyword, + + 'RAISE': tokens.Keyword, + 'READ': tokens.Keyword, + 'READS': tokens.Keyword, + 'RECHECK': tokens.Keyword, + 'RECURSIVE': tokens.Keyword, + 'REF': tokens.Keyword, + 'REFERENCES': tokens.Keyword, + 'REFERENCING': tokens.Keyword, + 'REINDEX': tokens.Keyword, + 'RELATIVE': tokens.Keyword, + 'RENAME': tokens.Keyword, + 'REPEATABLE': tokens.Keyword, + 'RESET': tokens.Keyword, + 'RESTART': tokens.Keyword, + 'RESTRICT': tokens.Keyword, + 'RESULT': tokens.Keyword, + 'RETURN': tokens.Keyword, + 'RETURNED_LENGTH': tokens.Keyword, + 'RETURNED_OCTET_LENGTH': tokens.Keyword, + 'RETURNED_SQLSTATE': tokens.Keyword, + 'RETURNS': tokens.Keyword, + 'REVOKE': tokens.Keyword, + 'RIGHT': tokens.Keyword, + 'ROLE': tokens.Keyword, + 'ROLLBACK': tokens.Keyword.DML, + 'ROLLUP': tokens.Keyword, + 'ROUTINE': tokens.Keyword, + 'ROUTINE_CATALOG': tokens.Keyword, + 'ROUTINE_NAME': tokens.Keyword, + 'ROUTINE_SCHEMA': tokens.Keyword, + 'ROW': tokens.Keyword, + 'ROWS': tokens.Keyword, + 'ROW_COUNT': tokens.Keyword, + 'RULE': tokens.Keyword, + + 'SAVE_POINT': tokens.Keyword, + 'SCALE': tokens.Keyword, + 'SCHEMA': tokens.Keyword, + 'SCHEMA_NAME': tokens.Keyword, + 'SCOPE': tokens.Keyword, + 'SCROLL': tokens.Keyword, + 'SEARCH': tokens.Keyword, + 'SECOND': tokens.Keyword, + 'SECURITY': tokens.Keyword, + 'SELF': tokens.Keyword, + 'SENSITIVE': tokens.Keyword, + 'SERIALIZABLE': tokens.Keyword, + 'SERVER_NAME': tokens.Keyword, + 'SESSION': 
tokens.Keyword, + 'SESSION_USER': tokens.Keyword, + 'SETOF': tokens.Keyword, + 'SETS': tokens.Keyword, + 'SHARE': tokens.Keyword, + 'SHOW': tokens.Keyword, + 'SIMILAR': tokens.Keyword, + 'SIMPLE': tokens.Keyword, + 'SIZE': tokens.Keyword, + 'SOME': tokens.Keyword, + 'SOURCE': tokens.Keyword, + 'SPACE': tokens.Keyword, + 'SPECIFIC': tokens.Keyword, + 'SPECIFICTYPE': tokens.Keyword, + 'SPECIFIC_NAME': tokens.Keyword, + 'SQL': tokens.Keyword, + 'SQLCODE': tokens.Keyword, + 'SQLERROR': tokens.Keyword, + 'SQLEXCEPTION': tokens.Keyword, + 'SQLSTATE': tokens.Keyword, + 'SQLWARNING': tokens.Keyword, + 'STABLE': tokens.Keyword, + 'START': tokens.Keyword.DML, + 'STATE': tokens.Keyword, + 'STATEMENT': tokens.Keyword, + 'STATIC': tokens.Keyword, + 'STATISTICS': tokens.Keyword, + 'STDIN': tokens.Keyword, + 'STDOUT': tokens.Keyword, + 'STORAGE': tokens.Keyword, + 'STRICT': tokens.Keyword, + 'STRUCTURE': tokens.Keyword, + 'STYPE': tokens.Keyword, + 'SUBCLASS_ORIGIN': tokens.Keyword, + 'SUBLIST': tokens.Keyword, + 'SUBSTRING': tokens.Keyword, + 'SUM': tokens.Keyword, + 'SYMMETRIC': tokens.Keyword, + 'SYSID': tokens.Keyword, + 'SYSTEM': tokens.Keyword, + 'SYSTEM_USER': tokens.Keyword, + + 'TABLE': tokens.Keyword, + 'TABLE_NAME': tokens.Keyword, + 'TEMP': tokens.Keyword, + 'TEMPLATE': tokens.Keyword, + 'TEMPORARY': tokens.Keyword, + 'TERMINATE': tokens.Keyword, + 'THAN': tokens.Keyword, + 'TIMESTAMP': tokens.Keyword, + 'TIMEZONE_HOUR': tokens.Keyword, + 'TIMEZONE_MINUTE': tokens.Keyword, + 'TO': tokens.Keyword, + 'TOAST': tokens.Keyword, + 'TRAILING': tokens.Keyword, + 'TRANSATION': tokens.Keyword, + 'TRANSACTIONS_COMMITTED': tokens.Keyword, + 'TRANSACTIONS_ROLLED_BACK': tokens.Keyword, + 'TRANSATION_ACTIVE': tokens.Keyword, + 'TRANSFORM': tokens.Keyword, + 'TRANSFORMS': tokens.Keyword, + 'TRANSLATE': tokens.Keyword, + 'TRANSLATION': tokens.Keyword, + 'TREAT': tokens.Keyword, + 'TRIGGER': tokens.Keyword, + 'TRIGGER_CATALOG': tokens.Keyword, + 'TRIGGER_NAME': tokens.Keyword, + 'TRIGGER_SCHEMA': tokens.Keyword, + 'TRIM': tokens.Keyword, + 'TRUE': tokens.Keyword, + 'TRUNCATE': tokens.Keyword, + 'TRUSTED': tokens.Keyword, + 'TYPE': tokens.Keyword, + + 'UNCOMMITTED': tokens.Keyword, + 'UNDER': tokens.Keyword, + 'UNENCRYPTED': tokens.Keyword, + 'UNION': tokens.Keyword, + 'UNIQUE': tokens.Keyword, + 'UNKNOWN': tokens.Keyword, + 'UNLISTEN': tokens.Keyword, + 'UNNAMED': tokens.Keyword, + 'UNNEST': tokens.Keyword, + 'UNTIL': tokens.Keyword, + 'UPPER': tokens.Keyword, + 'USAGE': tokens.Keyword, + 'USE': tokens.Keyword, + 'USER': tokens.Keyword, + 'USER_DEFINED_TYPE_CATALOG': tokens.Keyword, + 'USER_DEFINED_TYPE_NAME': tokens.Keyword, + 'USER_DEFINED_TYPE_SCHEMA': tokens.Keyword, + 'USING': tokens.Keyword, + + 'VACUUM': tokens.Keyword, + 'VALID': tokens.Keyword, + 'VALIDATOR': tokens.Keyword, + 'VALUES': tokens.Keyword, + 'VARIABLE': tokens.Keyword, + 'VERBOSE': tokens.Keyword, + 'VERSION': tokens.Keyword, + 'VIEW': tokens.Keyword, + 'VOLATILE': tokens.Keyword, + + 'WHENEVER': tokens.Keyword, + 'WITH': tokens.Keyword, + 'WITHOUT': tokens.Keyword, + 'WORK': tokens.Keyword, + 'WRITE': tokens.Keyword, + + 'YEAR': tokens.Keyword, + + 'ZONE': tokens.Keyword, + + # Name.Builtin + 'ARRAY': tokens.Name.Builtin, + 'BIGINT': tokens.Name.Builtin, + 'BINARY': tokens.Name.Builtin, + 'BIT': tokens.Name.Builtin, + 'BLOB': tokens.Name.Builtin, + 'BOOLEAN': tokens.Name.Builtin, + 'CHAR': tokens.Name.Builtin, + 'CHARACTER': tokens.Name.Builtin, + 'DATE': tokens.Name.Builtin, + 'DEC': tokens.Name.Builtin, + 'DECIMAL': 
tokens.Name.Builtin, + 'FLOAT': tokens.Name.Builtin, + 'INT': tokens.Name.Builtin, + 'INT8': tokens.Name.Builtin, + 'INTEGER': tokens.Name.Builtin, + 'INTERVAL': tokens.Name.Builtin, + 'LONG': tokens.Name.Builtin, + 'NUMBER': tokens.Name.Builtin, + 'NUMERIC': tokens.Name.Builtin, + 'REAL': tokens.Name.Builtin, + 'SERIAL': tokens.Name.Builtin, + 'SERIAL8': tokens.Name.Builtin, + 'SIGNED': tokens.Name.Builtin, + 'SMALLINT': tokens.Name.Builtin, + 'TEXT': tokens.Name.Builtin, + 'TINYINT': tokens.Name.Builtin, + 'UNSIGNED': tokens.Name.Builtin, + 'VARCHAR': tokens.Name.Builtin, + 'VARCHAR2': tokens.Name.Builtin, + 'VARYING': tokens.Name.Builtin, +} + + +KEYWORDS_COMMON = { + 'SELECT': tokens.Keyword.DML, + 'INSERT': tokens.Keyword.DML, + 'DELETE': tokens.Keyword.DML, + 'UPDATE': tokens.Keyword.DML, + 'REPLACE': tokens.Keyword.DML, + 'MERGE': tokens.Keyword.DML, + 'DROP': tokens.Keyword.DDL, + 'CREATE': tokens.Keyword.DDL, + 'ALTER': tokens.Keyword.DDL, + + 'WHERE': tokens.Keyword, + 'FROM': tokens.Keyword, + 'INNER': tokens.Keyword, + 'JOIN': tokens.Keyword, + 'STRAIGHT_JOIN': tokens.Keyword, + 'AND': tokens.Keyword, + 'OR': tokens.Keyword, + 'LIKE': tokens.Keyword, + 'ON': tokens.Keyword, + 'IN': tokens.Keyword, + 'SET': tokens.Keyword, + + 'BY': tokens.Keyword, + 'GROUP': tokens.Keyword, + 'ORDER': tokens.Keyword, + 'LEFT': tokens.Keyword, + 'OUTER': tokens.Keyword, + 'FULL': tokens.Keyword, + + 'IF': tokens.Keyword, + 'END': tokens.Keyword, + 'THEN': tokens.Keyword, + 'LOOP': tokens.Keyword, + 'AS': tokens.Keyword, + 'ELSE': tokens.Keyword, + 'FOR': tokens.Keyword, + + 'CASE': tokens.Keyword, + 'WHEN': tokens.Keyword, + 'MIN': tokens.Keyword, + 'MAX': tokens.Keyword, + 'DISTINCT': tokens.Keyword, +} http://git-wip-us.apache.org/repos/asf/impala/blob/417bc8c8/shell/ext-py/sqlparse-0.1.19/sqlparse/lexer.py ---------------------------------------------------------------------- diff --git a/shell/ext-py/sqlparse-0.1.19/sqlparse/lexer.py b/shell/ext-py/sqlparse-0.1.19/sqlparse/lexer.py new file mode 100644 index 0000000..fd29f5c --- /dev/null +++ b/shell/ext-py/sqlparse-0.1.19/sqlparse/lexer.py @@ -0,0 +1,362 @@ +# -*- coding: utf-8 -*- + +# Copyright (C) 2008 Andi Albrecht, [email protected] +# +# This module is part of python-sqlparse and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php. + +"""SQL Lexer""" + +# This code is based on the SqlLexer in pygments. +# http://pygments.org/ +# It's separated from the rest of pygments to increase performance +# and to allow some customizations. + +import re +import sys + +from sqlparse import tokens +from sqlparse.keywords import KEYWORDS, KEYWORDS_COMMON +from cStringIO import StringIO + + +class include(str): + pass + + +class combined(tuple): + """Indicates a state combined from multiple states.""" + + def __new__(cls, *args): + return tuple.__new__(cls, args) + + def __init__(self, *args): + # tuple.__init__ doesn't do anything + pass + + +def is_keyword(value): + test = value.upper() + return KEYWORDS_COMMON.get(test, KEYWORDS.get(test, tokens.Name)), value + + +def apply_filters(stream, filters, lexer=None): + """ + Use this method to apply an iterable of filters to + a stream. If lexer is given it's forwarded to the + filter, otherwise the filter receives `None`. 
+ """ + + def _apply(filter_, stream): + for token in filter_.filter(lexer, stream): + yield token + + for filter_ in filters: + stream = _apply(filter_, stream) + return stream + + +class LexerMeta(type): + """ + Metaclass for Lexer, creates the self._tokens attribute from + self.tokens on the first instantiation. + """ + + def _process_state(cls, unprocessed, processed, state): + assert type(state) is str, "wrong state name %r" % state + assert state[0] != '#', "invalid state name %r" % state + if state in processed: + return processed[state] + tokenlist = processed[state] = [] + rflags = cls.flags + for tdef in unprocessed[state]: + if isinstance(tdef, include): + # it's a state reference + assert tdef != state, "circular state reference %r" % state + tokenlist.extend(cls._process_state( + unprocessed, processed, str(tdef))) + continue + + assert type(tdef) is tuple, "wrong rule def %r" % tdef + + try: + rex = re.compile(tdef[0], rflags).match + except Exception, err: + raise ValueError(("uncompilable regex %r in state" + " %r of %r: %s" + % (tdef[0], state, cls, err))) + + assert type(tdef[1]) is tokens._TokenType or callable(tdef[1]), \ + ('token type must be simple type or callable, not %r' + % (tdef[1],)) + + if len(tdef) == 2: + new_state = None + else: + tdef2 = tdef[2] + if isinstance(tdef2, str): + # an existing state + if tdef2 == '#pop': + new_state = -1 + elif tdef2 in unprocessed: + new_state = (tdef2,) + elif tdef2 == '#push': + new_state = tdef2 + elif tdef2[:5] == '#pop:': + new_state = -int(tdef2[5:]) + else: + assert False, 'unknown new state %r' % tdef2 + elif isinstance(tdef2, combined): + # combine a new state from existing ones + new_state = '_tmp_%d' % cls._tmpname + cls._tmpname += 1 + itokens = [] + for istate in tdef2: + assert istate != state, \ + 'circular state ref %r' % istate + itokens.extend(cls._process_state(unprocessed, + processed, istate)) + processed[new_state] = itokens + new_state = (new_state,) + elif isinstance(tdef2, tuple): + # push more than one state + for state in tdef2: + assert (state in unprocessed or + state in ('#pop', '#push')), \ + 'unknown new state ' + state + new_state = tdef2 + else: + assert False, 'unknown new state def %r' % tdef2 + tokenlist.append((rex, tdef[1], new_state)) + return tokenlist + + def process_tokendef(cls): + cls._all_tokens = {} + cls._tmpname = 0 + processed = cls._all_tokens[cls.__name__] = {} + #tokendefs = tokendefs or cls.tokens[name] + for state in cls.tokens.keys(): + cls._process_state(cls.tokens, processed, state) + return processed + + def __call__(cls, *args, **kwds): + if not hasattr(cls, '_tokens'): + cls._all_tokens = {} + cls._tmpname = 0 + if hasattr(cls, 'token_variants') and cls.token_variants: + # don't process yet + pass + else: + cls._tokens = cls.process_tokendef() + + return type.__call__(cls, *args, **kwds) + + +class Lexer(object): + + __metaclass__ = LexerMeta + + encoding = 'utf-8' + stripall = False + stripnl = False + tabsize = 0 + flags = re.IGNORECASE | re.UNICODE + + tokens = { + 'root': [ + (r'(--|# ).*?(\r\n|\r|\n)', tokens.Comment.Single), + # $ matches *before* newline, therefore we have two patterns + # to match Comment.Single + (r'(--|# ).*?$', tokens.Comment.Single), + (r'(\r\n|\r|\n)', tokens.Newline), + (r'\s+', tokens.Whitespace), + (r'/\*', tokens.Comment.Multiline, 'multiline-comments'), + (r':=', tokens.Assignment), + (r'::', tokens.Punctuation), + (r'[*]', tokens.Wildcard), + (r'CASE\b', tokens.Keyword), # extended CASE(foo) + (r"`(``|[^`])*`", tokens.Name), + 
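+            # Same as the backtick rule above, but for identifiers quoted
+            # with acute accents (a non-standard quoting style).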
(r"´(´´|[^´])*´", tokens.Name), + (r'\$([^\W\d]\w*)?\$', tokens.Name.Builtin), + (r'\?{1}', tokens.Name.Placeholder), + (r'%\(\w+\)s', tokens.Name.Placeholder), + (r'%s', tokens.Name.Placeholder), + (r'[$:?]\w+', tokens.Name.Placeholder), + # FIXME(andi): VALUES shouldn't be listed here + # see https://github.com/andialbrecht/sqlparse/pull/64 + (r'VALUES', tokens.Keyword), + (r'(@|##|#)[^\W\d_]\w+', tokens.Name), + # IN is special, it may be followed by a parenthesis, but + # is never a functino, see issue183 + (r'in\b(?=[ (])?', tokens.Keyword), + (r'[^\W\d_]\w*(?=[.(])', tokens.Name), # see issue39 + (r'[-]?0x[0-9a-fA-F]+', tokens.Number.Hexadecimal), + (r'[-]?[0-9]*(\.[0-9]+)?[eE][-]?[0-9]+', tokens.Number.Float), + (r'[-]?[0-9]*\.[0-9]+', tokens.Number.Float), + (r'[-]?[0-9]+', tokens.Number.Integer), + (r"'(''|\\\\|\\'|[^'])*'", tokens.String.Single), + # not a real string literal in ANSI SQL: + (r'(""|".*?[^\\]")', tokens.String.Symbol), + # sqlite names can be escaped with [square brackets]. left bracket + # cannot be preceded by word character or a right bracket -- + # otherwise it's probably an array index + (r'(?<![\w\])])(\[[^\]]+\])', tokens.Name), + (r'((LEFT\s+|RIGHT\s+|FULL\s+)?(INNER\s+|OUTER\s+|STRAIGHT\s+)?|(CROSS\s+|NATURAL\s+)?)?JOIN\b', tokens.Keyword), + (r'END(\s+IF|\s+LOOP)?\b', tokens.Keyword), + (r'NOT NULL\b', tokens.Keyword), + (r'CREATE(\s+OR\s+REPLACE)?\b', tokens.Keyword.DDL), + (r'DOUBLE\s+PRECISION\b', tokens.Name.Builtin), + (r'(?<=\.)[^\W\d_]\w*', tokens.Name), + (r'[^\W\d]\w*', is_keyword), + (r'[;:()\[\],\.]', tokens.Punctuation), + (r'[<>=~!]+', tokens.Operator.Comparison), + (r'[+/@#%^&|`?^-]+', tokens.Operator), + ], + 'multiline-comments': [ + (r'/\*', tokens.Comment.Multiline, 'multiline-comments'), + (r'\*/', tokens.Comment.Multiline, '#pop'), + (r'[^/\*]+', tokens.Comment.Multiline), + (r'[/*]', tokens.Comment.Multiline), + ]} + + def __init__(self): + self.filters = [] + + def add_filter(self, filter_, **options): + from sqlparse.filters import Filter + if not isinstance(filter_, Filter): + filter_ = filter_(**options) + self.filters.append(filter_) + + def _decode(self, text): + if sys.version_info[0] == 3: + if isinstance(text, str): + return text + if self.encoding == 'guess': + try: + text = text.decode('utf-8') + if text.startswith(u'\ufeff'): + text = text[len(u'\ufeff'):] + except UnicodeDecodeError: + text = text.decode('latin1') + else: + try: + text = text.decode(self.encoding) + except UnicodeDecodeError: + text = text.decode('unicode-escape') + + if self.tabsize > 0: + text = text.expandtabs(self.tabsize) + return text + + def get_tokens(self, text, unfiltered=False): + """ + Return an iterable of (tokentype, value) pairs generated from + `text`. If `unfiltered` is set to `True`, the filtering mechanism + is bypassed even if filters are defined. + + Also preprocess the text, i.e. expand tabs and strip it if + wanted and applies registered filters. 
+ """ + if isinstance(text, basestring): + if self.stripall: + text = text.strip() + elif self.stripnl: + text = text.strip('\n') + + if sys.version_info[0] < 3 and isinstance(text, unicode): + text = StringIO(text.encode('utf-8')) + self.encoding = 'utf-8' + else: + text = StringIO(text) + + def streamer(): + for i, t, v in self.get_tokens_unprocessed(text): + yield t, v + stream = streamer() + if not unfiltered: + stream = apply_filters(stream, self.filters, self) + return stream + + def get_tokens_unprocessed(self, stream, stack=('root',)): + """ + Split ``text`` into (tokentype, text) pairs. + + ``stack`` is the inital stack (default: ``['root']``) + """ + pos = 0 + tokendefs = self._tokens # see __call__, pylint:disable=E1101 + statestack = list(stack) + statetokens = tokendefs[statestack[-1]] + known_names = {} + + text = stream.read() + text = self._decode(text) + + while 1: + for rexmatch, action, new_state in statetokens: + m = rexmatch(text, pos) + if m: + value = m.group() + if value in known_names: + yield pos, known_names[value], value + elif type(action) is tokens._TokenType: + yield pos, action, value + elif hasattr(action, '__call__'): + ttype, value = action(value) + known_names[value] = ttype + yield pos, ttype, value + else: + for item in action(self, m): + yield item + pos = m.end() + if new_state is not None: + # state transition + if isinstance(new_state, tuple): + for state in new_state: + if state == '#pop': + statestack.pop() + elif state == '#push': + statestack.append(statestack[-1]) + elif ( + # Ugly hack - multiline-comments + # are not stackable + state != 'multiline-comments' + or not statestack + or statestack[-1] != 'multiline-comments' + ): + statestack.append(state) + elif isinstance(new_state, int): + # pop + del statestack[new_state:] + elif new_state == '#push': + statestack.append(statestack[-1]) + else: + assert False, "wrong state def: %r" % new_state + statetokens = tokendefs[statestack[-1]] + break + else: + try: + if text[pos] == '\n': + # at EOL, reset state to "root" + pos += 1 + statestack = ['root'] + statetokens = tokendefs['root'] + yield pos, tokens.Text, u'\n' + continue + yield pos, tokens.Error, text[pos] + pos += 1 + except IndexError: + break + + +def tokenize(sql, encoding=None): + """Tokenize sql. + + Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream + of ``(token type, value)`` items. + """ + lexer = Lexer() + if encoding is not None: + lexer.encoding = encoding + return lexer.get_tokens(sql) http://git-wip-us.apache.org/repos/asf/impala/blob/417bc8c8/shell/ext-py/sqlparse-0.1.19/sqlparse/pipeline.py ---------------------------------------------------------------------- diff --git a/shell/ext-py/sqlparse-0.1.19/sqlparse/pipeline.py b/shell/ext-py/sqlparse-0.1.19/sqlparse/pipeline.py new file mode 100644 index 0000000..34dad19 --- /dev/null +++ b/shell/ext-py/sqlparse-0.1.19/sqlparse/pipeline.py @@ -0,0 +1,31 @@ +# Copyright (C) 2011 Jesus Leganes "piranna", [email protected] +# +# This module is part of python-sqlparse and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php. 
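+#
+# A Pipeline is a list of filters; calling it runs a token stream through
+# every filter in order and returns a static, non-generator result
+# (see the class docstring below).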
+ +from types import GeneratorType + + +class Pipeline(list): + """Pipeline to process filters sequentially""" + + def __call__(self, stream): + """Run the pipeline + + Return a static (non generator) version of the result + """ + + # Run the stream over all the filters on the pipeline + for filter in self: + # Functions and callable objects (objects with '__call__' method) + if callable(filter): + stream = filter(stream) + + # Normal filters (objects with 'process' method) + else: + stream = filter.process(None, stream) + + # If last filter return a generator, staticalize it inside a list + if isinstance(stream, GeneratorType): + return list(stream) + return stream http://git-wip-us.apache.org/repos/asf/impala/blob/417bc8c8/shell/ext-py/sqlparse-0.1.19/sqlparse/sql.py ---------------------------------------------------------------------- diff --git a/shell/ext-py/sqlparse-0.1.19/sqlparse/sql.py b/shell/ext-py/sqlparse-0.1.19/sqlparse/sql.py new file mode 100644 index 0000000..7325712 --- /dev/null +++ b/shell/ext-py/sqlparse-0.1.19/sqlparse/sql.py @@ -0,0 +1,684 @@ +# -*- coding: utf-8 -*- + +"""This module contains classes representing syntactical elements of SQL.""" + +import re +import sys + +from sqlparse import tokens as T + + +class Token(object): + """Base class for all other classes in this module. + + It represents a single token and has two instance attributes: + ``value`` is the unchange value of the token and ``ttype`` is + the type of the token. + """ + + __slots__ = ('value', 'ttype', 'parent', 'normalized', 'is_keyword') + + def __init__(self, ttype, value): + self.value = value + if ttype in T.Keyword: + self.normalized = value.upper() + else: + self.normalized = value + self.ttype = ttype + self.is_keyword = ttype in T.Keyword + self.parent = None + + def __str__(self): + if sys.version_info[0] == 3: + return self.value + else: + return unicode(self).encode('utf-8') + + def __repr__(self): + short = self._get_repr_value() + if sys.version_info[0] < 3: + short = short.encode('utf-8') + return '<%s \'%s\' at 0x%07x>' % (self._get_repr_name(), + short, id(self)) + + def __unicode__(self): + """Returns a unicode representation of this object.""" + return self.value or '' + + def to_unicode(self): + """Returns a unicode representation of this object. + + .. deprecated:: 0.1.5 + Use ``unicode(token)`` (for Python 3: ``str(token)``) instead. + """ + return unicode(self) + + def _get_repr_name(self): + return str(self.ttype).split('.')[-1] + + def _get_repr_value(self): + raw = unicode(self) + if len(raw) > 7: + raw = raw[:6] + u'...' + return re.sub('\s+', ' ', raw) + + def flatten(self): + """Resolve subgroups.""" + yield self + + def match(self, ttype, values, regex=False): + """Checks whether the token matches the given arguments. + + *ttype* is a token type. If this token doesn't match the given token + type. + *values* is a list of possible values for this token. The values + are OR'ed together so if only one of the values matches ``True`` + is returned. Except for keyword tokens the comparison is + case-sensitive. For convenience it's ok to pass in a single string. + If *regex* is ``True`` (default is ``False``) the given values are + treated as regular expressions. 
+ """ + type_matched = self.ttype is ttype + if not type_matched or values is None: + return type_matched + + if regex: + if isinstance(values, basestring): + values = set([values]) + + if self.ttype is T.Keyword: + values = set(re.compile(v, re.IGNORECASE) for v in values) + else: + values = set(re.compile(v) for v in values) + + for pattern in values: + if pattern.search(self.value): + return True + return False + + if isinstance(values, basestring): + if self.is_keyword: + return values.upper() == self.normalized + return values == self.value + + if self.is_keyword: + for v in values: + if v.upper() == self.normalized: + return True + return False + + return self.value in values + + def is_group(self): + """Returns ``True`` if this object has children.""" + return False + + def is_whitespace(self): + """Return ``True`` if this token is a whitespace token.""" + return self.ttype and self.ttype in T.Whitespace + + def within(self, group_cls): + """Returns ``True`` if this token is within *group_cls*. + + Use this method for example to check if an identifier is within + a function: ``t.within(sql.Function)``. + """ + parent = self.parent + while parent: + if isinstance(parent, group_cls): + return True + parent = parent.parent + return False + + def is_child_of(self, other): + """Returns ``True`` if this token is a direct child of *other*.""" + return self.parent == other + + def has_ancestor(self, other): + """Returns ``True`` if *other* is in this tokens ancestry.""" + parent = self.parent + while parent: + if parent == other: + return True + parent = parent.parent + return False + + +class TokenList(Token): + """A group of tokens. + + It has an additional instance attribute ``tokens`` which holds a + list of child-tokens. + """ + + __slots__ = ('value', 'ttype', 'tokens') + + def __init__(self, tokens=None): + if tokens is None: + tokens = [] + self.tokens = tokens + Token.__init__(self, None, self._to_string()) + + def __unicode__(self): + return self._to_string() + + def __str__(self): + str_ = self._to_string() + if sys.version_info[0] < 2: + str_ = str_.encode('utf-8') + return str_ + + def _to_string(self): + if sys.version_info[0] == 3: + return ''.join(x.value for x in self.flatten()) + else: + return ''.join(unicode(x) for x in self.flatten()) + + def _get_repr_name(self): + return self.__class__.__name__ + + def _pprint_tree(self, max_depth=None, depth=0): + """Pretty-print the object tree.""" + indent = ' ' * (depth * 2) + for idx, token in enumerate(self.tokens): + if token.is_group(): + pre = ' +-' + else: + pre = ' | ' + print '%s%s%d %s \'%s\'' % (indent, pre, idx, + token._get_repr_name(), + token._get_repr_value()) + if (token.is_group() and (max_depth is None or depth < max_depth)): + token._pprint_tree(max_depth, depth + 1) + + def _remove_quotes(self, val): + """Helper that removes surrounding quotes from strings.""" + if not val: + return val + if val[0] in ('"', '\'') and val[-1] == val[0]: + val = val[1:-1] + return val + + def get_token_at_offset(self, offset): + """Returns the token that is on position offset.""" + idx = 0 + for token in self.flatten(): + end = idx + len(token.value) + if idx <= offset <= end: + return token + idx = end + + def flatten(self): + """Generator yielding ungrouped tokens. + + This method is recursively called for all child tokens. 
+ """ + for token in self.tokens: + if isinstance(token, TokenList): + for item in token.flatten(): + yield item + else: + yield token + +# def __iter__(self): +# return self +# +# def next(self): +# for token in self.tokens: +# yield token + + def is_group(self): + return True + + def get_sublists(self): +# return [x for x in self.tokens if isinstance(x, TokenList)] + for x in self.tokens: + if isinstance(x, TokenList): + yield x + + @property + def _groupable_tokens(self): + return self.tokens + + def token_first(self, ignore_whitespace=True, ignore_comments=False): + """Returns the first child token. + + If *ignore_whitespace* is ``True`` (the default), whitespace + tokens are ignored. + + if *ignore_comments* is ``True`` (default: ``False``), comments are + ignored too. + """ + for token in self.tokens: + if ignore_whitespace and token.is_whitespace(): + continue + if ignore_comments and isinstance(token, Comment): + continue + return token + + def token_next_by_instance(self, idx, clss, end=None): + """Returns the next token matching a class. + + *idx* is where to start searching in the list of child tokens. + *clss* is a list of classes the token should be an instance of. + + If no matching token can be found ``None`` is returned. + """ + if not isinstance(clss, (list, tuple)): + clss = (clss,) + + for token in self.tokens[idx:end]: + if isinstance(token, clss): + return token + + def token_next_by_type(self, idx, ttypes): + """Returns next matching token by it's token type.""" + if not isinstance(ttypes, (list, tuple)): + ttypes = [ttypes] + + for token in self.tokens[idx:]: + if token.ttype in ttypes: + return token + + def token_next_match(self, idx, ttype, value, regex=False): + """Returns next token where it's ``match`` method returns ``True``.""" + if not isinstance(idx, int): + idx = self.token_index(idx) + + for n in xrange(idx, len(self.tokens)): + token = self.tokens[n] + if token.match(ttype, value, regex): + return token + + def token_not_matching(self, idx, funcs): + for token in self.tokens[idx:]: + passed = False + for func in funcs: + if func(token): + passed = True + break + + if not passed: + return token + + def token_matching(self, idx, funcs): + for token in self.tokens[idx:]: + for func in funcs: + if func(token): + return token + + def token_prev(self, idx, skip_ws=True): + """Returns the previous token relative to *idx*. + + If *skip_ws* is ``True`` (the default) whitespace tokens are ignored. + ``None`` is returned if there's no previous token. + """ + if idx is None: + return None + + if not isinstance(idx, int): + idx = self.token_index(idx) + + while idx: + idx -= 1 + if self.tokens[idx].is_whitespace() and skip_ws: + continue + return self.tokens[idx] + + def token_next(self, idx, skip_ws=True): + """Returns the next token relative to *idx*. + + If *skip_ws* is ``True`` (the default) whitespace tokens are ignored. + ``None`` is returned if there's no next token. + """ + if idx is None: + return None + + if not isinstance(idx, int): + idx = self.token_index(idx) + + while idx < len(self.tokens) - 1: + idx += 1 + if self.tokens[idx].is_whitespace() and skip_ws: + continue + return self.tokens[idx] + + def token_index(self, token, start=0): + """Return list index of token.""" + if start > 0: + # Performing `index` manually is much faster when starting in the middle + # of the list of tokens and expecting to find the token near to the starting + # index. 
+            for i in xrange(start, len(self.tokens)):
+                if self.tokens[i] == token:
+                    return i
+            return -1
+        return self.tokens.index(token)
+
+    def tokens_between(self, start, end, exclude_end=False):
+        """Return all tokens between (and including) start and end.
+
+        If *exclude_end* is ``True`` (default is ``False``) the end token
+        is excluded.
+        """
+        # FIXME(andi): rename exclude_end to include_end
+        if exclude_end:
+            offset = 0
+        else:
+            offset = 1
+        end_idx = self.token_index(end) + offset
+        start_idx = self.token_index(start)
+        return self.tokens[start_idx:end_idx]
+
+    def group_tokens(self, grp_cls, tokens, ignore_ws=False):
+        """Replace tokens by an instance of *grp_cls*."""
+        idx = self.token_index(tokens[0])
+        if ignore_ws:
+            while tokens and tokens[-1].is_whitespace():
+                tokens = tokens[:-1]
+        for t in tokens:
+            self.tokens.remove(t)
+        grp = grp_cls(tokens)
+        for token in tokens:
+            token.parent = grp
+        grp.parent = self
+        self.tokens.insert(idx, grp)
+        return grp
+
+    def insert_before(self, where, token):
+        """Inserts *token* before *where*."""
+        self.tokens.insert(self.token_index(where), token)
+
+    def insert_after(self, where, token, skip_ws=True):
+        """Inserts *token* after *where*."""
+        next_token = self.token_next(where, skip_ws=skip_ws)
+        if next_token is None:
+            self.tokens.append(token)
+        else:
+            self.tokens.insert(self.token_index(next_token), token)
+
+    def has_alias(self):
+        """Returns ``True`` if an alias is present."""
+        return self.get_alias() is not None
+
+    def get_alias(self):
+        """Returns the alias for this identifier or ``None``."""
+
+        # "name AS alias"
+        kw = self.token_next_match(0, T.Keyword, 'AS')
+        if kw is not None:
+            return self._get_first_name(kw, keywords=True)
+
+        # "name alias" or "complicated column expression alias"
+        if len(self.tokens) > 2 \
+           and self.token_next_by_type(0, T.Whitespace) is not None:
+            return self._get_first_name(reverse=True)
+
+        return None
+
+    def get_name(self):
+        """Returns the name of this identifier.
+
+        This is either its alias or its real name. The returned value can
+        be considered as the name under which the object corresponding to
+        this identifier is known within the current statement.
+        """
+        alias = self.get_alias()
+        if alias is not None:
+            return alias
+        return self.get_real_name()
+
+    def get_real_name(self):
+        """Returns the real name (object name) of this identifier."""
+        # a.b
+        dot = self.token_next_match(0, T.Punctuation, '.')
+        if dot is not None:
+            return self._get_first_name(self.token_index(dot))
+
+        return self._get_first_name()
+
+    def get_parent_name(self):
+        """Return the name of the parent object if any.
+
+        A parent object is identified by the first occurring dot.
+        """
+        dot = self.token_next_match(0, T.Punctuation, '.')
+        if dot is None:
+            return None
+        prev_ = self.token_prev(self.token_index(dot))
+        if prev_ is None:  # something must be very wrong here..
+            return None
+        return self._remove_quotes(prev_.value)
+
+    def _get_first_name(self, idx=None, reverse=False, keywords=False):
+        """Returns the name of the first token with a name"""
+
+        if idx and not isinstance(idx, int):
+            idx = self.token_index(idx) + 1
+
+        tokens = self.tokens[idx:] if idx else self.tokens
+        tokens = reversed(tokens) if reverse else tokens
+        types = [T.Name, T.Wildcard, T.String.Symbol]
+
+        if keywords:
+            types.append(T.Keyword)
+
+        for tok in tokens:
+            if tok.ttype in types:
+                return self._remove_quotes(tok.value)
+            elif isinstance(tok, Identifier) or isinstance(tok, Function):
+                return tok.get_name()
+        return None
+
+
+class Statement(TokenList):
+    """Represents a SQL statement."""
+
+    __slots__ = ('value', 'ttype', 'tokens')
+
+    def get_type(self):
+        """Returns the type of a statement.
+
+        The returned value is a string holding an upper-cased version of
+        the first DML or DDL keyword. If the first token in this group
+        isn't a DML or DDL keyword, "UNKNOWN" is returned.
+
+        Whitespaces and comments at the beginning of the statement
+        are ignored.
+        """
+        first_token = self.token_first(ignore_comments=True)
+        if first_token is None:
+            # An "empty" statement that either has no tokens at all
+            # or only whitespace tokens.
+            return 'UNKNOWN'
+
+        elif first_token.ttype in (T.Keyword.DML, T.Keyword.DDL):
+            return first_token.normalized
+
+        return 'UNKNOWN'
+
+
+class Identifier(TokenList):
+    """Represents an identifier.
+
+    Identifiers may have aliases or typecasts.
+    """
+
+    __slots__ = ('value', 'ttype', 'tokens')
+
+    def is_wildcard(self):
+        """Return ``True`` if this identifier contains a wildcard."""
+        token = self.token_next_by_type(0, T.Wildcard)
+        return token is not None
+
+    def get_typecast(self):
+        """Returns the typecast of this object as a string, or ``None``."""
+        marker = self.token_next_match(0, T.Punctuation, '::')
+        if marker is None:
+            return None
+        next_ = self.token_next(self.token_index(marker), False)
+        if next_ is None:
+            return None
+        return unicode(next_)
+
+    def get_ordering(self):
+        """Returns the ordering as an uppercase string, or ``None``."""
+        ordering = self.token_next_by_type(0, T.Keyword.Order)
+        if ordering is None:
+            return None
+        return ordering.value.upper()
+
+    def get_array_indices(self):
+        """Returns an iterator of index token lists"""
+
+        for tok in self.tokens:
+            if isinstance(tok, SquareBrackets):
+                # Use [1:-1] index to discard the square brackets
+                yield tok.tokens[1:-1]
+
+
+class IdentifierList(TokenList):
+    """A list of :class:`~sqlparse.sql.Identifier`\'s."""
+
+    __slots__ = ('value', 'ttype', 'tokens')
+
+    def get_identifiers(self):
+        """Returns the identifiers.
+
+        Whitespaces and punctuations are not included in this generator.
+ """ + for x in self.tokens: + if not x.is_whitespace() and not x.match(T.Punctuation, ','): + yield x + + +class Parenthesis(TokenList): + """Tokens between parenthesis.""" + __slots__ = ('value', 'ttype', 'tokens') + + @property + def _groupable_tokens(self): + return self.tokens[1:-1] + + +class SquareBrackets(TokenList): + """Tokens between square brackets""" + + __slots__ = ('value', 'ttype', 'tokens') + + @property + def _groupable_tokens(self): + return self.tokens[1:-1] + +class Assignment(TokenList): + """An assignment like 'var := val;'""" + __slots__ = ('value', 'ttype', 'tokens') + + +class If(TokenList): + """An 'if' clause with possible 'else if' or 'else' parts.""" + __slots__ = ('value', 'ttype', 'tokens') + + +class For(TokenList): + """A 'FOR' loop.""" + __slots__ = ('value', 'ttype', 'tokens') + + +class Comparison(TokenList): + """A comparison used for example in WHERE clauses.""" + __slots__ = ('value', 'ttype', 'tokens') + + @property + def left(self): + return self.tokens[0] + + @property + def right(self): + return self.tokens[-1] + + +class Comment(TokenList): + """A comment.""" + __slots__ = ('value', 'ttype', 'tokens') + + def is_multiline(self): + return self.tokens and self.tokens[0].ttype == T.Comment.Multiline + + +class Where(TokenList): + """A WHERE clause.""" + __slots__ = ('value', 'ttype', 'tokens') + + +class Case(TokenList): + """A CASE statement with one or more WHEN and possibly an ELSE part.""" + + __slots__ = ('value', 'ttype', 'tokens') + + def get_cases(self): + """Returns a list of 2-tuples (condition, value). + + If an ELSE exists condition is None. + """ + CONDITION = 1 + VALUE = 2 + + ret = [] + mode = CONDITION + + for token in self.tokens: + # Set mode from the current statement + if token.match(T.Keyword, 'CASE'): + continue + + elif token.match(T.Keyword, 'WHEN'): + ret.append(([], [])) + mode = CONDITION + + elif token.match(T.Keyword, 'THEN'): + mode = VALUE + + elif token.match(T.Keyword, 'ELSE'): + ret.append((None, [])) + mode = VALUE + + elif token.match(T.Keyword, 'END'): + mode = None + + # First condition without preceding WHEN + if mode and not ret: + ret.append(([], [])) + + # Append token depending of the current mode + if mode == CONDITION: + ret[-1][0].append(token) + + elif mode == VALUE: + ret[-1][1].append(token) + + # Return cases list + return ret + + +class Function(TokenList): + """A function or procedure call.""" + + __slots__ = ('value', 'ttype', 'tokens') + + def get_parameters(self): + """Return a list of parameters.""" + parenthesis = self.tokens[-1] + for t in parenthesis.tokens: + if isinstance(t, IdentifierList): + return t.get_identifiers() + elif isinstance(t, Identifier) or \ + isinstance(t, Function) or \ + t.ttype in T.Literal: + return [t,] + return [] + + +class Begin(TokenList): + """A BEGIN/END block.""" + + __slots__ = ('value', 'ttype', 'tokens')

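The Pipeline class at the start of this hunk simply chains filters over a
token stream: callables are invoked directly, other filters via their
process() method, and a trailing generator is materialized into a list.
A minimal usage sketch, again assuming the vendored package is importable
under Python 2; KeywordCaseFilter and tokenize ship with the same
sqlparse-0.1.19 distribution:

    from sqlparse.filters import KeywordCaseFilter
    from sqlparse.lexer import tokenize
    from sqlparse.pipeline import Pipeline

    pipe = Pipeline()
    # KeywordCaseFilter instances are not callable, so the pipeline falls
    # back to calling their process() method.
    pipe.append(KeywordCaseFilter(case='upper'))

    # tokenize() yields (ttype, value) pairs; the pipeline returns a list
    # because the last filter produced a generator.
    for ttype, value in pipe(tokenize('select * from tbl')):
        if value.strip():
            print(value)  # SELECT, *, FROM, tbl -- keywords upper-cased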