http://git-wip-us.apache.org/repos/asf/impala/blob/417bc8c8/shell/ext-py/sqlparse-0.1.19/sqlparse/filters.py ---------------------------------------------------------------------- diff --git a/shell/ext-py/sqlparse-0.1.19/sqlparse/filters.py b/shell/ext-py/sqlparse-0.1.19/sqlparse/filters.py new file mode 100644 index 0000000..676344f --- /dev/null +++ b/shell/ext-py/sqlparse-0.1.19/sqlparse/filters.py @@ -0,0 +1,728 @@ +# -*- coding: utf-8 -*- + +import re + +from os.path import abspath, join + +from sqlparse import sql, tokens as T +from sqlparse.engine import FilterStack +from sqlparse.lexer import tokenize +from sqlparse.pipeline import Pipeline +from sqlparse.tokens import (Comment, Comparison, Keyword, Name, Punctuation, + String, Whitespace) +from sqlparse.utils import memoize_generator +from sqlparse.utils import split_unquoted_newlines + + +# -------------------------- +# token process + +class _CaseFilter: + + ttype = None + + def __init__(self, case=None): + if case is None: + case = 'upper' + assert case in ['lower', 'upper', 'capitalize'] + self.convert = getattr(unicode, case) + + def process(self, stack, stream): + for ttype, value in stream: + if ttype in self.ttype: + value = self.convert(value) + yield ttype, value + + +class KeywordCaseFilter(_CaseFilter): + ttype = T.Keyword + + +class IdentifierCaseFilter(_CaseFilter): + ttype = (T.Name, T.String.Symbol) + + def process(self, stack, stream): + for ttype, value in stream: + if ttype in self.ttype and not value.strip()[0] == '"': + value = self.convert(value) + yield ttype, value + + +class TruncateStringFilter: + + def __init__(self, width, char): + self.width = max(width, 1) + self.char = unicode(char) + + def process(self, stack, stream): + for ttype, value in stream: + if ttype is T.Literal.String.Single: + if value[:2] == '\'\'': + inner = value[2:-2] + quote = u'\'\'' + else: + inner = value[1:-1] + quote = u'\'' + if len(inner) > self.width: + value = u''.join((quote, inner[:self.width], self.char, + quote)) + yield ttype, value + + +class GetComments: + """Get the comments from a stack""" + def process(self, stack, stream): + for token_type, value in stream: + if token_type in Comment: + yield token_type, value + + +class StripComments: + """Strip the comments from a stack""" + def process(self, stack, stream): + for token_type, value in stream: + if token_type not in Comment: + yield token_type, value + + +def StripWhitespace(stream): + "Strip the useless whitespaces from a stream leaving only the minimal ones" + last_type = None + has_space = False + ignore_group = frozenset((Comparison, Punctuation)) + + for token_type, value in stream: + # We got a previous token (not empty first ones) + if last_type: + if token_type in Whitespace: + has_space = True + continue + + # Ignore first empty spaces and dot-commas + elif token_type in (Whitespace, Whitespace.Newline, ignore_group): + continue + + # Yield a whitespace if it can't be ignored + if has_space: + if not ignore_group.intersection((last_type, token_type)): + yield Whitespace, ' ' + has_space = False + + # Yield the token and set its type for checking with the next one + yield token_type, value + last_type = token_type + + +class IncludeStatement: + """Filter that enable a INCLUDE statement""" + + def __init__(self, dirpath=".", maxrecursive=10, raiseexceptions=False): + if maxrecursive <= 0: + raise ValueError('Max recursion limit reached') + + self.dirpath = abspath(dirpath) + self.maxRecursive = maxrecursive + self.raiseexceptions = raiseexceptions + + 
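+        # State flag used by process() below: True from the moment an
+        # INCLUDE keyword is seen until the included file has been handled.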
        self.detected = False

    @memoize_generator
    def process(self, stack, stream):
        # Run over all tokens in the stream
        for token_type, value in stream:
            # INCLUDE statement found, set detected mode
            if token_type in Name and value.upper() == 'INCLUDE':
                self.detected = True
                continue

            # INCLUDE statement was found, parse it
            elif self.detected:
                # Omit whitespaces
                if token_type in Whitespace:
                    continue

                # Found file path to include
                if token_type in String.Symbol:
#                if token_type in tokens.String.Symbol:

                    # Get path of file to include
                    path = join(self.dirpath, value[1:-1])

                    try:
                        f = open(path)
                        raw_sql = f.read()
                        f.close()

                    # There was a problem loading the include file
                    except IOError, err:
                        # Raise the exception to the interpreter
                        if self.raiseexceptions:
                            raise

                        # Put the exception as a comment on the SQL code
                        yield Comment, u'-- IOError: %s\n' % err

                    else:
                        # Create a new FilterStack to parse the read file
                        # and add all its tokens to the main stack recursively
                        try:
                            filtr = IncludeStatement(self.dirpath,
                                                     self.maxRecursive - 1,
                                                     self.raiseexceptions)

                        # Max recursion limit reached
                        except ValueError, err:
                            # Raise the exception to the interpreter
                            if self.raiseexceptions:
                                raise

                            # Put the exception as a comment on the SQL code
                            yield Comment, u'-- ValueError: %s\n' % err

                        stack = FilterStack()
                        stack.preprocess.append(filtr)

                        for tv in stack.run(raw_sql):
                            yield tv

                    # Set normal mode
                    self.detected = False

                # Don't include any token while in detected mode
                continue

            # Normal token
            yield token_type, value


# ----------------------
# statement process

class StripCommentsFilter:

    def _get_next_comment(self, tlist):
        # TODO(andi) Comment types should be unified, see related issue38
        token = tlist.token_next_by_instance(0, sql.Comment)
        if token is None:
            token = tlist.token_next_by_type(0, T.Comment)
        return token

    def _process(self, tlist):
        token = self._get_next_comment(tlist)
        while token:
            tidx = tlist.token_index(token)
            prev = tlist.token_prev(tidx, False)
            next_ = tlist.token_next(tidx, False)
            # Replace by whitespace if prev and next exist and if they're not
            # whitespaces. This doesn't apply if prev or next is a parenthesis.
+ if (prev is not None and next_ is not None + and not prev.is_whitespace() and not next_.is_whitespace() + and not (prev.match(T.Punctuation, '(') + or next_.match(T.Punctuation, ')'))): + tlist.tokens[tidx] = sql.Token(T.Whitespace, ' ') + else: + tlist.tokens.pop(tidx) + token = self._get_next_comment(tlist) + + def process(self, stack, stmt): + [self.process(stack, sgroup) for sgroup in stmt.get_sublists()] + self._process(stmt) + + +class StripWhitespaceFilter: + + def _stripws(self, tlist): + func_name = '_stripws_%s' % tlist.__class__.__name__.lower() + func = getattr(self, func_name, self._stripws_default) + func(tlist) + + def _stripws_default(self, tlist): + last_was_ws = False + for token in tlist.tokens: + if token.is_whitespace(): + if last_was_ws: + token.value = '' + else: + token.value = ' ' + last_was_ws = token.is_whitespace() + + def _stripws_identifierlist(self, tlist): + # Removes newlines before commas, see issue140 + last_nl = None + for token in tlist.tokens[:]: + if (token.ttype is T.Punctuation + and token.value == ',' + and last_nl is not None): + tlist.tokens.remove(last_nl) + if token.is_whitespace(): + last_nl = token + else: + last_nl = None + return self._stripws_default(tlist) + + def _stripws_parenthesis(self, tlist): + if tlist.tokens[1].is_whitespace(): + tlist.tokens.pop(1) + if tlist.tokens[-2].is_whitespace(): + tlist.tokens.pop(-2) + self._stripws_default(tlist) + + def process(self, stack, stmt, depth=0): + [self.process(stack, sgroup, depth + 1) + for sgroup in stmt.get_sublists()] + self._stripws(stmt) + if ( + depth == 0 + and stmt.tokens + and stmt.tokens[-1].is_whitespace() + ): + stmt.tokens.pop(-1) + + +class ReindentFilter: + + def __init__(self, width=2, char=' ', line_width=None): + self.width = width + self.char = char + self.indent = 0 + self.offset = 0 + self.line_width = line_width + self._curr_stmt = None + self._last_stmt = None + + def _flatten_up_to_token(self, token): + """Yields all tokens up to token plus the next one.""" + # helper for _get_offset + iterator = self._curr_stmt.flatten() + for t in iterator: + yield t + if t == token: + raise StopIteration + + def _get_offset(self, token): + raw = ''.join(map(unicode, self._flatten_up_to_token(token))) + line = raw.splitlines()[-1] + # Now take current offset into account and return relative offset. 
+ full_offset = len(line) - len(self.char * (self.width * self.indent)) + return full_offset - self.offset + + def nl(self): + # TODO: newline character should be configurable + space = (self.char * ((self.indent * self.width) + self.offset)) + # Detect runaway indenting due to parsing errors + if len(space) > 200: + # something seems to be wrong, flip back + self.indent = self.offset = 0 + space = (self.char * ((self.indent * self.width) + self.offset)) + ws = '\n' + space + return sql.Token(T.Whitespace, ws) + + def _split_kwds(self, tlist): + split_words = ('FROM', 'STRAIGHT_JOIN$', 'JOIN$', 'AND', 'OR', + 'GROUP', 'ORDER', 'UNION', 'VALUES', + 'SET', 'BETWEEN', 'EXCEPT', 'HAVING') + + def _next_token(i): + t = tlist.token_next_match(i, T.Keyword, split_words, + regex=True) + if t and t.value.upper() == 'BETWEEN': + t = _next_token(tlist.token_index(t) + 1) + if t and t.value.upper() == 'AND': + t = _next_token(tlist.token_index(t) + 1) + return t + + idx = 0 + token = _next_token(idx) + added = set() + while token: + prev = tlist.token_prev(tlist.token_index(token), False) + offset = 1 + if prev and prev.is_whitespace() and prev not in added: + tlist.tokens.pop(tlist.token_index(prev)) + offset += 1 + uprev = unicode(prev) + if (prev and (uprev.endswith('\n') or uprev.endswith('\r'))): + nl = tlist.token_next(token) + else: + nl = self.nl() + added.add(nl) + tlist.insert_before(token, nl) + offset += 1 + token = _next_token(tlist.token_index(nl) + offset) + + def _split_statements(self, tlist): + idx = 0 + token = tlist.token_next_by_type(idx, (T.Keyword.DDL, T.Keyword.DML)) + while token: + prev = tlist.token_prev(tlist.token_index(token), False) + if prev and prev.is_whitespace(): + tlist.tokens.pop(tlist.token_index(prev)) + # only break if it's not the first token + if prev: + nl = self.nl() + tlist.insert_before(token, nl) + token = tlist.token_next_by_type(tlist.token_index(token) + 1, + (T.Keyword.DDL, T.Keyword.DML)) + + def _process(self, tlist): + func_name = '_process_%s' % tlist.__class__.__name__.lower() + func = getattr(self, func_name, self._process_default) + func(tlist) + + def _process_where(self, tlist): + token = tlist.token_next_match(0, T.Keyword, 'WHERE') + try: + tlist.insert_before(token, self.nl()) + except ValueError: # issue121, errors in statement + pass + self.indent += 1 + self._process_default(tlist) + self.indent -= 1 + + def _process_having(self, tlist): + token = tlist.token_next_match(0, T.Keyword, 'HAVING') + try: + tlist.insert_before(token, self.nl()) + except ValueError: # issue121, errors in statement + pass + self.indent += 1 + self._process_default(tlist) + self.indent -= 1 + + def _process_parenthesis(self, tlist): + first = tlist.token_next(0) + indented = False + if first and first.ttype in (T.Keyword.DML, T.Keyword.DDL): + self.indent += 1 + tlist.tokens.insert(0, self.nl()) + indented = True + num_offset = self._get_offset( + tlist.token_next_match(0, T.Punctuation, '(')) + self.offset += num_offset + self._process_default(tlist, stmts=not indented) + if indented: + self.indent -= 1 + self.offset -= num_offset + + def _process_identifierlist(self, tlist): + identifiers = list(tlist.get_identifiers()) + if len(identifiers) > 1 and not tlist.within(sql.Function): + first = list(identifiers[0].flatten())[0] + if self.char == '\t': + # when using tabs we don't count the actual word length + # in spaces. 
+ num_offset = 1 + else: + num_offset = self._get_offset(first) - len(first.value) + self.offset += num_offset + for token in identifiers[1:]: + tlist.insert_before(token, self.nl()) + self.offset -= num_offset + self._process_default(tlist) + + def _process_case(self, tlist): + is_first = True + num_offset = None + case = tlist.tokens[0] + outer_offset = self._get_offset(case) - len(case.value) + self.offset += outer_offset + for cond, value in tlist.get_cases(): + if is_first: + tcond = list(cond[0].flatten())[0] + is_first = False + num_offset = self._get_offset(tcond) - len(tcond.value) + self.offset += num_offset + continue + if cond is None: + token = value[0] + else: + token = cond[0] + tlist.insert_before(token, self.nl()) + # Line breaks on group level are done. Now let's add an offset of + # 5 (=length of "when", "then", "else") and process subgroups. + self.offset += 5 + self._process_default(tlist) + self.offset -= 5 + if num_offset is not None: + self.offset -= num_offset + end = tlist.token_next_match(0, T.Keyword, 'END') + tlist.insert_before(end, self.nl()) + self.offset -= outer_offset + + def _process_default(self, tlist, stmts=True, kwds=True): + if stmts: + self._split_statements(tlist) + if kwds: + self._split_kwds(tlist) + [self._process(sgroup) for sgroup in tlist.get_sublists()] + + def process(self, stack, stmt): + if isinstance(stmt, sql.Statement): + self._curr_stmt = stmt + self._process(stmt) + if isinstance(stmt, sql.Statement): + if self._last_stmt is not None: + if unicode(self._last_stmt).endswith('\n'): + nl = '\n' + else: + nl = '\n\n' + stmt.tokens.insert( + 0, sql.Token(T.Whitespace, nl)) + if self._last_stmt != stmt: + self._last_stmt = stmt + + +# FIXME: Doesn't work ;) +class RightMarginFilter: + + keep_together = ( + # sql.TypeCast, sql.Identifier, sql.Alias, + ) + + def __init__(self, width=79): + self.width = width + self.line = '' + + def _process(self, stack, group, stream): + for token in stream: + if token.is_whitespace() and '\n' in token.value: + if token.value.endswith('\n'): + self.line = '' + else: + self.line = token.value.splitlines()[-1] + elif (token.is_group() + and not token.__class__ in self.keep_together): + token.tokens = self._process(stack, token, token.tokens) + else: + val = unicode(token) + if len(self.line) + len(val) > self.width: + match = re.search('^ +', self.line) + if match is not None: + indent = match.group() + else: + indent = '' + yield sql.Token(T.Whitespace, '\n%s' % indent) + self.line = indent + self.line += val + yield token + + def process(self, stack, group): + return + group.tokens = self._process(stack, group, group.tokens) + + +class ColumnsSelect: + """Get the columns names of a SELECT query""" + def process(self, stack, stream): + mode = 0 + oldValue = "" + parenthesis = 0 + + for token_type, value in stream: + # Ignore comments + if token_type in Comment: + continue + + # We have not detected a SELECT statement + if mode == 0: + if token_type in Keyword and value == 'SELECT': + mode = 1 + + # We have detected a SELECT statement + elif mode == 1: + if value == 'FROM': + if oldValue: + yield oldValue + + mode = 3 # Columns have been checked + + elif value == 'AS': + oldValue = "" + mode = 2 + + elif (token_type == Punctuation + and value == ',' and not parenthesis): + if oldValue: + yield oldValue + oldValue = "" + + elif token_type not in Whitespace: + if value == '(': + parenthesis += 1 + elif value == ')': + parenthesis -= 1 + + oldValue += value + + # We are processing an AS keyword + elif mode == 
2:
                # We also check for Keywords because of a bug in SQLParse
                if token_type == Name or token_type == Keyword:
                    yield value
                    mode = 1


# ---------------------------
# postprocess

class SerializerUnicode:

    def process(self, stack, stmt):
        raw = unicode(stmt)
        lines = split_unquoted_newlines(raw)
        res = '\n'.join(line.rstrip() for line in lines)
        return res


def Tokens2Unicode(stream):
    result = ""

    for _, value in stream:
        result += unicode(value)

    return result


class OutputFilter:
    varname_prefix = ''

    def __init__(self, varname='sql'):
        self.varname = self.varname_prefix + varname
        self.count = 0

    def _process(self, stream, varname, has_nl):
        raise NotImplementedError

    def process(self, stack, stmt):
        self.count += 1
        if self.count > 1:
            varname = '%s%d' % (self.varname, self.count)
        else:
            varname = self.varname

        has_nl = len(unicode(stmt).strip().splitlines()) > 1
        stmt.tokens = self._process(stmt.tokens, varname, has_nl)
        return stmt


class OutputPythonFilter(OutputFilter):
    def _process(self, stream, varname, has_nl):
        # SQL query assignment to varname
        if self.count > 1:
            yield sql.Token(T.Whitespace, '\n')
        yield sql.Token(T.Name, varname)
        yield sql.Token(T.Whitespace, ' ')
        yield sql.Token(T.Operator, '=')
        yield sql.Token(T.Whitespace, ' ')
        if has_nl:
            yield sql.Token(T.Operator, '(')
        yield sql.Token(T.Text, "'")

        # Print the tokens between the quotes
        for token in stream:
            # Token is a new line separator
            if token.is_whitespace() and '\n' in token.value:
                # Close quote and add a new line
                yield sql.Token(T.Text, " '")
                yield sql.Token(T.Whitespace, '\n')

                # Quote header on secondary lines
                yield sql.Token(T.Whitespace, ' ' * (len(varname) + 4))
                yield sql.Token(T.Text, "'")

                # Indentation
                after_lb = token.value.split('\n', 1)[1]
                if after_lb:
                    yield sql.Token(T.Whitespace, after_lb)
                continue

            # Token has escape chars
            elif "'" in token.value:
                token.value = token.value.replace("'", "\\'")

            # Put the token
            yield sql.Token(T.Text, token.value)

        # Close quote
        yield sql.Token(T.Text, "'")
        if has_nl:
            yield sql.Token(T.Operator, ')')


class OutputPHPFilter(OutputFilter):
    varname_prefix = '$'

    def _process(self, stream, varname, has_nl):
        # SQL query assignment to varname (quote header)
        if self.count > 1:
            yield sql.Token(T.Whitespace, '\n')
        yield sql.Token(T.Name, varname)
        yield sql.Token(T.Whitespace, ' ')
        if has_nl:
            yield sql.Token(T.Whitespace, ' ')
        yield sql.Token(T.Operator, '=')
        yield sql.Token(T.Whitespace, ' ')
        yield sql.Token(T.Text, '"')

        # Print the tokens between the quotes
        for token in stream:
            # Token is a new line separator
            if token.is_whitespace() and '\n' in token.value:
                # Close quote and add a new line
                yield sql.Token(T.Text, ' ";')
                yield sql.Token(T.Whitespace, '\n')

                # Quote header on secondary lines
                yield sql.Token(T.Name, varname)
                yield sql.Token(T.Whitespace, ' ')
                yield sql.Token(T.Operator, '.=')
                yield sql.Token(T.Whitespace, ' ')
                yield sql.Token(T.Text, '"')

                # Indentation
                after_lb = token.value.split('\n', 1)[1]
                if after_lb:
                    yield sql.Token(T.Whitespace, after_lb)
                continue

            # Token has escape chars
            elif '"' in token.value:
                token.value = token.value.replace('"', '\\"')

            # Put the token
            yield sql.Token(T.Text, token.value)

        # Close quote
        yield sql.Token(T.Text, '"')
        yield sql.Token(T.Punctuation, ';')


class Limit:
    """Get the LIMIT of a query.
+ + If not defined, return -1 (SQL specification for no LIMIT query) + """ + def process(self, stack, stream): + index = 7 + stream = list(stream) + stream.reverse() + + # Run over all tokens in the stream from the end + for token_type, value in stream: + index -= 1 + +# if index and token_type in Keyword: + if index and token_type in Keyword and value == 'LIMIT': + return stream[4 - index][1] + + return -1 + + +def compact(stream): + """Function that return a compacted version of the stream""" + pipe = Pipeline() + + pipe.append(StripComments()) + pipe.append(StripWhitespace) + + return pipe(stream)
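For reference, a minimal sketch of how these stream filters compose (assuming
the vendored sqlparse-0.1.19 package above is importable; the query string and
the output shown are illustrative, reasoned from StripComments and
StripWhitespace):

    from sqlparse.lexer import tokenize
    from sqlparse.filters import compact, Tokens2Unicode

    # tokenize() yields (ttype, value) pairs; compact() pipes them through
    # StripComments and then StripWhitespace via a Pipeline.
    stream = tokenize("SELECT  a ,  b  -- pick two columns\nFROM tbl LIMIT 10")
    print Tokens2Unicode(compact(stream))
    # -> SELECT a,b FROM tbl LIMIT 10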
http://git-wip-us.apache.org/repos/asf/impala/blob/417bc8c8/shell/ext-py/sqlparse-0.1.19/sqlparse/formatter.py ---------------------------------------------------------------------- diff --git a/shell/ext-py/sqlparse-0.1.19/sqlparse/formatter.py b/shell/ext-py/sqlparse-0.1.19/sqlparse/formatter.py new file mode 100644 index 0000000..811f5af --- /dev/null +++ b/shell/ext-py/sqlparse-0.1.19/sqlparse/formatter.py @@ -0,0 +1,137 @@ +# Copyright (C) 2008 Andi Albrecht, [email protected] +# +# This module is part of python-sqlparse and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php. + +"""SQL formatter""" + +from sqlparse import filters +from sqlparse.exceptions import SQLParseError + + +def validate_options(options): + """Validates options.""" + kwcase = options.get('keyword_case', None) + if kwcase not in [None, 'upper', 'lower', 'capitalize']: + raise SQLParseError('Invalid value for keyword_case: %r' % kwcase) + + idcase = options.get('identifier_case', None) + if idcase not in [None, 'upper', 'lower', 'capitalize']: + raise SQLParseError('Invalid value for identifier_case: %r' % idcase) + + ofrmt = options.get('output_format', None) + if ofrmt not in [None, 'sql', 'python', 'php']: + raise SQLParseError('Unknown output format: %r' % ofrmt) + + strip_comments = options.get('strip_comments', False) + if strip_comments not in [True, False]: + raise SQLParseError('Invalid value for strip_comments: %r' + % strip_comments) + + strip_ws = options.get('strip_whitespace', False) + if strip_ws not in [True, False]: + raise SQLParseError('Invalid value for strip_whitespace: %r' + % strip_ws) + + truncate_strings = options.get('truncate_strings', None) + if truncate_strings is not None: + try: + truncate_strings = int(truncate_strings) + except (ValueError, TypeError): + raise SQLParseError('Invalid value for truncate_strings: %r' + % truncate_strings) + if truncate_strings <= 1: + raise SQLParseError('Invalid value for truncate_strings: %r' + % truncate_strings) + options['truncate_strings'] = truncate_strings + options['truncate_char'] = options.get('truncate_char', '[...]') + + reindent = options.get('reindent', False) + if reindent not in [True, False]: + raise SQLParseError('Invalid value for reindent: %r' + % reindent) + elif reindent: + options['strip_whitespace'] = True + indent_tabs = options.get('indent_tabs', False) + if indent_tabs not in [True, False]: + raise SQLParseError('Invalid value for indent_tabs: %r' % indent_tabs) + elif indent_tabs: + options['indent_char'] = '\t' + else: + options['indent_char'] = ' ' + indent_width = options.get('indent_width', 2) + try: + indent_width = int(indent_width) + except (TypeError, ValueError): + raise SQLParseError('indent_width requires an integer') + if indent_width < 1: + raise SQLParseError('indent_width requires an positive integer') + options['indent_width'] = indent_width + + right_margin = options.get('right_margin', None) + if right_margin is not None: + try: + right_margin = int(right_margin) + except (TypeError, ValueError): + raise SQLParseError('right_margin requires an integer') + if right_margin < 10: + raise SQLParseError('right_margin requires an integer > 10') + options['right_margin'] = right_margin + + return options + + +def build_filter_stack(stack, options): + """Setup and return a filter stack. + + Args: + stack: :class:`~sqlparse.filters.FilterStack` instance + options: Dictionary with options validated by validate_options. 
+    """
+    # Token filter
+    if options.get('keyword_case', None):
+        stack.preprocess.append(
+            filters.KeywordCaseFilter(options['keyword_case']))
+
+    if options.get('identifier_case', None):
+        stack.preprocess.append(
+            filters.IdentifierCaseFilter(options['identifier_case']))
+
+    if options.get('truncate_strings', None) is not None:
+        stack.preprocess.append(filters.TruncateStringFilter(
+            width=options['truncate_strings'], char=options['truncate_char']))
+
+    # After grouping
+    if options.get('strip_comments', False):
+        stack.enable_grouping()
+        stack.stmtprocess.append(filters.StripCommentsFilter())
+
+    if (options.get('strip_whitespace', False)
+            or options.get('reindent', False)):
+        stack.enable_grouping()
+        stack.stmtprocess.append(filters.StripWhitespaceFilter())
+
+    if options.get('reindent', False):
+        stack.enable_grouping()
+        stack.stmtprocess.append(
+            filters.ReindentFilter(char=options['indent_char'],
+                                   width=options['indent_width']))
+
+    if options.get('right_margin', False):
+        stack.enable_grouping()
+        stack.stmtprocess.append(
+            filters.RightMarginFilter(width=options['right_margin']))
+
+    # Serializer
+    if options.get('output_format'):
+        frmt = options['output_format']
+        if frmt.lower() == 'php':
+            fltr = filters.OutputPHPFilter()
+        elif frmt.lower() == 'python':
+            fltr = filters.OutputPythonFilter()
+        else:
+            fltr = None
+        if fltr is not None:
+            stack.postprocess.append(fltr)
+
+    return stack

http://git-wip-us.apache.org/repos/asf/impala/blob/417bc8c8/shell/ext-py/sqlparse-0.1.19/sqlparse/functions.py
----------------------------------------------------------------------
diff --git a/shell/ext-py/sqlparse-0.1.19/sqlparse/functions.py b/shell/ext-py/sqlparse-0.1.19/sqlparse/functions.py
new file mode 100644
index 0000000..e54457e
--- /dev/null
+++ b/shell/ext-py/sqlparse-0.1.19/sqlparse/functions.py
@@ -0,0 +1,44 @@
+'''
+Created on 17/05/2012
+
+@author: piranna
+
+Several utility functions to extract info from SQL statements
+'''
+
+from sqlparse.filters import ColumnsSelect, Limit
+from sqlparse.pipeline import Pipeline
+from sqlparse.tokens import Keyword, Whitespace
+
+
+def getlimit(stream):
+    """Return the LIMIT of an input SQL statement"""
+    pipe = Pipeline()
+
+    pipe.append(Limit())
+
+    result = pipe(stream)
+    try:
+        return int(result)
+    except ValueError:
+        return result
+
+
+def getcolumns(stream):
+    """Return the columns of a SELECT query"""
+    pipe = Pipeline()
+
+    pipe.append(ColumnsSelect())
+
+    return pipe(stream)
+
+
+class IsType(object):
+    """Functor that returns whether a statement is of a specific type"""
+    def __init__(self, type):
+        self.type = type
+
+    def __call__(self, stream):
+        for token_type, value in stream:
+            if token_type not in Whitespace:
+                return token_type in Keyword and value == self.type

http://git-wip-us.apache.org/repos/asf/impala/blob/417bc8c8/shell/ext-py/sqlparse-0.1.19/sqlparse/keywords.py
----------------------------------------------------------------------
diff --git a/shell/ext-py/sqlparse-0.1.19/sqlparse/keywords.py b/shell/ext-py/sqlparse-0.1.19/sqlparse/keywords.py
new file mode 100644
index 0000000..1595aa8
--- /dev/null
+++ b/shell/ext-py/sqlparse-0.1.19/sqlparse/keywords.py
@@ -0,0 +1,574 @@
+from sqlparse import tokens
+
+KEYWORDS = {
+    'ABORT': tokens.Keyword,
+    'ABS': tokens.Keyword,
+    'ABSOLUTE': tokens.Keyword,
+    'ACCESS': tokens.Keyword,
+    'ADA': tokens.Keyword,
+    'ADD': tokens.Keyword,
+    'ADMIN': tokens.Keyword,
+    'AFTER': tokens.Keyword,
+    'AGGREGATE': tokens.Keyword,
+    'ALIAS': tokens.Keyword,
+
'ALL': tokens.Keyword, + 'ALLOCATE': tokens.Keyword, + 'ANALYSE': tokens.Keyword, + 'ANALYZE': tokens.Keyword, + 'ANY': tokens.Keyword, + 'ARE': tokens.Keyword, + 'ASC': tokens.Keyword.Order, + 'ASENSITIVE': tokens.Keyword, + 'ASSERTION': tokens.Keyword, + 'ASSIGNMENT': tokens.Keyword, + 'ASYMMETRIC': tokens.Keyword, + 'AT': tokens.Keyword, + 'ATOMIC': tokens.Keyword, + 'AUTHORIZATION': tokens.Keyword, + 'AVG': tokens.Keyword, + + 'BACKWARD': tokens.Keyword, + 'BEFORE': tokens.Keyword, + 'BEGIN': tokens.Keyword, + 'BETWEEN': tokens.Keyword, + 'BITVAR': tokens.Keyword, + 'BIT_LENGTH': tokens.Keyword, + 'BOTH': tokens.Keyword, + 'BREADTH': tokens.Keyword, + + # 'C': tokens.Keyword, # most likely this is an alias + 'CACHE': tokens.Keyword, + 'CALL': tokens.Keyword, + 'CALLED': tokens.Keyword, + 'CARDINALITY': tokens.Keyword, + 'CASCADE': tokens.Keyword, + 'CASCADED': tokens.Keyword, + 'CAST': tokens.Keyword, + 'CATALOG': tokens.Keyword, + 'CATALOG_NAME': tokens.Keyword, + 'CHAIN': tokens.Keyword, + 'CHARACTERISTICS': tokens.Keyword, + 'CHARACTER_LENGTH': tokens.Keyword, + 'CHARACTER_SET_CATALOG': tokens.Keyword, + 'CHARACTER_SET_NAME': tokens.Keyword, + 'CHARACTER_SET_SCHEMA': tokens.Keyword, + 'CHAR_LENGTH': tokens.Keyword, + 'CHECK': tokens.Keyword, + 'CHECKED': tokens.Keyword, + 'CHECKPOINT': tokens.Keyword, + 'CLASS': tokens.Keyword, + 'CLASS_ORIGIN': tokens.Keyword, + 'CLOB': tokens.Keyword, + 'CLOSE': tokens.Keyword, + 'CLUSTER': tokens.Keyword, + 'COALESCE': tokens.Keyword, + 'COBOL': tokens.Keyword, + 'COLLATE': tokens.Keyword, + 'COLLATION': tokens.Keyword, + 'COLLATION_CATALOG': tokens.Keyword, + 'COLLATION_NAME': tokens.Keyword, + 'COLLATION_SCHEMA': tokens.Keyword, + 'COLLECT': tokens.Keyword, + 'COLUMN': tokens.Keyword, + 'COLUMN_NAME': tokens.Keyword, + 'COMMAND_FUNCTION': tokens.Keyword, + 'COMMAND_FUNCTION_CODE': tokens.Keyword, + 'COMMENT': tokens.Keyword, + 'COMMIT': tokens.Keyword.DML, + 'COMMITTED': tokens.Keyword, + 'COMPLETION': tokens.Keyword, + 'CONDITION_NUMBER': tokens.Keyword, + 'CONNECT': tokens.Keyword, + 'CONNECTION': tokens.Keyword, + 'CONNECTION_NAME': tokens.Keyword, + 'CONSTRAINT': tokens.Keyword, + 'CONSTRAINTS': tokens.Keyword, + 'CONSTRAINT_CATALOG': tokens.Keyword, + 'CONSTRAINT_NAME': tokens.Keyword, + 'CONSTRAINT_SCHEMA': tokens.Keyword, + 'CONSTRUCTOR': tokens.Keyword, + 'CONTAINS': tokens.Keyword, + 'CONTINUE': tokens.Keyword, + 'CONVERSION': tokens.Keyword, + 'CONVERT': tokens.Keyword, + 'COPY': tokens.Keyword, + 'CORRESPONTING': tokens.Keyword, + 'COUNT': tokens.Keyword, + 'CREATEDB': tokens.Keyword, + 'CREATEUSER': tokens.Keyword, + 'CROSS': tokens.Keyword, + 'CUBE': tokens.Keyword, + 'CURRENT': tokens.Keyword, + 'CURRENT_DATE': tokens.Keyword, + 'CURRENT_PATH': tokens.Keyword, + 'CURRENT_ROLE': tokens.Keyword, + 'CURRENT_TIME': tokens.Keyword, + 'CURRENT_TIMESTAMP': tokens.Keyword, + 'CURRENT_USER': tokens.Keyword, + 'CURSOR': tokens.Keyword, + 'CURSOR_NAME': tokens.Keyword, + 'CYCLE': tokens.Keyword, + + 'DATA': tokens.Keyword, + 'DATABASE': tokens.Keyword, + 'DATETIME_INTERVAL_CODE': tokens.Keyword, + 'DATETIME_INTERVAL_PRECISION': tokens.Keyword, + 'DAY': tokens.Keyword, + 'DEALLOCATE': tokens.Keyword, + 'DECLARE': tokens.Keyword, + 'DEFAULT': tokens.Keyword, + 'DEFAULTS': tokens.Keyword, + 'DEFERRABLE': tokens.Keyword, + 'DEFERRED': tokens.Keyword, + 'DEFINED': tokens.Keyword, + 'DEFINER': tokens.Keyword, + 'DELIMITER': tokens.Keyword, + 'DELIMITERS': tokens.Keyword, + 'DEREF': tokens.Keyword, + 'DESC': tokens.Keyword.Order, + 'DESCRIBE': 
tokens.Keyword, + 'DESCRIPTOR': tokens.Keyword, + 'DESTROY': tokens.Keyword, + 'DESTRUCTOR': tokens.Keyword, + 'DETERMINISTIC': tokens.Keyword, + 'DIAGNOSTICS': tokens.Keyword, + 'DICTIONARY': tokens.Keyword, + 'DISCONNECT': tokens.Keyword, + 'DISPATCH': tokens.Keyword, + 'DO': tokens.Keyword, + 'DOMAIN': tokens.Keyword, + 'DYNAMIC': tokens.Keyword, + 'DYNAMIC_FUNCTION': tokens.Keyword, + 'DYNAMIC_FUNCTION_CODE': tokens.Keyword, + + 'EACH': tokens.Keyword, + 'ENCODING': tokens.Keyword, + 'ENCRYPTED': tokens.Keyword, + 'END-EXEC': tokens.Keyword, + 'EQUALS': tokens.Keyword, + 'ESCAPE': tokens.Keyword, + 'EVERY': tokens.Keyword, + 'EXCEPT': tokens.Keyword, + 'ESCEPTION': tokens.Keyword, + 'EXCLUDING': tokens.Keyword, + 'EXCLUSIVE': tokens.Keyword, + 'EXEC': tokens.Keyword, + 'EXECUTE': tokens.Keyword, + 'EXISTING': tokens.Keyword, + 'EXISTS': tokens.Keyword, + 'EXTERNAL': tokens.Keyword, + 'EXTRACT': tokens.Keyword, + + 'FALSE': tokens.Keyword, + 'FETCH': tokens.Keyword, + 'FINAL': tokens.Keyword, + 'FIRST': tokens.Keyword, + 'FORCE': tokens.Keyword, + 'FOREACH': tokens.Keyword, + 'FOREIGN': tokens.Keyword, + 'FORTRAN': tokens.Keyword, + 'FORWARD': tokens.Keyword, + 'FOUND': tokens.Keyword, + 'FREE': tokens.Keyword, + 'FREEZE': tokens.Keyword, + 'FULL': tokens.Keyword, + 'FUNCTION': tokens.Keyword, + + # 'G': tokens.Keyword, + 'GENERAL': tokens.Keyword, + 'GENERATED': tokens.Keyword, + 'GET': tokens.Keyword, + 'GLOBAL': tokens.Keyword, + 'GO': tokens.Keyword, + 'GOTO': tokens.Keyword, + 'GRANT': tokens.Keyword, + 'GRANTED': tokens.Keyword, + 'GROUPING': tokens.Keyword, + + 'HANDLER': tokens.Keyword, + 'HAVING': tokens.Keyword, + 'HIERARCHY': tokens.Keyword, + 'HOLD': tokens.Keyword, + 'HOST': tokens.Keyword, + + 'IDENTITY': tokens.Keyword, + 'IGNORE': tokens.Keyword, + 'ILIKE': tokens.Keyword, + 'IMMEDIATE': tokens.Keyword, + 'IMMUTABLE': tokens.Keyword, + + 'IMPLEMENTATION': tokens.Keyword, + 'IMPLICIT': tokens.Keyword, + 'INCLUDING': tokens.Keyword, + 'INCREMENT': tokens.Keyword, + 'INDEX': tokens.Keyword, + + 'INDITCATOR': tokens.Keyword, + 'INFIX': tokens.Keyword, + 'INHERITS': tokens.Keyword, + 'INITIALIZE': tokens.Keyword, + 'INITIALLY': tokens.Keyword, + 'INOUT': tokens.Keyword, + 'INPUT': tokens.Keyword, + 'INSENSITIVE': tokens.Keyword, + 'INSTANTIABLE': tokens.Keyword, + 'INSTEAD': tokens.Keyword, + 'INTERSECT': tokens.Keyword, + 'INTO': tokens.Keyword, + 'INVOKER': tokens.Keyword, + 'IS': tokens.Keyword, + 'ISNULL': tokens.Keyword, + 'ISOLATION': tokens.Keyword, + 'ITERATE': tokens.Keyword, + + # 'K': tokens.Keyword, + 'KEY': tokens.Keyword, + 'KEY_MEMBER': tokens.Keyword, + 'KEY_TYPE': tokens.Keyword, + + 'LANCOMPILER': tokens.Keyword, + 'LANGUAGE': tokens.Keyword, + 'LARGE': tokens.Keyword, + 'LAST': tokens.Keyword, + 'LATERAL': tokens.Keyword, + 'LEADING': tokens.Keyword, + 'LENGTH': tokens.Keyword, + 'LESS': tokens.Keyword, + 'LEVEL': tokens.Keyword, + 'LIMIT': tokens.Keyword, + 'LISTEN': tokens.Keyword, + 'LOAD': tokens.Keyword, + 'LOCAL': tokens.Keyword, + 'LOCALTIME': tokens.Keyword, + 'LOCALTIMESTAMP': tokens.Keyword, + 'LOCATION': tokens.Keyword, + 'LOCATOR': tokens.Keyword, + 'LOCK': tokens.Keyword, + 'LOWER': tokens.Keyword, + + # 'M': tokens.Keyword, + 'MAP': tokens.Keyword, + 'MATCH': tokens.Keyword, + 'MAXVALUE': tokens.Keyword, + 'MESSAGE_LENGTH': tokens.Keyword, + 'MESSAGE_OCTET_LENGTH': tokens.Keyword, + 'MESSAGE_TEXT': tokens.Keyword, + 'METHOD': tokens.Keyword, + 'MINUTE': tokens.Keyword, + 'MINVALUE': tokens.Keyword, + 'MOD': tokens.Keyword, + 'MODE': 
tokens.Keyword, + 'MODIFIES': tokens.Keyword, + 'MODIFY': tokens.Keyword, + 'MONTH': tokens.Keyword, + 'MORE': tokens.Keyword, + 'MOVE': tokens.Keyword, + 'MUMPS': tokens.Keyword, + + 'NAMES': tokens.Keyword, + 'NATIONAL': tokens.Keyword, + 'NATURAL': tokens.Keyword, + 'NCHAR': tokens.Keyword, + 'NCLOB': tokens.Keyword, + 'NEW': tokens.Keyword, + 'NEXT': tokens.Keyword, + 'NO': tokens.Keyword, + 'NOCREATEDB': tokens.Keyword, + 'NOCREATEUSER': tokens.Keyword, + 'NONE': tokens.Keyword, + 'NOT': tokens.Keyword, + 'NOTHING': tokens.Keyword, + 'NOTIFY': tokens.Keyword, + 'NOTNULL': tokens.Keyword, + 'NULL': tokens.Keyword, + 'NULLABLE': tokens.Keyword, + 'NULLIF': tokens.Keyword, + + 'OBJECT': tokens.Keyword, + 'OCTET_LENGTH': tokens.Keyword, + 'OF': tokens.Keyword, + 'OFF': tokens.Keyword, + 'OFFSET': tokens.Keyword, + 'OIDS': tokens.Keyword, + 'OLD': tokens.Keyword, + 'ONLY': tokens.Keyword, + 'OPEN': tokens.Keyword, + 'OPERATION': tokens.Keyword, + 'OPERATOR': tokens.Keyword, + 'OPTION': tokens.Keyword, + 'OPTIONS': tokens.Keyword, + 'ORDINALITY': tokens.Keyword, + 'OUT': tokens.Keyword, + 'OUTPUT': tokens.Keyword, + 'OVERLAPS': tokens.Keyword, + 'OVERLAY': tokens.Keyword, + 'OVERRIDING': tokens.Keyword, + 'OWNER': tokens.Keyword, + + 'PAD': tokens.Keyword, + 'PARAMETER': tokens.Keyword, + 'PARAMETERS': tokens.Keyword, + 'PARAMETER_MODE': tokens.Keyword, + 'PARAMATER_NAME': tokens.Keyword, + 'PARAMATER_ORDINAL_POSITION': tokens.Keyword, + 'PARAMETER_SPECIFIC_CATALOG': tokens.Keyword, + 'PARAMETER_SPECIFIC_NAME': tokens.Keyword, + 'PARAMATER_SPECIFIC_SCHEMA': tokens.Keyword, + 'PARTIAL': tokens.Keyword, + 'PASCAL': tokens.Keyword, + 'PENDANT': tokens.Keyword, + 'PLACING': tokens.Keyword, + 'PLI': tokens.Keyword, + 'POSITION': tokens.Keyword, + 'POSTFIX': tokens.Keyword, + 'PRECISION': tokens.Keyword, + 'PREFIX': tokens.Keyword, + 'PREORDER': tokens.Keyword, + 'PREPARE': tokens.Keyword, + 'PRESERVE': tokens.Keyword, + 'PRIMARY': tokens.Keyword, + 'PRIOR': tokens.Keyword, + 'PRIVILEGES': tokens.Keyword, + 'PROCEDURAL': tokens.Keyword, + 'PROCEDURE': tokens.Keyword, + 'PUBLIC': tokens.Keyword, + + 'RAISE': tokens.Keyword, + 'READ': tokens.Keyword, + 'READS': tokens.Keyword, + 'RECHECK': tokens.Keyword, + 'RECURSIVE': tokens.Keyword, + 'REF': tokens.Keyword, + 'REFERENCES': tokens.Keyword, + 'REFERENCING': tokens.Keyword, + 'REINDEX': tokens.Keyword, + 'RELATIVE': tokens.Keyword, + 'RENAME': tokens.Keyword, + 'REPEATABLE': tokens.Keyword, + 'RESET': tokens.Keyword, + 'RESTART': tokens.Keyword, + 'RESTRICT': tokens.Keyword, + 'RESULT': tokens.Keyword, + 'RETURN': tokens.Keyword, + 'RETURNED_LENGTH': tokens.Keyword, + 'RETURNED_OCTET_LENGTH': tokens.Keyword, + 'RETURNED_SQLSTATE': tokens.Keyword, + 'RETURNS': tokens.Keyword, + 'REVOKE': tokens.Keyword, + 'RIGHT': tokens.Keyword, + 'ROLE': tokens.Keyword, + 'ROLLBACK': tokens.Keyword.DML, + 'ROLLUP': tokens.Keyword, + 'ROUTINE': tokens.Keyword, + 'ROUTINE_CATALOG': tokens.Keyword, + 'ROUTINE_NAME': tokens.Keyword, + 'ROUTINE_SCHEMA': tokens.Keyword, + 'ROW': tokens.Keyword, + 'ROWS': tokens.Keyword, + 'ROW_COUNT': tokens.Keyword, + 'RULE': tokens.Keyword, + + 'SAVE_POINT': tokens.Keyword, + 'SCALE': tokens.Keyword, + 'SCHEMA': tokens.Keyword, + 'SCHEMA_NAME': tokens.Keyword, + 'SCOPE': tokens.Keyword, + 'SCROLL': tokens.Keyword, + 'SEARCH': tokens.Keyword, + 'SECOND': tokens.Keyword, + 'SECURITY': tokens.Keyword, + 'SELF': tokens.Keyword, + 'SENSITIVE': tokens.Keyword, + 'SERIALIZABLE': tokens.Keyword, + 'SERVER_NAME': tokens.Keyword, + 'SESSION': 
tokens.Keyword, + 'SESSION_USER': tokens.Keyword, + 'SETOF': tokens.Keyword, + 'SETS': tokens.Keyword, + 'SHARE': tokens.Keyword, + 'SHOW': tokens.Keyword, + 'SIMILAR': tokens.Keyword, + 'SIMPLE': tokens.Keyword, + 'SIZE': tokens.Keyword, + 'SOME': tokens.Keyword, + 'SOURCE': tokens.Keyword, + 'SPACE': tokens.Keyword, + 'SPECIFIC': tokens.Keyword, + 'SPECIFICTYPE': tokens.Keyword, + 'SPECIFIC_NAME': tokens.Keyword, + 'SQL': tokens.Keyword, + 'SQLCODE': tokens.Keyword, + 'SQLERROR': tokens.Keyword, + 'SQLEXCEPTION': tokens.Keyword, + 'SQLSTATE': tokens.Keyword, + 'SQLWARNING': tokens.Keyword, + 'STABLE': tokens.Keyword, + 'START': tokens.Keyword.DML, + 'STATE': tokens.Keyword, + 'STATEMENT': tokens.Keyword, + 'STATIC': tokens.Keyword, + 'STATISTICS': tokens.Keyword, + 'STDIN': tokens.Keyword, + 'STDOUT': tokens.Keyword, + 'STORAGE': tokens.Keyword, + 'STRICT': tokens.Keyword, + 'STRUCTURE': tokens.Keyword, + 'STYPE': tokens.Keyword, + 'SUBCLASS_ORIGIN': tokens.Keyword, + 'SUBLIST': tokens.Keyword, + 'SUBSTRING': tokens.Keyword, + 'SUM': tokens.Keyword, + 'SYMMETRIC': tokens.Keyword, + 'SYSID': tokens.Keyword, + 'SYSTEM': tokens.Keyword, + 'SYSTEM_USER': tokens.Keyword, + + 'TABLE': tokens.Keyword, + 'TABLE_NAME': tokens.Keyword, + 'TEMP': tokens.Keyword, + 'TEMPLATE': tokens.Keyword, + 'TEMPORARY': tokens.Keyword, + 'TERMINATE': tokens.Keyword, + 'THAN': tokens.Keyword, + 'TIMESTAMP': tokens.Keyword, + 'TIMEZONE_HOUR': tokens.Keyword, + 'TIMEZONE_MINUTE': tokens.Keyword, + 'TO': tokens.Keyword, + 'TOAST': tokens.Keyword, + 'TRAILING': tokens.Keyword, + 'TRANSATION': tokens.Keyword, + 'TRANSACTIONS_COMMITTED': tokens.Keyword, + 'TRANSACTIONS_ROLLED_BACK': tokens.Keyword, + 'TRANSATION_ACTIVE': tokens.Keyword, + 'TRANSFORM': tokens.Keyword, + 'TRANSFORMS': tokens.Keyword, + 'TRANSLATE': tokens.Keyword, + 'TRANSLATION': tokens.Keyword, + 'TREAT': tokens.Keyword, + 'TRIGGER': tokens.Keyword, + 'TRIGGER_CATALOG': tokens.Keyword, + 'TRIGGER_NAME': tokens.Keyword, + 'TRIGGER_SCHEMA': tokens.Keyword, + 'TRIM': tokens.Keyword, + 'TRUE': tokens.Keyword, + 'TRUNCATE': tokens.Keyword, + 'TRUSTED': tokens.Keyword, + 'TYPE': tokens.Keyword, + + 'UNCOMMITTED': tokens.Keyword, + 'UNDER': tokens.Keyword, + 'UNENCRYPTED': tokens.Keyword, + 'UNION': tokens.Keyword, + 'UNIQUE': tokens.Keyword, + 'UNKNOWN': tokens.Keyword, + 'UNLISTEN': tokens.Keyword, + 'UNNAMED': tokens.Keyword, + 'UNNEST': tokens.Keyword, + 'UNTIL': tokens.Keyword, + 'UPPER': tokens.Keyword, + 'USAGE': tokens.Keyword, + 'USE': tokens.Keyword, + 'USER': tokens.Keyword, + 'USER_DEFINED_TYPE_CATALOG': tokens.Keyword, + 'USER_DEFINED_TYPE_NAME': tokens.Keyword, + 'USER_DEFINED_TYPE_SCHEMA': tokens.Keyword, + 'USING': tokens.Keyword, + + 'VACUUM': tokens.Keyword, + 'VALID': tokens.Keyword, + 'VALIDATOR': tokens.Keyword, + 'VALUES': tokens.Keyword, + 'VARIABLE': tokens.Keyword, + 'VERBOSE': tokens.Keyword, + 'VERSION': tokens.Keyword, + 'VIEW': tokens.Keyword, + 'VOLATILE': tokens.Keyword, + + 'WHENEVER': tokens.Keyword, + 'WITH': tokens.Keyword, + 'WITHOUT': tokens.Keyword, + 'WORK': tokens.Keyword, + 'WRITE': tokens.Keyword, + + 'YEAR': tokens.Keyword, + + 'ZONE': tokens.Keyword, + + # Name.Builtin + 'ARRAY': tokens.Name.Builtin, + 'BIGINT': tokens.Name.Builtin, + 'BINARY': tokens.Name.Builtin, + 'BIT': tokens.Name.Builtin, + 'BLOB': tokens.Name.Builtin, + 'BOOLEAN': tokens.Name.Builtin, + 'CHAR': tokens.Name.Builtin, + 'CHARACTER': tokens.Name.Builtin, + 'DATE': tokens.Name.Builtin, + 'DEC': tokens.Name.Builtin, + 'DECIMAL': 
tokens.Name.Builtin, + 'FLOAT': tokens.Name.Builtin, + 'INT': tokens.Name.Builtin, + 'INT8': tokens.Name.Builtin, + 'INTEGER': tokens.Name.Builtin, + 'INTERVAL': tokens.Name.Builtin, + 'LONG': tokens.Name.Builtin, + 'NUMBER': tokens.Name.Builtin, + 'NUMERIC': tokens.Name.Builtin, + 'REAL': tokens.Name.Builtin, + 'SERIAL': tokens.Name.Builtin, + 'SERIAL8': tokens.Name.Builtin, + 'SIGNED': tokens.Name.Builtin, + 'SMALLINT': tokens.Name.Builtin, + 'TEXT': tokens.Name.Builtin, + 'TINYINT': tokens.Name.Builtin, + 'UNSIGNED': tokens.Name.Builtin, + 'VARCHAR': tokens.Name.Builtin, + 'VARCHAR2': tokens.Name.Builtin, + 'VARYING': tokens.Name.Builtin, +} + + +KEYWORDS_COMMON = { + 'SELECT': tokens.Keyword.DML, + 'INSERT': tokens.Keyword.DML, + 'DELETE': tokens.Keyword.DML, + 'UPDATE': tokens.Keyword.DML, + 'REPLACE': tokens.Keyword.DML, + 'MERGE': tokens.Keyword.DML, + 'DROP': tokens.Keyword.DDL, + 'CREATE': tokens.Keyword.DDL, + 'ALTER': tokens.Keyword.DDL, + + 'WHERE': tokens.Keyword, + 'FROM': tokens.Keyword, + 'INNER': tokens.Keyword, + 'JOIN': tokens.Keyword, + 'STRAIGHT_JOIN': tokens.Keyword, + 'AND': tokens.Keyword, + 'OR': tokens.Keyword, + 'LIKE': tokens.Keyword, + 'ON': tokens.Keyword, + 'IN': tokens.Keyword, + 'SET': tokens.Keyword, + + 'BY': tokens.Keyword, + 'GROUP': tokens.Keyword, + 'ORDER': tokens.Keyword, + 'LEFT': tokens.Keyword, + 'OUTER': tokens.Keyword, + 'FULL': tokens.Keyword, + + 'IF': tokens.Keyword, + 'END': tokens.Keyword, + 'THEN': tokens.Keyword, + 'LOOP': tokens.Keyword, + 'AS': tokens.Keyword, + 'ELSE': tokens.Keyword, + 'FOR': tokens.Keyword, + + 'CASE': tokens.Keyword, + 'WHEN': tokens.Keyword, + 'MIN': tokens.Keyword, + 'MAX': tokens.Keyword, + 'DISTINCT': tokens.Keyword, +} http://git-wip-us.apache.org/repos/asf/impala/blob/417bc8c8/shell/ext-py/sqlparse-0.1.19/sqlparse/lexer.py ---------------------------------------------------------------------- diff --git a/shell/ext-py/sqlparse-0.1.19/sqlparse/lexer.py b/shell/ext-py/sqlparse-0.1.19/sqlparse/lexer.py new file mode 100644 index 0000000..fd29f5c --- /dev/null +++ b/shell/ext-py/sqlparse-0.1.19/sqlparse/lexer.py @@ -0,0 +1,362 @@ +# -*- coding: utf-8 -*- + +# Copyright (C) 2008 Andi Albrecht, [email protected] +# +# This module is part of python-sqlparse and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php. + +"""SQL Lexer""" + +# This code is based on the SqlLexer in pygments. +# http://pygments.org/ +# It's separated from the rest of pygments to increase performance +# and to allow some customizations. + +import re +import sys + +from sqlparse import tokens +from sqlparse.keywords import KEYWORDS, KEYWORDS_COMMON +from cStringIO import StringIO + + +class include(str): + pass + + +class combined(tuple): + """Indicates a state combined from multiple states.""" + + def __new__(cls, *args): + return tuple.__new__(cls, args) + + def __init__(self, *args): + # tuple.__init__ doesn't do anything + pass + + +def is_keyword(value): + test = value.upper() + return KEYWORDS_COMMON.get(test, KEYWORDS.get(test, tokens.Name)), value + + +def apply_filters(stream, filters, lexer=None): + """ + Use this method to apply an iterable of filters to + a stream. If lexer is given it's forwarded to the + filter, otherwise the filter receives `None`. 
+ """ + + def _apply(filter_, stream): + for token in filter_.filter(lexer, stream): + yield token + + for filter_ in filters: + stream = _apply(filter_, stream) + return stream + + +class LexerMeta(type): + """ + Metaclass for Lexer, creates the self._tokens attribute from + self.tokens on the first instantiation. + """ + + def _process_state(cls, unprocessed, processed, state): + assert type(state) is str, "wrong state name %r" % state + assert state[0] != '#', "invalid state name %r" % state + if state in processed: + return processed[state] + tokenlist = processed[state] = [] + rflags = cls.flags + for tdef in unprocessed[state]: + if isinstance(tdef, include): + # it's a state reference + assert tdef != state, "circular state reference %r" % state + tokenlist.extend(cls._process_state( + unprocessed, processed, str(tdef))) + continue + + assert type(tdef) is tuple, "wrong rule def %r" % tdef + + try: + rex = re.compile(tdef[0], rflags).match + except Exception, err: + raise ValueError(("uncompilable regex %r in state" + " %r of %r: %s" + % (tdef[0], state, cls, err))) + + assert type(tdef[1]) is tokens._TokenType or callable(tdef[1]), \ + ('token type must be simple type or callable, not %r' + % (tdef[1],)) + + if len(tdef) == 2: + new_state = None + else: + tdef2 = tdef[2] + if isinstance(tdef2, str): + # an existing state + if tdef2 == '#pop': + new_state = -1 + elif tdef2 in unprocessed: + new_state = (tdef2,) + elif tdef2 == '#push': + new_state = tdef2 + elif tdef2[:5] == '#pop:': + new_state = -int(tdef2[5:]) + else: + assert False, 'unknown new state %r' % tdef2 + elif isinstance(tdef2, combined): + # combine a new state from existing ones + new_state = '_tmp_%d' % cls._tmpname + cls._tmpname += 1 + itokens = [] + for istate in tdef2: + assert istate != state, \ + 'circular state ref %r' % istate + itokens.extend(cls._process_state(unprocessed, + processed, istate)) + processed[new_state] = itokens + new_state = (new_state,) + elif isinstance(tdef2, tuple): + # push more than one state + for state in tdef2: + assert (state in unprocessed or + state in ('#pop', '#push')), \ + 'unknown new state ' + state + new_state = tdef2 + else: + assert False, 'unknown new state def %r' % tdef2 + tokenlist.append((rex, tdef[1], new_state)) + return tokenlist + + def process_tokendef(cls): + cls._all_tokens = {} + cls._tmpname = 0 + processed = cls._all_tokens[cls.__name__] = {} + #tokendefs = tokendefs or cls.tokens[name] + for state in cls.tokens.keys(): + cls._process_state(cls.tokens, processed, state) + return processed + + def __call__(cls, *args, **kwds): + if not hasattr(cls, '_tokens'): + cls._all_tokens = {} + cls._tmpname = 0 + if hasattr(cls, 'token_variants') and cls.token_variants: + # don't process yet + pass + else: + cls._tokens = cls.process_tokendef() + + return type.__call__(cls, *args, **kwds) + + +class Lexer(object): + + __metaclass__ = LexerMeta + + encoding = 'utf-8' + stripall = False + stripnl = False + tabsize = 0 + flags = re.IGNORECASE | re.UNICODE + + tokens = { + 'root': [ + (r'(--|# ).*?(\r\n|\r|\n)', tokens.Comment.Single), + # $ matches *before* newline, therefore we have two patterns + # to match Comment.Single + (r'(--|# ).*?$', tokens.Comment.Single), + (r'(\r\n|\r|\n)', tokens.Newline), + (r'\s+', tokens.Whitespace), + (r'/\*', tokens.Comment.Multiline, 'multiline-comments'), + (r':=', tokens.Assignment), + (r'::', tokens.Punctuation), + (r'[*]', tokens.Wildcard), + (r'CASE\b', tokens.Keyword), # extended CASE(foo) + (r"`(``|[^`])*`", tokens.Name), + 
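+            # Same as the backtick rule above, but for identifiers quoted
+            # with acute accents (a non-standard quoting style).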
(r"´(´´|[^´])*´", tokens.Name), + (r'\$([^\W\d]\w*)?\$', tokens.Name.Builtin), + (r'\?{1}', tokens.Name.Placeholder), + (r'%\(\w+\)s', tokens.Name.Placeholder), + (r'%s', tokens.Name.Placeholder), + (r'[$:?]\w+', tokens.Name.Placeholder), + # FIXME(andi): VALUES shouldn't be listed here + # see https://github.com/andialbrecht/sqlparse/pull/64 + (r'VALUES', tokens.Keyword), + (r'(@|##|#)[^\W\d_]\w+', tokens.Name), + # IN is special, it may be followed by a parenthesis, but + # is never a functino, see issue183 + (r'in\b(?=[ (])?', tokens.Keyword), + (r'[^\W\d_]\w*(?=[.(])', tokens.Name), # see issue39 + (r'[-]?0x[0-9a-fA-F]+', tokens.Number.Hexadecimal), + (r'[-]?[0-9]*(\.[0-9]+)?[eE][-]?[0-9]+', tokens.Number.Float), + (r'[-]?[0-9]*\.[0-9]+', tokens.Number.Float), + (r'[-]?[0-9]+', tokens.Number.Integer), + (r"'(''|\\\\|\\'|[^'])*'", tokens.String.Single), + # not a real string literal in ANSI SQL: + (r'(""|".*?[^\\]")', tokens.String.Symbol), + # sqlite names can be escaped with [square brackets]. left bracket + # cannot be preceded by word character or a right bracket -- + # otherwise it's probably an array index + (r'(?<![\w\])])(\[[^\]]+\])', tokens.Name), + (r'((LEFT\s+|RIGHT\s+|FULL\s+)?(INNER\s+|OUTER\s+|STRAIGHT\s+)?|(CROSS\s+|NATURAL\s+)?)?JOIN\b', tokens.Keyword), + (r'END(\s+IF|\s+LOOP)?\b', tokens.Keyword), + (r'NOT NULL\b', tokens.Keyword), + (r'CREATE(\s+OR\s+REPLACE)?\b', tokens.Keyword.DDL), + (r'DOUBLE\s+PRECISION\b', tokens.Name.Builtin), + (r'(?<=\.)[^\W\d_]\w*', tokens.Name), + (r'[^\W\d]\w*', is_keyword), + (r'[;:()\[\],\.]', tokens.Punctuation), + (r'[<>=~!]+', tokens.Operator.Comparison), + (r'[+/@#%^&|`?^-]+', tokens.Operator), + ], + 'multiline-comments': [ + (r'/\*', tokens.Comment.Multiline, 'multiline-comments'), + (r'\*/', tokens.Comment.Multiline, '#pop'), + (r'[^/\*]+', tokens.Comment.Multiline), + (r'[/*]', tokens.Comment.Multiline), + ]} + + def __init__(self): + self.filters = [] + + def add_filter(self, filter_, **options): + from sqlparse.filters import Filter + if not isinstance(filter_, Filter): + filter_ = filter_(**options) + self.filters.append(filter_) + + def _decode(self, text): + if sys.version_info[0] == 3: + if isinstance(text, str): + return text + if self.encoding == 'guess': + try: + text = text.decode('utf-8') + if text.startswith(u'\ufeff'): + text = text[len(u'\ufeff'):] + except UnicodeDecodeError: + text = text.decode('latin1') + else: + try: + text = text.decode(self.encoding) + except UnicodeDecodeError: + text = text.decode('unicode-escape') + + if self.tabsize > 0: + text = text.expandtabs(self.tabsize) + return text + + def get_tokens(self, text, unfiltered=False): + """ + Return an iterable of (tokentype, value) pairs generated from + `text`. If `unfiltered` is set to `True`, the filtering mechanism + is bypassed even if filters are defined. + + Also preprocess the text, i.e. expand tabs and strip it if + wanted and applies registered filters. 
+ """ + if isinstance(text, basestring): + if self.stripall: + text = text.strip() + elif self.stripnl: + text = text.strip('\n') + + if sys.version_info[0] < 3 and isinstance(text, unicode): + text = StringIO(text.encode('utf-8')) + self.encoding = 'utf-8' + else: + text = StringIO(text) + + def streamer(): + for i, t, v in self.get_tokens_unprocessed(text): + yield t, v + stream = streamer() + if not unfiltered: + stream = apply_filters(stream, self.filters, self) + return stream + + def get_tokens_unprocessed(self, stream, stack=('root',)): + """ + Split ``text`` into (tokentype, text) pairs. + + ``stack`` is the inital stack (default: ``['root']``) + """ + pos = 0 + tokendefs = self._tokens # see __call__, pylint:disable=E1101 + statestack = list(stack) + statetokens = tokendefs[statestack[-1]] + known_names = {} + + text = stream.read() + text = self._decode(text) + + while 1: + for rexmatch, action, new_state in statetokens: + m = rexmatch(text, pos) + if m: + value = m.group() + if value in known_names: + yield pos, known_names[value], value + elif type(action) is tokens._TokenType: + yield pos, action, value + elif hasattr(action, '__call__'): + ttype, value = action(value) + known_names[value] = ttype + yield pos, ttype, value + else: + for item in action(self, m): + yield item + pos = m.end() + if new_state is not None: + # state transition + if isinstance(new_state, tuple): + for state in new_state: + if state == '#pop': + statestack.pop() + elif state == '#push': + statestack.append(statestack[-1]) + elif ( + # Ugly hack - multiline-comments + # are not stackable + state != 'multiline-comments' + or not statestack + or statestack[-1] != 'multiline-comments' + ): + statestack.append(state) + elif isinstance(new_state, int): + # pop + del statestack[new_state:] + elif new_state == '#push': + statestack.append(statestack[-1]) + else: + assert False, "wrong state def: %r" % new_state + statetokens = tokendefs[statestack[-1]] + break + else: + try: + if text[pos] == '\n': + # at EOL, reset state to "root" + pos += 1 + statestack = ['root'] + statetokens = tokendefs['root'] + yield pos, tokens.Text, u'\n' + continue + yield pos, tokens.Error, text[pos] + pos += 1 + except IndexError: + break + + +def tokenize(sql, encoding=None): + """Tokenize sql. + + Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream + of ``(token type, value)`` items. + """ + lexer = Lexer() + if encoding is not None: + lexer.encoding = encoding + return lexer.get_tokens(sql) http://git-wip-us.apache.org/repos/asf/impala/blob/417bc8c8/shell/ext-py/sqlparse-0.1.19/sqlparse/pipeline.py ---------------------------------------------------------------------- diff --git a/shell/ext-py/sqlparse-0.1.19/sqlparse/pipeline.py b/shell/ext-py/sqlparse-0.1.19/sqlparse/pipeline.py new file mode 100644 index 0000000..34dad19 --- /dev/null +++ b/shell/ext-py/sqlparse-0.1.19/sqlparse/pipeline.py @@ -0,0 +1,31 @@ +# Copyright (C) 2011 Jesus Leganes "piranna", [email protected] +# +# This module is part of python-sqlparse and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php. 
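+#
+# A Pipeline is a list of filters; calling it runs a token stream through
+# every filter in order and returns a static, non-generator result
+# (see the class docstring below).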
+ +from types import GeneratorType + + +class Pipeline(list): + """Pipeline to process filters sequentially""" + + def __call__(self, stream): + """Run the pipeline + + Return a static (non generator) version of the result + """ + + # Run the stream over all the filters on the pipeline + for filter in self: + # Functions and callable objects (objects with '__call__' method) + if callable(filter): + stream = filter(stream) + + # Normal filters (objects with 'process' method) + else: + stream = filter.process(None, stream) + + # If last filter return a generator, staticalize it inside a list + if isinstance(stream, GeneratorType): + return list(stream) + return stream http://git-wip-us.apache.org/repos/asf/impala/blob/417bc8c8/shell/ext-py/sqlparse-0.1.19/sqlparse/sql.py ---------------------------------------------------------------------- diff --git a/shell/ext-py/sqlparse-0.1.19/sqlparse/sql.py b/shell/ext-py/sqlparse-0.1.19/sqlparse/sql.py new file mode 100644 index 0000000..7325712 --- /dev/null +++ b/shell/ext-py/sqlparse-0.1.19/sqlparse/sql.py @@ -0,0 +1,684 @@ +# -*- coding: utf-8 -*- + +"""This module contains classes representing syntactical elements of SQL.""" + +import re +import sys + +from sqlparse import tokens as T + + +class Token(object): + """Base class for all other classes in this module. + + It represents a single token and has two instance attributes: + ``value`` is the unchange value of the token and ``ttype`` is + the type of the token. + """ + + __slots__ = ('value', 'ttype', 'parent', 'normalized', 'is_keyword') + + def __init__(self, ttype, value): + self.value = value + if ttype in T.Keyword: + self.normalized = value.upper() + else: + self.normalized = value + self.ttype = ttype + self.is_keyword = ttype in T.Keyword + self.parent = None + + def __str__(self): + if sys.version_info[0] == 3: + return self.value + else: + return unicode(self).encode('utf-8') + + def __repr__(self): + short = self._get_repr_value() + if sys.version_info[0] < 3: + short = short.encode('utf-8') + return '<%s \'%s\' at 0x%07x>' % (self._get_repr_name(), + short, id(self)) + + def __unicode__(self): + """Returns a unicode representation of this object.""" + return self.value or '' + + def to_unicode(self): + """Returns a unicode representation of this object. + + .. deprecated:: 0.1.5 + Use ``unicode(token)`` (for Python 3: ``str(token)``) instead. + """ + return unicode(self) + + def _get_repr_name(self): + return str(self.ttype).split('.')[-1] + + def _get_repr_value(self): + raw = unicode(self) + if len(raw) > 7: + raw = raw[:6] + u'...' + return re.sub('\s+', ' ', raw) + + def flatten(self): + """Resolve subgroups.""" + yield self + + def match(self, ttype, values, regex=False): + """Checks whether the token matches the given arguments. + + *ttype* is a token type. If this token doesn't match the given token + type. + *values* is a list of possible values for this token. The values + are OR'ed together so if only one of the values matches ``True`` + is returned. Except for keyword tokens the comparison is + case-sensitive. For convenience it's ok to pass in a single string. + If *regex* is ``True`` (default is ``False``) the given values are + treated as regular expressions. 
+ """ + type_matched = self.ttype is ttype + if not type_matched or values is None: + return type_matched + + if regex: + if isinstance(values, basestring): + values = set([values]) + + if self.ttype is T.Keyword: + values = set(re.compile(v, re.IGNORECASE) for v in values) + else: + values = set(re.compile(v) for v in values) + + for pattern in values: + if pattern.search(self.value): + return True + return False + + if isinstance(values, basestring): + if self.is_keyword: + return values.upper() == self.normalized + return values == self.value + + if self.is_keyword: + for v in values: + if v.upper() == self.normalized: + return True + return False + + return self.value in values + + def is_group(self): + """Returns ``True`` if this object has children.""" + return False + + def is_whitespace(self): + """Return ``True`` if this token is a whitespace token.""" + return self.ttype and self.ttype in T.Whitespace + + def within(self, group_cls): + """Returns ``True`` if this token is within *group_cls*. + + Use this method for example to check if an identifier is within + a function: ``t.within(sql.Function)``. + """ + parent = self.parent + while parent: + if isinstance(parent, group_cls): + return True + parent = parent.parent + return False + + def is_child_of(self, other): + """Returns ``True`` if this token is a direct child of *other*.""" + return self.parent == other + + def has_ancestor(self, other): + """Returns ``True`` if *other* is in this tokens ancestry.""" + parent = self.parent + while parent: + if parent == other: + return True + parent = parent.parent + return False + + +class TokenList(Token): + """A group of tokens. + + It has an additional instance attribute ``tokens`` which holds a + list of child-tokens. + """ + + __slots__ = ('value', 'ttype', 'tokens') + + def __init__(self, tokens=None): + if tokens is None: + tokens = [] + self.tokens = tokens + Token.__init__(self, None, self._to_string()) + + def __unicode__(self): + return self._to_string() + + def __str__(self): + str_ = self._to_string() + if sys.version_info[0] < 2: + str_ = str_.encode('utf-8') + return str_ + + def _to_string(self): + if sys.version_info[0] == 3: + return ''.join(x.value for x in self.flatten()) + else: + return ''.join(unicode(x) for x in self.flatten()) + + def _get_repr_name(self): + return self.__class__.__name__ + + def _pprint_tree(self, max_depth=None, depth=0): + """Pretty-print the object tree.""" + indent = ' ' * (depth * 2) + for idx, token in enumerate(self.tokens): + if token.is_group(): + pre = ' +-' + else: + pre = ' | ' + print '%s%s%d %s \'%s\'' % (indent, pre, idx, + token._get_repr_name(), + token._get_repr_value()) + if (token.is_group() and (max_depth is None or depth < max_depth)): + token._pprint_tree(max_depth, depth + 1) + + def _remove_quotes(self, val): + """Helper that removes surrounding quotes from strings.""" + if not val: + return val + if val[0] in ('"', '\'') and val[-1] == val[0]: + val = val[1:-1] + return val + + def get_token_at_offset(self, offset): + """Returns the token that is on position offset.""" + idx = 0 + for token in self.flatten(): + end = idx + len(token.value) + if idx <= offset <= end: + return token + idx = end + + def flatten(self): + """Generator yielding ungrouped tokens. + + This method is recursively called for all child tokens. 
+ """ + for token in self.tokens: + if isinstance(token, TokenList): + for item in token.flatten(): + yield item + else: + yield token + +# def __iter__(self): +# return self +# +# def next(self): +# for token in self.tokens: +# yield token + + def is_group(self): + return True + + def get_sublists(self): +# return [x for x in self.tokens if isinstance(x, TokenList)] + for x in self.tokens: + if isinstance(x, TokenList): + yield x + + @property + def _groupable_tokens(self): + return self.tokens + + def token_first(self, ignore_whitespace=True, ignore_comments=False): + """Returns the first child token. + + If *ignore_whitespace* is ``True`` (the default), whitespace + tokens are ignored. + + if *ignore_comments* is ``True`` (default: ``False``), comments are + ignored too. + """ + for token in self.tokens: + if ignore_whitespace and token.is_whitespace(): + continue + if ignore_comments and isinstance(token, Comment): + continue + return token + + def token_next_by_instance(self, idx, clss, end=None): + """Returns the next token matching a class. + + *idx* is where to start searching in the list of child tokens. + *clss* is a list of classes the token should be an instance of. + + If no matching token can be found ``None`` is returned. + """ + if not isinstance(clss, (list, tuple)): + clss = (clss,) + + for token in self.tokens[idx:end]: + if isinstance(token, clss): + return token + + def token_next_by_type(self, idx, ttypes): + """Returns next matching token by it's token type.""" + if not isinstance(ttypes, (list, tuple)): + ttypes = [ttypes] + + for token in self.tokens[idx:]: + if token.ttype in ttypes: + return token + + def token_next_match(self, idx, ttype, value, regex=False): + """Returns next token where it's ``match`` method returns ``True``.""" + if not isinstance(idx, int): + idx = self.token_index(idx) + + for n in xrange(idx, len(self.tokens)): + token = self.tokens[n] + if token.match(ttype, value, regex): + return token + + def token_not_matching(self, idx, funcs): + for token in self.tokens[idx:]: + passed = False + for func in funcs: + if func(token): + passed = True + break + + if not passed: + return token + + def token_matching(self, idx, funcs): + for token in self.tokens[idx:]: + for func in funcs: + if func(token): + return token + + def token_prev(self, idx, skip_ws=True): + """Returns the previous token relative to *idx*. + + If *skip_ws* is ``True`` (the default) whitespace tokens are ignored. + ``None`` is returned if there's no previous token. + """ + if idx is None: + return None + + if not isinstance(idx, int): + idx = self.token_index(idx) + + while idx: + idx -= 1 + if self.tokens[idx].is_whitespace() and skip_ws: + continue + return self.tokens[idx] + + def token_next(self, idx, skip_ws=True): + """Returns the next token relative to *idx*. + + If *skip_ws* is ``True`` (the default) whitespace tokens are ignored. + ``None`` is returned if there's no next token. + """ + if idx is None: + return None + + if not isinstance(idx, int): + idx = self.token_index(idx) + + while idx < len(self.tokens) - 1: + idx += 1 + if self.tokens[idx].is_whitespace() and skip_ws: + continue + return self.tokens[idx] + + def token_index(self, token, start=0): + """Return list index of token.""" + if start > 0: + # Performing `index` manually is much faster when starting in the middle + # of the list of tokens and expecting to find the token near to the starting + # index. 
+            for i in xrange(start, len(self.tokens)):
+                if self.tokens[i] == token:
+                    return i
+            return -1
+        return self.tokens.index(token)
+
+    def tokens_between(self, start, end, exclude_end=False):
+        """Return all tokens between (and including) start and end.
+
+        If *exclude_end* is ``True`` (default is ``False``) the end token
+        is excluded.
+        """
+        # FIXME(andi): rename exclude_end to include_end
+        if exclude_end:
+            offset = 0
+        else:
+            offset = 1
+        end_idx = self.token_index(end) + offset
+        start_idx = self.token_index(start)
+        return self.tokens[start_idx:end_idx]
+
+    def group_tokens(self, grp_cls, tokens, ignore_ws=False):
+        """Replace tokens by an instance of *grp_cls*."""
+        idx = self.token_index(tokens[0])
+        if ignore_ws:
+            while tokens and tokens[-1].is_whitespace():
+                tokens = tokens[:-1]
+        for t in tokens:
+            self.tokens.remove(t)
+        grp = grp_cls(tokens)
+        for token in tokens:
+            token.parent = grp
+        grp.parent = self
+        self.tokens.insert(idx, grp)
+        return grp
+
+    def insert_before(self, where, token):
+        """Inserts *token* before *where*."""
+        self.tokens.insert(self.token_index(where), token)
+
+    def insert_after(self, where, token, skip_ws=True):
+        """Inserts *token* after *where*."""
+        next_token = self.token_next(where, skip_ws=skip_ws)
+        if next_token is None:
+            self.tokens.append(token)
+        else:
+            self.tokens.insert(self.token_index(next_token), token)
+
+    def has_alias(self):
+        """Returns ``True`` if an alias is present."""
+        return self.get_alias() is not None
+
+    def get_alias(self):
+        """Returns the alias for this identifier or ``None``."""
+
+        # "name AS alias"
+        kw = self.token_next_match(0, T.Keyword, 'AS')
+        if kw is not None:
+            return self._get_first_name(kw, keywords=True)
+
+        # "name alias" or "complicated column expression alias"
+        if len(self.tokens) > 2 \
+           and self.token_next_by_type(0, T.Whitespace) is not None:
+            return self._get_first_name(reverse=True)
+
+        return None
+
+    def get_name(self):
+        """Returns the name of this identifier.
+
+        This is either its alias or its real name. The returned value can
+        be considered as the name under which the object corresponding to
+        this identifier is known within the current statement.
+        """
+        alias = self.get_alias()
+        if alias is not None:
+            return alias
+        return self.get_real_name()
+
+    def get_real_name(self):
+        """Returns the real name (object name) of this identifier."""
+        # a.b
+        dot = self.token_next_match(0, T.Punctuation, '.')
+        if dot is not None:
+            return self._get_first_name(self.token_index(dot))
+
+        return self._get_first_name()
+
+    def get_parent_name(self):
+        """Return the name of the parent object if any.
+
+        A parent object is identified by the first occurring dot.
+        """
+        dot = self.token_next_match(0, T.Punctuation, '.')
+        if dot is None:
+            return None
+        prev_ = self.token_prev(self.token_index(dot))
+        if prev_ is None:  # something must be very wrong here..
+            return None
+        return self._remove_quotes(prev_.value)
+
+    def _get_first_name(self, idx=None, reverse=False, keywords=False):
+        """Returns the name of the first token with a name"""
+
+        if idx and not isinstance(idx, int):
+            idx = self.token_index(idx) + 1
+
+        tokens = self.tokens[idx:] if idx else self.tokens
+        tokens = reversed(tokens) if reverse else tokens
+        types = [T.Name, T.Wildcard, T.String.Symbol]
+
+        if keywords:
+            types.append(T.Keyword)
+
+        for tok in tokens:
+            if tok.ttype in types:
+                return self._remove_quotes(tok.value)
+            elif isinstance(tok, Identifier) or isinstance(tok, Function):
+                return tok.get_name()
+        return None
+
+
+class Statement(TokenList):
+    """Represents a SQL statement."""
+
+    __slots__ = ('value', 'ttype', 'tokens')
+
+    def get_type(self):
+        """Returns the type of a statement.
+
+        The returned value is a string holding an upper-cased version of
+        the first DML or DDL keyword. If the first token in this group
+        isn't a DML or DDL keyword, "UNKNOWN" is returned.
+
+        Whitespaces and comments at the beginning of the statement
+        are ignored.
+        """
+        first_token = self.token_first(ignore_comments=True)
+        if first_token is None:
+            # An "empty" statement that either has no tokens at all
+            # or only whitespace tokens.
+            return 'UNKNOWN'
+
+        elif first_token.ttype in (T.Keyword.DML, T.Keyword.DDL):
+            return first_token.normalized
+
+        return 'UNKNOWN'
+
+
+class Identifier(TokenList):
+    """Represents an identifier.
+
+    Identifiers may have aliases or typecasts.
+    """
+
+    __slots__ = ('value', 'ttype', 'tokens')
+
+    def is_wildcard(self):
+        """Return ``True`` if this identifier contains a wildcard."""
+        token = self.token_next_by_type(0, T.Wildcard)
+        return token is not None
+
+    def get_typecast(self):
+        """Returns the typecast of this object as a string, or ``None``."""
+        marker = self.token_next_match(0, T.Punctuation, '::')
+        if marker is None:
+            return None
+        next_ = self.token_next(self.token_index(marker), False)
+        if next_ is None:
+            return None
+        return unicode(next_)
+
+    def get_ordering(self):
+        """Returns the ordering as an uppercase string, or ``None``."""
+        ordering = self.token_next_by_type(0, T.Keyword.Order)
+        if ordering is None:
+            return None
+        return ordering.value.upper()
+
+    def get_array_indices(self):
+        """Returns an iterator of index token lists"""
+
+        for tok in self.tokens:
+            if isinstance(tok, SquareBrackets):
+                # Use [1:-1] index to discard the square brackets
+                yield tok.tokens[1:-1]
+
+
+class IdentifierList(TokenList):
+    """A list of :class:`~sqlparse.sql.Identifier`\'s."""
+
+    __slots__ = ('value', 'ttype', 'tokens')
+
+    def get_identifiers(self):
+        """Returns the identifiers.
+
+        Whitespaces and punctuations are not included in this generator.
+ """ + for x in self.tokens: + if not x.is_whitespace() and not x.match(T.Punctuation, ','): + yield x + + +class Parenthesis(TokenList): + """Tokens between parenthesis.""" + __slots__ = ('value', 'ttype', 'tokens') + + @property + def _groupable_tokens(self): + return self.tokens[1:-1] + + +class SquareBrackets(TokenList): + """Tokens between square brackets""" + + __slots__ = ('value', 'ttype', 'tokens') + + @property + def _groupable_tokens(self): + return self.tokens[1:-1] + +class Assignment(TokenList): + """An assignment like 'var := val;'""" + __slots__ = ('value', 'ttype', 'tokens') + + +class If(TokenList): + """An 'if' clause with possible 'else if' or 'else' parts.""" + __slots__ = ('value', 'ttype', 'tokens') + + +class For(TokenList): + """A 'FOR' loop.""" + __slots__ = ('value', 'ttype', 'tokens') + + +class Comparison(TokenList): + """A comparison used for example in WHERE clauses.""" + __slots__ = ('value', 'ttype', 'tokens') + + @property + def left(self): + return self.tokens[0] + + @property + def right(self): + return self.tokens[-1] + + +class Comment(TokenList): + """A comment.""" + __slots__ = ('value', 'ttype', 'tokens') + + def is_multiline(self): + return self.tokens and self.tokens[0].ttype == T.Comment.Multiline + + +class Where(TokenList): + """A WHERE clause.""" + __slots__ = ('value', 'ttype', 'tokens') + + +class Case(TokenList): + """A CASE statement with one or more WHEN and possibly an ELSE part.""" + + __slots__ = ('value', 'ttype', 'tokens') + + def get_cases(self): + """Returns a list of 2-tuples (condition, value). + + If an ELSE exists condition is None. + """ + CONDITION = 1 + VALUE = 2 + + ret = [] + mode = CONDITION + + for token in self.tokens: + # Set mode from the current statement + if token.match(T.Keyword, 'CASE'): + continue + + elif token.match(T.Keyword, 'WHEN'): + ret.append(([], [])) + mode = CONDITION + + elif token.match(T.Keyword, 'THEN'): + mode = VALUE + + elif token.match(T.Keyword, 'ELSE'): + ret.append((None, [])) + mode = VALUE + + elif token.match(T.Keyword, 'END'): + mode = None + + # First condition without preceding WHEN + if mode and not ret: + ret.append(([], [])) + + # Append token depending of the current mode + if mode == CONDITION: + ret[-1][0].append(token) + + elif mode == VALUE: + ret[-1][1].append(token) + + # Return cases list + return ret + + +class Function(TokenList): + """A function or procedure call.""" + + __slots__ = ('value', 'ttype', 'tokens') + + def get_parameters(self): + """Return a list of parameters.""" + parenthesis = self.tokens[-1] + for t in parenthesis.tokens: + if isinstance(t, IdentifierList): + return t.get_identifiers() + elif isinstance(t, Identifier) or \ + isinstance(t, Function) or \ + t.ttype in T.Literal: + return [t,] + return [] + + +class Begin(TokenList): + """A BEGIN/END block.""" + + __slots__ = ('value', 'ttype', 'tokens')

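The Pipeline class at the start of this hunk simply chains filters over a
token stream: callables are invoked directly, other filters via their
process() method, and a trailing generator is materialized into a list.
A minimal usage sketch, again assuming the vendored package is importable
under Python 2; KeywordCaseFilter and tokenize ship with the same
sqlparse-0.1.19 distribution:

    from sqlparse.filters import KeywordCaseFilter
    from sqlparse.lexer import tokenize
    from sqlparse.pipeline import Pipeline

    pipe = Pipeline()
    # KeywordCaseFilter instances are not callable, so the pipeline falls
    # back to calling their process() method.
    pipe.append(KeywordCaseFilter(case='upper'))

    # tokenize() yields (ttype, value) pairs; the pipeline returns a list
    # because the last filter produced a generator.
    for ttype, value in pipe(tokenize('select * from tbl')):
        if value.strip():
            print(value)  # SELECT, *, FROM, tbl -- keywords upper-cased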