# -*- coding: utf-8 -*-
# Provenance: shell/ext-py/sqlparse-0.1.14/sqlparse/filters.py, shown as a
# deleted file (index e576a26..0000000) in Apache Impala commit 49413d9c.
# NOTE: this is Python 2 code (it relies on the ``unicode`` builtin); the
# ``except ... as`` / ``with`` forms used below need Python 2.6 or newer.

import re

from os.path import abspath, join

from sqlparse import sql, tokens as T
from sqlparse.engine import FilterStack
from sqlparse.lexer import tokenize
from sqlparse.pipeline import Pipeline
from sqlparse.tokens import (Comment, Comparison, Keyword, Name, Punctuation,
                             String, Whitespace)
from sqlparse.utils import memoize_generator
from sqlparse.utils import split_unquoted_newlines


# --------------------------
# token process

class _CaseFilter:
    """Base class for filters that change the letter case of token values.

    Subclasses set ``ttype`` to the token type(s) whose values are converted.
    """

    ttype = None

    def __init__(self, case=None):
        """Select the conversion: 'lower', 'upper' (default) or 'capitalize'."""
        if case is None:
            case = 'upper'
        assert case in ['lower', 'upper', 'capitalize']
        # The conversion is the unbound unicode method of the same name.
        self.convert = getattr(unicode, case)

    def process(self, stack, stream):
        """Yield every (ttype, value) pair, converting values of matching type."""
        for ttype, value in stream:
            if ttype in self.ttype:
                value = self.convert(value)
            yield ttype, value


class KeywordCaseFilter(_CaseFilter):
    """Case filter applied to ``T.Keyword`` tokens."""
    ttype = T.Keyword


class IdentifierCaseFilter(_CaseFilter):
    """Case filter applied to names and symbols, except double-quoted ones."""
    ttype = (T.Name, T.String.Symbol)

    def process(self, stack, stream):
        """Yield every (ttype, value) pair, converting unquoted identifiers."""
        for ttype, value in stream:
            # startswith() instead of [0]: an empty or whitespace-only value
            # must not raise IndexError.
            if ttype in self.ttype and not value.strip().startswith('"'):
                value = self.convert(value)
            yield ttype, value


class TruncateStringFilter:
    """Shorten the contents of single-quoted string literals."""

    def __init__(self, width, char):
        """Keep at most ``width`` (minimum 1) characters, then append ``char``."""
        self.width = max(width, 1)
        self.char = unicode(char)

    def process(self, stack, stream):
        """Yield every pair, truncating over-long ``String.Single`` values."""
        for ttype, value in stream:
            if ttype is T.Literal.String.Single:
                # A value starting with two quotes is treated as being
                # delimited by doubled quotes on both ends.
                if value[:2] == '\'\'':
                    inner = value[2:-2]
                    quote = u'\'\''
                else:
                    inner = value[1:-1]
                    quote = u'\''
                if len(inner) > self.width:
                    value = u''.join((quote, inner[:self.width], self.char,
                                      quote))
            yield ttype, value


class GetComments:
    """Get the comments from a stack"""
    def process(self, stack, stream):
        """Yield only the pairs whose token type is in ``Comment``."""
        for token_type, value in stream:
            if token_type in Comment:
                yield token_type, value


class StripComments:
    """Strip the comments from a stack"""
    def process(self, stack, stream):
        """Yield only the pairs whose token type is not in ``Comment``."""
        for token_type, value in stream:
            if token_type not in Comment:
                yield token_type, value


def StripWhitespace(stream):
    "Strip the useless whitespaces from a stream leaving only the minimal ones"
    last_type = None
    has_space = False
    ignore_group = frozenset((Comparison, Punctuation))

    for token_type, value in stream:
        # We got a previous token (not empty first ones)
        if last_type:
            if token_type in Whitespace:
                has_space = True
                continue

        # Ignore first empty spaces and dot-commas
        # NOTE(review): ignore_group is a frozenset nested inside this tuple,
        # so the membership test compares the token type with the set object
        # itself rather than with its members. That looks unintended; it is
        # left unchanged here because fixing it would alter the output.
        elif token_type in (Whitespace, Whitespace.Newline, ignore_group):
            continue

        # Yield a whitespace if it can't be ignored
        if has_space:
            if not ignore_group.intersection((last_type, token_type)):
                yield Whitespace, ' '
            has_space = False

        # Yield the token and set its type for checking with the next one
        yield token_type, value
        last_type = token_type


class IncludeStatement:
    """Filter that enables an INCLUDE statement"""

    def __init__(self, dirpath=".", maxrecursive=10, raiseexceptions=False):
        """Configure the include lookup.

        dirpath: directory that included paths are joined to.
        maxrecursive: remaining nesting depth; must be positive.
        raiseexceptions: re-raise errors instead of emitting them as comments.

        Raises ValueError when ``maxrecursive`` is zero or negative.
        """
        if maxrecursive <= 0:
            raise ValueError('Max recursion limit reached')

        self.dirpath = abspath(dirpath)
        self.maxRecursive = maxrecursive
        self.raiseexceptions = raiseexceptions

        self.detected = False

    @memoize_generator
    def process(self, stack, stream):
        """Yield the stream with each INCLUDE replaced by the file's tokens.

        Errors are re-raised when ``raiseexceptions`` is set; otherwise they
        are emitted as a ``Comment`` token carrying the error text.
        """
        # Run over all tokens in the stream
        for token_type, value in stream:
            # INCLUDE statement found, set detected mode
            if token_type in Name and value.upper() == 'INCLUDE':
                self.detected = True
                continue

            # INCLUDE statement was found, parse it
            elif self.detected:
                # Omit whitespaces
                if token_type in Whitespace:
                    continue

                # Found file path to include
                if token_type in String.Symbol:
                    # Get path of file to include (strip the two delimiters)
                    path = join(self.dirpath, value[1:-1])

                    try:
                        # ``with`` closes the file even if read() fails.
                        with open(path) as f:
                            raw_sql = f.read()

                    # There was a problem loading the include file
                    except IOError as err:
                        # Raise the exception to the interpreter
                        if self.raiseexceptions:
                            raise

                        # Put the exception as a comment on the SQL code
                        yield Comment, u'-- IOError: %s\n' % err

                    else:
                        # Create a new FilterStack to parse the file that was
                        # read and add all its tokens to the main stack
                        # recursively
                        try:
                            filtr = IncludeStatement(self.dirpath,
                                                     self.maxRecursive - 1,
                                                     self.raiseexceptions)

                        # Max recursion limit reached
                        except ValueError as err:
                            # Raise the exception to the interpreter
                            if self.raiseexceptions:
                                raise

                            # Put the exception as a comment on the SQL code
                            yield Comment, u'-- ValueError: %s\n' % err

                        else:
                            # Only reached when the nested filter exists;
                            # previously an unbound ``filtr`` was used after
                            # the ValueError comment had been emitted.
                            include_stack = FilterStack()
                            include_stack.preprocess.append(filtr)

                            for tv in include_stack.run(raw_sql):
                                yield tv

                    # Set normal mode
                    self.detected = False

                # Don't include any token while in detected mode
                continue

            # Normal token
            yield token_type, value


# ----------------------
# statement process

class StripCommentsFilter:
    """Statement filter that removes comments from a token list in place."""

    def _get_next_comment(self, tlist):
        """Return the first comment in ``tlist``: by instance, else by type."""
        # TODO(andi) Comment types should be unified, see related issue38
        token = tlist.token_next_by_instance(0, sql.Comment)
        if token is None:
            token = tlist.token_next_by_type(0, T.Comment)
        return token

    def _process(self, tlist):
        """Drop each comment, or swap it for one space where needed."""
        token = self._get_next_comment(tlist)
        while token:
            tidx = tlist.token_index(token)
            prev = tlist.token_prev(tidx, False)
            next_ = tlist.token_next(tidx, False)
            # Replace by whitespace if prev and next exist and if they're not
            # whitespaces. This doesn't apply if prev or next is a
            # parenthesis.
            if (prev is not None and next_ is not None
                    and not prev.is_whitespace()
                    and not next_.is_whitespace()
                    and not (prev.match(T.Punctuation, '(')
                             or next_.match(T.Punctuation, ')'))):
                tlist.tokens[tidx] = sql.Token(T.Whitespace, ' ')
            else:
                tlist.tokens.pop(tidx)
            token = self._get_next_comment(tlist)

    def process(self, stack, stmt):
        """Strip comments from every sublist of ``stmt``, then from ``stmt``."""
        for sgroup in stmt.get_sublists():
            self.process(stack, sgroup)
        self._process(stmt)


class StripWhitespaceFilter:
    """Statement filter that collapses whitespace tokens in place."""

    def _stripws(self, tlist):
        """Dispatch to ``_stripws_<lowercased class name>`` or the default."""
        func_name = '_stripws_%s' % tlist.__class__.__name__.lower()
        func = getattr(self, func_name, self._stripws_default)
        func(tlist)

    def _stripws_default(self, tlist):
        """Set each whitespace run to one space followed by empty values."""
        last_was_ws = False
        for token in tlist.tokens:
            if token.is_whitespace():
                if last_was_ws:
                    token.value = ''
                else:
                    token.value = ' '
            last_was_ws = token.is_whitespace()

    def _stripws_identifierlist(self, tlist):
        """Remove whitespace directly before commas, then apply the default."""
        # Removes newlines before commas, see issue140
        last_nl = None
        # Iterate over a copy because tokens are removed from the list.
        for token in tlist.tokens[:]:
            if (token.ttype is T.Punctuation
                    and token.value == ','
                    and last_nl is not None):
                tlist.tokens.remove(last_nl)
            if token.is_whitespace():
                last_nl = token
            else:
                last_nl = None
        return self._stripws_default(tlist)

    def _stripws_parenthesis(self, tlist):
        """Drop whitespace at index 1 and index -2, then apply the default."""
        if tlist.tokens[1].is_whitespace():
            tlist.tokens.pop(1)
        if tlist.tokens[-2].is_whitespace():
            tlist.tokens.pop(-2)
        self._stripws_default(tlist)

    def process(self, stack, stmt, depth=0):
        """Strip whitespace in all sublists, then in ``stmt`` itself.

        At depth 0 a trailing whitespace token is removed as well.
        """
        for sgroup in stmt.get_sublists():
            self.process(stack, sgroup, depth + 1)
        self._stripws(stmt)
        # Guard against a statement with no tokens before indexing [-1].
        if depth == 0 and stmt.tokens and stmt.tokens[-1].is_whitespace():
            stmt.tokens.pop(-1)


class ReindentFilter:
    """Statement filter that inserts newline and indentation tokens."""

    def __init__(self, width=2, char=' ', line_width=None):
        """Store the indent unit: ``char`` repeated ``width`` times per level."""
        self.width = width
        self.char = char
        self.indent = 0     # current indentation level
        self.offset = 0     # extra characters added on top of the level
        self.line_width = line_width
        self._curr_stmt = None
        self._last_stmt = None

    def _flatten_up_to_token(self, token):
        """Yields the flattened tokens of the statement up to and including
        token."""
        # helper for _get_offset
        for t in self._curr_stmt.flatten():
            yield t
            if t == token:
                # A plain return ends the generator; raising StopIteration
                # here becomes a RuntimeError under PEP 479.
                return

    def _get_offset(self, token):
        """Return the column of ``token``'s end relative to the current indent.
        """
        raw = ''.join(map(unicode, self._flatten_up_to_token(token)))
        line = raw.splitlines()[-1]
        # Now take current offset into account and return relative offset.
        full_offset = len(line) - len(self.char * (self.width * self.indent))
        return full_offset - self.offset

    def nl(self):
        """Return a whitespace token: a newline plus the current indentation."""
        # TODO: newline character should be configurable
        space = (self.char * ((self.indent * self.width) + self.offset))
        # Detect runaway indenting due to parsing errors
        if len(space) > 200:
            # something seems to be wrong, flip back
            self.indent = self.offset = 0
            space = (self.char * ((self.indent * self.width) + self.offset))
        ws = '\n' + space
        return sql.Token(T.Whitespace, ws)

    def _split_kwds(self, tlist):
        """Put a line break in front of each splitting keyword in ``tlist``."""
        split_words = ('FROM', 'STRAIGHT_JOIN$', 'JOIN$', 'AND', 'OR',
                       'GROUP', 'ORDER', 'UNION', 'VALUES',
                       'SET', 'BETWEEN', 'EXCEPT')

        def _next_token(i):
            t = tlist.token_next_match(i, T.Keyword, split_words,
                                       regex=True)
            # BETWEEN itself is skipped, and so is an AND that is the next
            # match right after it.
            if t and t.value.upper() == 'BETWEEN':
                t = _next_token(tlist.token_index(t) + 1)
                if t and t.value.upper() == 'AND':
                    t = _next_token(tlist.token_index(t) + 1)
            return t

        idx = 0
        token = _next_token(idx)
        while token:
            prev = tlist.token_prev(tlist.token_index(token), False)
            offset = 1
            if prev and prev.is_whitespace():
                tlist.tokens.pop(tlist.token_index(prev))
                offset += 1
            # A preceding comment that already ends the line needs no extra
            # newline token.
            if (prev
                    and isinstance(prev, sql.Comment)
                    and (unicode(prev).endswith('\n')
                         or unicode(prev).endswith('\r'))):
                nl = tlist.token_next(token)
            else:
                nl = self.nl()
                tlist.insert_before(token, nl)
            token = _next_token(tlist.token_index(nl) + offset)

    def _split_statements(self, tlist):
        """Put a line break in front of each DDL/DML keyword but the first."""
        idx = 0
        token = tlist.token_next_by_type(idx, (T.Keyword.DDL, T.Keyword.DML))
        while token:
            prev = tlist.token_prev(tlist.token_index(token), False)
            if prev and prev.is_whitespace():
                tlist.tokens.pop(tlist.token_index(prev))
            # only break if it's not the first token
            if prev:
                nl = self.nl()
                tlist.insert_before(token, nl)
            token = tlist.token_next_by_type(tlist.token_index(token) + 1,
                                             (T.Keyword.DDL, T.Keyword.DML))

    def _process(self, tlist):
        """Dispatch to ``_process_<lowercased class name>`` or the default."""
        func_name = '_process_%s' % tlist.__class__.__name__.lower()
        func = getattr(self, func_name, self._process_default)
        func(tlist)

    def _process_where(self, tlist):
        """Break before WHERE and process its contents one level deeper."""
        token = tlist.token_next_match(0, T.Keyword, 'WHERE')
        try:
            tlist.insert_before(token, self.nl())
        except ValueError:  # issue121, errors in statement
            pass
        self.indent += 1
        self._process_default(tlist)
        self.indent -= 1

    def _process_parenthesis(self, tlist):
        """Process a parenthesis, indenting it when it starts with DML/DDL."""
        first = tlist.token_next(0)
        indented = False
        if first and first.ttype in (T.Keyword.DML, T.Keyword.DDL):
            self.indent += 1
            tlist.tokens.insert(0, self.nl())
            indented = True
        num_offset = self._get_offset(
            tlist.token_next_match(0, T.Punctuation, '('))
        self.offset += num_offset
        self._process_default(tlist, stmts=not indented)
        if indented:
            self.indent -= 1
        self.offset -= num_offset

    def _process_identifierlist(self, tlist):
        """Put each identifier after the first on its own aligned line."""
        identifiers = list(tlist.get_identifiers())
        if len(identifiers) > 1 and not tlist.within(sql.Function):
            first = list(identifiers[0].flatten())[0]
            if self.char == '\t':
                # when using tabs we don't count the actual word length
                # in spaces.
                num_offset = 1
            else:
                num_offset = self._get_offset(first) - len(first.value)
            self.offset += num_offset
            for token in identifiers[1:]:
                tlist.insert_before(token, self.nl())
            for token in tlist.tokens:
                if isinstance(token, sql.Comment):
                    tlist.insert_after(token, self.nl())
            self.offset -= num_offset
        self._process_default(tlist)

    def _process_case(self, tlist):
        """Break before every case branch after the first, and before END."""
        is_first = True
        num_offset = None
        case = tlist.tokens[0]
        outer_offset = self._get_offset(case) - len(case.value)
        self.offset += outer_offset
        for cond, value in tlist.get_cases():
            if is_first:
                # The first branch stays on the CASE line and fixes the
                # column the following branches are aligned to.
                tcond = list(cond[0].flatten())[0]
                is_first = False
                num_offset = self._get_offset(tcond) - len(tcond.value)
                self.offset += num_offset
                continue
            if cond is None:
                token = value[0]
            else:
                token = cond[0]
            tlist.insert_before(token, self.nl())
        # Line breaks on group level are done. Now let's add an offset of
        # 5 (=length of "when", "then", "else") and process subgroups.
        self.offset += 5
        self._process_default(tlist)
        self.offset -= 5
        if num_offset is not None:
            self.offset -= num_offset
        end = tlist.token_next_match(0, T.Keyword, 'END')
        tlist.insert_before(end, self.nl())
        self.offset -= outer_offset

    def _process_default(self, tlist, stmts=True, kwds=True):
        """Optionally split statements and keywords, then process sublists."""
        if stmts:
            self._split_statements(tlist)
        if kwds:
            self._split_kwds(tlist)
        for sgroup in tlist.get_sublists():
            self._process(sgroup)

    def process(self, stack, stmt):
        """Reindent ``stmt`` in place.

        When a previous ``sql.Statement`` was processed, a separating
        whitespace token is inserted at the front of this one.
        """
        if isinstance(stmt, sql.Statement):
            self._curr_stmt = stmt
        self._process(stmt)
        if isinstance(stmt, sql.Statement):
            if self._last_stmt is not None:
                if unicode(self._last_stmt).endswith('\n'):
                    nl = '\n'
                else:
                    nl = '\n\n'
                stmt.tokens.insert(
                    0, sql.Token(T.Whitespace, nl))
            if self._last_stmt != stmt:
                self._last_stmt = stmt


# FIXME: Doesn't work ;)
class RightMarginFilter:
    """Line-wrapping filter; currently disabled (see ``process``)."""

    keep_together = (
        # sql.TypeCast, sql.Identifier, sql.Alias,
    )

    def __init__(self, width=79):
        """Store the maximum line width and start with an empty line."""
        self.width = width
        self.line = ''

    def _process(self, stack, group, stream):
        """Yield the tokens, inserting a newline token where a line overflows.
        """
        for token in stream:
            if token.is_whitespace() and '\n' in token.value:
                if token.value.endswith('\n'):
                    self.line = ''
                else:
                    self.line = token.value.splitlines()[-1]
            elif (token.is_group()
                  and token.__class__ not in self.keep_together):
                token.tokens = self._process(stack, token, token.tokens)
            else:
                val = unicode(token)
                if len(self.line) + len(val) > self.width:
                    # Carry the leading spaces of the current line over to
                    # the wrapped line.
                    match = re.search('^ +', self.line)
                    if match is not None:
                        indent = match.group()
                    else:
                        indent = ''
                    yield sql.Token(T.Whitespace, '\n%s' % indent)
                    self.line = indent
                self.line += val
            yield token

    def process(self, stack, group):
        """Do nothing: the early return disables the filter (see FIXME above).
        """
        return
        group.tokens = self._process(stack, group, group.tokens)


class ColumnsSelect:
    """Get the columns names of a SELECT query"""
    def process(self, stack, stream):
        """Yield one string per column found between SELECT and FROM.

        A column followed by AS is reported by the name given after AS.
        """
        # mode 0: looking for SELECT, 1: reading columns,
        # mode 2: reading the name after AS, 3: done (FROM was reached)
        mode = 0
        oldValue = ""
        parenthesis = 0

        for token_type, value in stream:
            # Ignore comments
            if token_type in Comment:
                continue

            # We have not detected a SELECT statement
            if mode == 0:
                if token_type in Keyword and value == 'SELECT':
                    mode = 1

            # We have detected a SELECT statement
            elif mode == 1:
                if value == 'FROM':
                    if oldValue:
                        yield oldValue

                    mode = 3    # Columns have been checked

                elif value == 'AS':
                    oldValue = ""
                    mode = 2

                elif (token_type == Punctuation
                      and value == ',' and not parenthesis):
                    if oldValue:
                        yield oldValue
                    oldValue = ""

                elif token_type not in Whitespace:
                    if value == '(':
                        parenthesis += 1
                    elif value == ')':
                        parenthesis -= 1

                    oldValue += value

            # We are processing an AS keyword
            elif mode == 2:
                # We check also for Keywords because a bug in SQLParse
                if token_type == Name or token_type == Keyword:
                    yield value
                    mode = 1


# ---------------------------
# postprocess

class SerializerUnicode:
    """Post-processing filter that renders a statement as text."""

    def process(self, stack, stmt):
        """Return ``stmt`` as text with trailing whitespace stripped per line.
        """
        raw = unicode(stmt)
        lines = split_unquoted_newlines(raw)
        res = '\n'.join(line.rstrip() for line in lines)
        return res


def Tokens2Unicode(stream):
    """Return the concatenation of all token values in ``stream``."""
    # A single join replaces the repeated ``+=`` concatenation.
    return "".join(unicode(value) for _, value in stream)


class OutputFilter:
    """Base class for filters that wrap a statement in host-language code."""
    varname_prefix = ''

    def __init__(self, varname='sql'):
        """Store the prefixed variable name and reset the statement counter."""
        self.varname = self.varname_prefix + varname
        self.count = 0

    def _process(self, stream, varname, has_nl):
        """Produce the wrapped token stream; implemented by subclasses."""
        raise NotImplementedError

    def process(self, stack, stmt):
        """Replace ``stmt.tokens`` with the wrapped stream and return ``stmt``.

        From the second statement on, the variable name gets the running
        count appended.
        """
        self.count += 1
        if self.count > 1:
            varname = '%s%d' % (self.varname, self.count)
        else:
            varname = self.varname

        has_nl = len(unicode(stmt).strip().splitlines()) > 1
        stmt.tokens = self._process(stmt.tokens, varname, has_nl)
        return stmt


class OutputPythonFilter(OutputFilter):
    """Emit the statement as a Python string assignment."""

    def _process(self, stream, varname, has_nl):
        """Yield tokens forming ``varname = '...'``, one literal per line."""
        # SQL query assignment to varname
        if self.count > 1:
            yield sql.Token(T.Whitespace, '\n')
        yield sql.Token(T.Name, varname)
        yield sql.Token(T.Whitespace, ' ')
        yield sql.Token(T.Operator, '=')
        yield sql.Token(T.Whitespace, ' ')
        if has_nl:
            yield sql.Token(T.Operator, '(')
        yield sql.Token(T.Text, "'")

        # Print the tokens on the quote
        for token in stream:
            # Token is a new line separator
            if token.is_whitespace() and '\n' in token.value:
                # Close quote and add a new line
                yield sql.Token(T.Text, " '")
                yield sql.Token(T.Whitespace, '\n')

                # Quote header on secondary lines
                yield sql.Token(T.Whitespace, ' ' * (len(varname) + 4))
                yield sql.Token(T.Text, "'")

                # Indentation
                after_lb = token.value.split('\n', 1)[1]
                if after_lb:
                    yield sql.Token(T.Whitespace, after_lb)
                continue

            # Token has escape chars
            elif "'" in token.value:
                token.value = token.value.replace("'", "\\'")

            # Put the token
            yield sql.Token(T.Text, token.value)

        # Close quote
        yield sql.Token(T.Text, "'")
        if has_nl:
            yield sql.Token(T.Operator, ')')


class OutputPHPFilter(OutputFilter):
    """Emit the statement as PHP string assignments."""
    varname_prefix = '$'

    def _process(self, stream, varname, has_nl):
        """Yield tokens forming ``$var = "...";`` with ``.=`` on later lines."""
        # SQL query assignment to varname (quote header)
        if self.count > 1:
            yield sql.Token(T.Whitespace, '\n')
        yield sql.Token(T.Name, varname)
        yield sql.Token(T.Whitespace, ' ')
        if has_nl:
            # Extra space so that '=' lines up with the '.=' used below.
            yield sql.Token(T.Whitespace, ' ')
        yield sql.Token(T.Operator, '=')
        yield sql.Token(T.Whitespace, ' ')
        yield sql.Token(T.Text, '"')

        # Print the tokens on the quote
        for token in stream:
            # Token is a new line separator
            if token.is_whitespace() and '\n' in token.value:
                # Close quote and add a new line
                yield sql.Token(T.Text, ' ";')
                yield sql.Token(T.Whitespace, '\n')

                # Quote header on secondary lines
                yield sql.Token(T.Name, varname)
                yield sql.Token(T.Whitespace, ' ')
                yield sql.Token(T.Operator, '.=')
                yield sql.Token(T.Whitespace, ' ')
                yield sql.Token(T.Text, '"')

                # Indentation
                after_lb = token.value.split('\n', 1)[1]
                if after_lb:
                    yield sql.Token(T.Whitespace, after_lb)
                continue

            # Token has escape chars
            elif '"' in token.value:
                token.value = token.value.replace('"', '\\"')

            # Put the token
            yield sql.Token(T.Text, token.value)

        # Close quote
        yield sql.Token(T.Text, '"')
        yield sql.Token(T.Punctuation, ';')


class Limit:
    """Get the LIMIT of a query.

    If not defined, return -1 (SQL specification for no LIMIT query)
    """
    def process(self, stack, stream):
        """Return the value of the token two places after LIMIT, or -1."""
        index = 7
        stream = list(stream)
        stream.reverse()

        # Run over all tokens in the stream from the end
        for token_type, value in stream:
            index -= 1

            # With LIMIT at reversed position p, index is 6 - p, so the
            # lookup below reads stream[p - 2]: the token two places after
            # LIMIT in the original order.
            # NOTE(review): ``if index`` skips a LIMIT at p == 6, and p < 2
            # yields a negative (wrapping) index; presumably LIMIT is
            # expected to be followed by whitespace and a value near the end
            # of the stream. Confirm before relying on other shapes.
            if index and token_type in Keyword and value == 'LIMIT':
                return stream[4 - index][1]

        return -1


def compact(stream):
    """Function that return a compacted version of the stream"""
    pipe = Pipeline()

    pipe.append(StripComments())
    pipe.append(StripWhitespace)

    return pipe(stream)
http://git-wip-us.apache.org/repos/asf/impala/blob/49413d9c/shell/ext-py/sqlparse-0.1.14/sqlparse/formatter.py ---------------------------------------------------------------------- diff --git a/shell/ext-py/sqlparse-0.1.14/sqlparse/formatter.py b/shell/ext-py/sqlparse-0.1.14/sqlparse/formatter.py deleted file mode 100644 index 811f5af..0000000 --- a/shell/ext-py/sqlparse-0.1.14/sqlparse/formatter.py +++ /dev/null @@ -1,137 +0,0 @@ -# Copyright (C) 2008 Andi Albrecht, [email protected] -# -# This module is part of python-sqlparse and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php. - -"""SQL formatter""" - -from sqlparse import filters -from sqlparse.exceptions import SQLParseError - - -def validate_options(options): - """Validates options.""" - kwcase = options.get('keyword_case', None) - if kwcase not in [None, 'upper', 'lower', 'capitalize']: - raise SQLParseError('Invalid value for keyword_case: %r' % kwcase) - - idcase = options.get('identifier_case', None) - if idcase not in [None, 'upper', 'lower', 'capitalize']: - raise SQLParseError('Invalid value for identifier_case: %r' % idcase) - - ofrmt = options.get('output_format', None) - if ofrmt not in [None, 'sql', 'python', 'php']: - raise SQLParseError('Unknown output format: %r' % ofrmt) - - strip_comments = options.get('strip_comments', False) - if strip_comments not in [True, False]: - raise SQLParseError('Invalid value for strip_comments: %r' - % strip_comments) - - strip_ws = options.get('strip_whitespace', False) - if strip_ws not in [True, False]: - raise SQLParseError('Invalid value for strip_whitespace: %r' - % strip_ws) - - truncate_strings = options.get('truncate_strings', None) - if truncate_strings is not None: - try: - truncate_strings = int(truncate_strings) - except (ValueError, TypeError): - raise SQLParseError('Invalid value for truncate_strings: %r' - % truncate_strings) - if truncate_strings <= 1: - raise SQLParseError('Invalid value for 
truncate_strings: %r' - % truncate_strings) - options['truncate_strings'] = truncate_strings - options['truncate_char'] = options.get('truncate_char', '[...]') - - reindent = options.get('reindent', False) - if reindent not in [True, False]: - raise SQLParseError('Invalid value for reindent: %r' - % reindent) - elif reindent: - options['strip_whitespace'] = True - indent_tabs = options.get('indent_tabs', False) - if indent_tabs not in [True, False]: - raise SQLParseError('Invalid value for indent_tabs: %r' % indent_tabs) - elif indent_tabs: - options['indent_char'] = '\t' - else: - options['indent_char'] = ' ' - indent_width = options.get('indent_width', 2) - try: - indent_width = int(indent_width) - except (TypeError, ValueError): - raise SQLParseError('indent_width requires an integer') - if indent_width < 1: - raise SQLParseError('indent_width requires an positive integer') - options['indent_width'] = indent_width - - right_margin = options.get('right_margin', None) - if right_margin is not None: - try: - right_margin = int(right_margin) - except (TypeError, ValueError): - raise SQLParseError('right_margin requires an integer') - if right_margin < 10: - raise SQLParseError('right_margin requires an integer > 10') - options['right_margin'] = right_margin - - return options - - -def build_filter_stack(stack, options): - """Setup and return a filter stack. - - Args: - stack: :class:`~sqlparse.filters.FilterStack` instance - options: Dictionary with options validated by validate_options. 
- """ - # Token filter - if options.get('keyword_case', None): - stack.preprocess.append( - filters.KeywordCaseFilter(options['keyword_case'])) - - if options.get('identifier_case', None): - stack.preprocess.append( - filters.IdentifierCaseFilter(options['identifier_case'])) - - if options.get('truncate_strings', None) is not None: - stack.preprocess.append(filters.TruncateStringFilter( - width=options['truncate_strings'], char=options['truncate_char'])) - - # After grouping - if options.get('strip_comments', False): - stack.enable_grouping() - stack.stmtprocess.append(filters.StripCommentsFilter()) - - if (options.get('strip_whitespace', False) - or options.get('reindent', False)): - stack.enable_grouping() - stack.stmtprocess.append(filters.StripWhitespaceFilter()) - - if options.get('reindent', False): - stack.enable_grouping() - stack.stmtprocess.append( - filters.ReindentFilter(char=options['indent_char'], - width=options['indent_width'])) - - if options.get('right_margin', False): - stack.enable_grouping() - stack.stmtprocess.append( - filters.RightMarginFilter(width=options['right_margin'])) - - # Serializer - if options.get('output_format'): - frmt = options['output_format'] - if frmt.lower() == 'php': - fltr = filters.OutputPHPFilter() - elif frmt.lower() == 'python': - fltr = filters.OutputPythonFilter() - else: - fltr = None - if fltr is not None: - stack.postprocess.append(fltr) - - return stack http://git-wip-us.apache.org/repos/asf/impala/blob/49413d9c/shell/ext-py/sqlparse-0.1.14/sqlparse/functions.py ---------------------------------------------------------------------- diff --git a/shell/ext-py/sqlparse-0.1.14/sqlparse/functions.py b/shell/ext-py/sqlparse-0.1.14/sqlparse/functions.py deleted file mode 100644 index e54457e..0000000 --- a/shell/ext-py/sqlparse-0.1.14/sqlparse/functions.py +++ /dev/null @@ -1,44 +0,0 @@ -''' -Created on 17/05/2012 - -@author: piranna - -Several utility functions to extract info from the SQL sentences -''' - -from 
sqlparse.filters import ColumnsSelect, Limit -from sqlparse.pipeline import Pipeline -from sqlparse.tokens import Keyword, Whitespace - - -def getlimit(stream): - """Function that return the LIMIT of a input SQL """ - pipe = Pipeline() - - pipe.append(Limit()) - - result = pipe(stream) - try: - return int(result) - except ValueError: - return result - - -def getcolumns(stream): - """Function that return the colums of a SELECT query""" - pipe = Pipeline() - - pipe.append(ColumnsSelect()) - - return pipe(stream) - - -class IsType(object): - """Functor that return is the statement is of a specific type""" - def __init__(self, type): - self.type = type - - def __call__(self, stream): - for token_type, value in stream: - if token_type not in Whitespace: - return token_type in Keyword and value == self.type http://git-wip-us.apache.org/repos/asf/impala/blob/49413d9c/shell/ext-py/sqlparse-0.1.14/sqlparse/keywords.py ---------------------------------------------------------------------- diff --git a/shell/ext-py/sqlparse-0.1.14/sqlparse/keywords.py b/shell/ext-py/sqlparse-0.1.14/sqlparse/keywords.py deleted file mode 100644 index 0fb7c00..0000000 --- a/shell/ext-py/sqlparse-0.1.14/sqlparse/keywords.py +++ /dev/null @@ -1,571 +0,0 @@ -from sqlparse import tokens - -KEYWORDS = { - 'ABORT': tokens.Keyword, - 'ABS': tokens.Keyword, - 'ABSOLUTE': tokens.Keyword, - 'ACCESS': tokens.Keyword, - 'ADA': tokens.Keyword, - 'ADD': tokens.Keyword, - 'ADMIN': tokens.Keyword, - 'AFTER': tokens.Keyword, - 'AGGREGATE': tokens.Keyword, - 'ALIAS': tokens.Keyword, - 'ALL': tokens.Keyword, - 'ALLOCATE': tokens.Keyword, - 'ANALYSE': tokens.Keyword, - 'ANALYZE': tokens.Keyword, - 'ANY': tokens.Keyword, - 'ARE': tokens.Keyword, - 'ASC': tokens.Keyword.Order, - 'ASENSITIVE': tokens.Keyword, - 'ASSERTION': tokens.Keyword, - 'ASSIGNMENT': tokens.Keyword, - 'ASYMMETRIC': tokens.Keyword, - 'AT': tokens.Keyword, - 'ATOMIC': tokens.Keyword, - 'AUTHORIZATION': tokens.Keyword, - 'AVG': tokens.Keyword, - - 
'BACKWARD': tokens.Keyword, - 'BEFORE': tokens.Keyword, - 'BEGIN': tokens.Keyword, - 'BETWEEN': tokens.Keyword, - 'BITVAR': tokens.Keyword, - 'BIT_LENGTH': tokens.Keyword, - 'BOTH': tokens.Keyword, - 'BREADTH': tokens.Keyword, - - # 'C': tokens.Keyword, # most likely this is an alias - 'CACHE': tokens.Keyword, - 'CALL': tokens.Keyword, - 'CALLED': tokens.Keyword, - 'CARDINALITY': tokens.Keyword, - 'CASCADE': tokens.Keyword, - 'CASCADED': tokens.Keyword, - 'CAST': tokens.Keyword, - 'CATALOG': tokens.Keyword, - 'CATALOG_NAME': tokens.Keyword, - 'CHAIN': tokens.Keyword, - 'CHARACTERISTICS': tokens.Keyword, - 'CHARACTER_LENGTH': tokens.Keyword, - 'CHARACTER_SET_CATALOG': tokens.Keyword, - 'CHARACTER_SET_NAME': tokens.Keyword, - 'CHARACTER_SET_SCHEMA': tokens.Keyword, - 'CHAR_LENGTH': tokens.Keyword, - 'CHECK': tokens.Keyword, - 'CHECKED': tokens.Keyword, - 'CHECKPOINT': tokens.Keyword, - 'CLASS': tokens.Keyword, - 'CLASS_ORIGIN': tokens.Keyword, - 'CLOB': tokens.Keyword, - 'CLOSE': tokens.Keyword, - 'CLUSTER': tokens.Keyword, - 'COALESCE': tokens.Keyword, - 'COBOL': tokens.Keyword, - 'COLLATE': tokens.Keyword, - 'COLLATION': tokens.Keyword, - 'COLLATION_CATALOG': tokens.Keyword, - 'COLLATION_NAME': tokens.Keyword, - 'COLLATION_SCHEMA': tokens.Keyword, - 'COLLECT': tokens.Keyword, - 'COLUMN': tokens.Keyword, - 'COLUMN_NAME': tokens.Keyword, - 'COMMAND_FUNCTION': tokens.Keyword, - 'COMMAND_FUNCTION_CODE': tokens.Keyword, - 'COMMENT': tokens.Keyword, - 'COMMIT': tokens.Keyword.DML, - 'COMMITTED': tokens.Keyword, - 'COMPLETION': tokens.Keyword, - 'CONDITION_NUMBER': tokens.Keyword, - 'CONNECT': tokens.Keyword, - 'CONNECTION': tokens.Keyword, - 'CONNECTION_NAME': tokens.Keyword, - 'CONSTRAINT': tokens.Keyword, - 'CONSTRAINTS': tokens.Keyword, - 'CONSTRAINT_CATALOG': tokens.Keyword, - 'CONSTRAINT_NAME': tokens.Keyword, - 'CONSTRAINT_SCHEMA': tokens.Keyword, - 'CONSTRUCTOR': tokens.Keyword, - 'CONTAINS': tokens.Keyword, - 'CONTINUE': tokens.Keyword, - 'CONVERSION': 
tokens.Keyword, - 'CONVERT': tokens.Keyword, - 'COPY': tokens.Keyword, - 'CORRESPONTING': tokens.Keyword, - 'COUNT': tokens.Keyword, - 'CREATEDB': tokens.Keyword, - 'CREATEUSER': tokens.Keyword, - 'CROSS': tokens.Keyword, - 'CUBE': tokens.Keyword, - 'CURRENT': tokens.Keyword, - 'CURRENT_DATE': tokens.Keyword, - 'CURRENT_PATH': tokens.Keyword, - 'CURRENT_ROLE': tokens.Keyword, - 'CURRENT_TIME': tokens.Keyword, - 'CURRENT_TIMESTAMP': tokens.Keyword, - 'CURRENT_USER': tokens.Keyword, - 'CURSOR': tokens.Keyword, - 'CURSOR_NAME': tokens.Keyword, - 'CYCLE': tokens.Keyword, - - 'DATA': tokens.Keyword, - 'DATABASE': tokens.Keyword, - 'DATETIME_INTERVAL_CODE': tokens.Keyword, - 'DATETIME_INTERVAL_PRECISION': tokens.Keyword, - 'DAY': tokens.Keyword, - 'DEALLOCATE': tokens.Keyword, - 'DECLARE': tokens.Keyword, - 'DEFAULT': tokens.Keyword, - 'DEFAULTS': tokens.Keyword, - 'DEFERRABLE': tokens.Keyword, - 'DEFERRED': tokens.Keyword, - 'DEFINED': tokens.Keyword, - 'DEFINER': tokens.Keyword, - 'DELIMITER': tokens.Keyword, - 'DELIMITERS': tokens.Keyword, - 'DEREF': tokens.Keyword, - 'DESC': tokens.Keyword.Order, - 'DESCRIBE': tokens.Keyword, - 'DESCRIPTOR': tokens.Keyword, - 'DESTROY': tokens.Keyword, - 'DESTRUCTOR': tokens.Keyword, - 'DETERMINISTIC': tokens.Keyword, - 'DIAGNOSTICS': tokens.Keyword, - 'DICTIONARY': tokens.Keyword, - 'DISCONNECT': tokens.Keyword, - 'DISPATCH': tokens.Keyword, - 'DO': tokens.Keyword, - 'DOMAIN': tokens.Keyword, - 'DYNAMIC': tokens.Keyword, - 'DYNAMIC_FUNCTION': tokens.Keyword, - 'DYNAMIC_FUNCTION_CODE': tokens.Keyword, - - 'EACH': tokens.Keyword, - 'ENCODING': tokens.Keyword, - 'ENCRYPTED': tokens.Keyword, - 'END-EXEC': tokens.Keyword, - 'EQUALS': tokens.Keyword, - 'ESCAPE': tokens.Keyword, - 'EVERY': tokens.Keyword, - 'EXCEPT': tokens.Keyword, - 'ESCEPTION': tokens.Keyword, - 'EXCLUDING': tokens.Keyword, - 'EXCLUSIVE': tokens.Keyword, - 'EXEC': tokens.Keyword, - 'EXECUTE': tokens.Keyword, - 'EXISTING': tokens.Keyword, - 'EXISTS': tokens.Keyword, - 
'EXTERNAL': tokens.Keyword, - 'EXTRACT': tokens.Keyword, - - 'FALSE': tokens.Keyword, - 'FETCH': tokens.Keyword, - 'FINAL': tokens.Keyword, - 'FIRST': tokens.Keyword, - 'FORCE': tokens.Keyword, - 'FOREACH': tokens.Keyword, - 'FOREIGN': tokens.Keyword, - 'FORTRAN': tokens.Keyword, - 'FORWARD': tokens.Keyword, - 'FOUND': tokens.Keyword, - 'FREE': tokens.Keyword, - 'FREEZE': tokens.Keyword, - 'FULL': tokens.Keyword, - 'FUNCTION': tokens.Keyword, - - # 'G': tokens.Keyword, - 'GENERAL': tokens.Keyword, - 'GENERATED': tokens.Keyword, - 'GET': tokens.Keyword, - 'GLOBAL': tokens.Keyword, - 'GO': tokens.Keyword, - 'GOTO': tokens.Keyword, - 'GRANT': tokens.Keyword, - 'GRANTED': tokens.Keyword, - 'GROUPING': tokens.Keyword, - - 'HANDLER': tokens.Keyword, - 'HAVING': tokens.Keyword, - 'HIERARCHY': tokens.Keyword, - 'HOLD': tokens.Keyword, - 'HOST': tokens.Keyword, - - 'IDENTITY': tokens.Keyword, - 'IGNORE': tokens.Keyword, - 'ILIKE': tokens.Keyword, - 'IMMEDIATE': tokens.Keyword, - 'IMMUTABLE': tokens.Keyword, - - 'IMPLEMENTATION': tokens.Keyword, - 'IMPLICIT': tokens.Keyword, - 'INCLUDING': tokens.Keyword, - 'INCREMENT': tokens.Keyword, - 'INDEX': tokens.Keyword, - - 'INDITCATOR': tokens.Keyword, - 'INFIX': tokens.Keyword, - 'INHERITS': tokens.Keyword, - 'INITIALIZE': tokens.Keyword, - 'INITIALLY': tokens.Keyword, - 'INOUT': tokens.Keyword, - 'INPUT': tokens.Keyword, - 'INSENSITIVE': tokens.Keyword, - 'INSTANTIABLE': tokens.Keyword, - 'INSTEAD': tokens.Keyword, - 'INTERSECT': tokens.Keyword, - 'INTO': tokens.Keyword, - 'INVOKER': tokens.Keyword, - 'IS': tokens.Keyword, - 'ISNULL': tokens.Keyword, - 'ISOLATION': tokens.Keyword, - 'ITERATE': tokens.Keyword, - - # 'K': tokens.Keyword, - 'KEY': tokens.Keyword, - 'KEY_MEMBER': tokens.Keyword, - 'KEY_TYPE': tokens.Keyword, - - 'LANCOMPILER': tokens.Keyword, - 'LANGUAGE': tokens.Keyword, - 'LARGE': tokens.Keyword, - 'LAST': tokens.Keyword, - 'LATERAL': tokens.Keyword, - 'LEADING': tokens.Keyword, - 'LENGTH': tokens.Keyword, - 
'LESS': tokens.Keyword, - 'LEVEL': tokens.Keyword, - 'LIMIT': tokens.Keyword, - 'LISTEN': tokens.Keyword, - 'LOAD': tokens.Keyword, - 'LOCAL': tokens.Keyword, - 'LOCALTIME': tokens.Keyword, - 'LOCALTIMESTAMP': tokens.Keyword, - 'LOCATION': tokens.Keyword, - 'LOCATOR': tokens.Keyword, - 'LOCK': tokens.Keyword, - 'LOWER': tokens.Keyword, - - # 'M': tokens.Keyword, - 'MAP': tokens.Keyword, - 'MATCH': tokens.Keyword, - 'MAXVALUE': tokens.Keyword, - 'MESSAGE_LENGTH': tokens.Keyword, - 'MESSAGE_OCTET_LENGTH': tokens.Keyword, - 'MESSAGE_TEXT': tokens.Keyword, - 'METHOD': tokens.Keyword, - 'MINUTE': tokens.Keyword, - 'MINVALUE': tokens.Keyword, - 'MOD': tokens.Keyword, - 'MODE': tokens.Keyword, - 'MODIFIES': tokens.Keyword, - 'MODIFY': tokens.Keyword, - 'MONTH': tokens.Keyword, - 'MORE': tokens.Keyword, - 'MOVE': tokens.Keyword, - 'MUMPS': tokens.Keyword, - - 'NAMES': tokens.Keyword, - 'NATIONAL': tokens.Keyword, - 'NATURAL': tokens.Keyword, - 'NCHAR': tokens.Keyword, - 'NCLOB': tokens.Keyword, - 'NEW': tokens.Keyword, - 'NEXT': tokens.Keyword, - 'NO': tokens.Keyword, - 'NOCREATEDB': tokens.Keyword, - 'NOCREATEUSER': tokens.Keyword, - 'NONE': tokens.Keyword, - 'NOT': tokens.Keyword, - 'NOTHING': tokens.Keyword, - 'NOTIFY': tokens.Keyword, - 'NOTNULL': tokens.Keyword, - 'NULL': tokens.Keyword, - 'NULLABLE': tokens.Keyword, - 'NULLIF': tokens.Keyword, - - 'OBJECT': tokens.Keyword, - 'OCTET_LENGTH': tokens.Keyword, - 'OF': tokens.Keyword, - 'OFF': tokens.Keyword, - 'OFFSET': tokens.Keyword, - 'OIDS': tokens.Keyword, - 'OLD': tokens.Keyword, - 'ONLY': tokens.Keyword, - 'OPEN': tokens.Keyword, - 'OPERATION': tokens.Keyword, - 'OPERATOR': tokens.Keyword, - 'OPTION': tokens.Keyword, - 'OPTIONS': tokens.Keyword, - 'ORDINALITY': tokens.Keyword, - 'OUT': tokens.Keyword, - 'OUTPUT': tokens.Keyword, - 'OVERLAPS': tokens.Keyword, - 'OVERLAY': tokens.Keyword, - 'OVERRIDING': tokens.Keyword, - 'OWNER': tokens.Keyword, - - 'PAD': tokens.Keyword, - 'PARAMETER': tokens.Keyword, - 
'PARAMETERS': tokens.Keyword, - 'PARAMETER_MODE': tokens.Keyword, - 'PARAMATER_NAME': tokens.Keyword, - 'PARAMATER_ORDINAL_POSITION': tokens.Keyword, - 'PARAMETER_SPECIFIC_CATALOG': tokens.Keyword, - 'PARAMETER_SPECIFIC_NAME': tokens.Keyword, - 'PARAMATER_SPECIFIC_SCHEMA': tokens.Keyword, - 'PARTIAL': tokens.Keyword, - 'PASCAL': tokens.Keyword, - 'PENDANT': tokens.Keyword, - 'PLACING': tokens.Keyword, - 'PLI': tokens.Keyword, - 'POSITION': tokens.Keyword, - 'POSTFIX': tokens.Keyword, - 'PRECISION': tokens.Keyword, - 'PREFIX': tokens.Keyword, - 'PREORDER': tokens.Keyword, - 'PREPARE': tokens.Keyword, - 'PRESERVE': tokens.Keyword, - 'PRIMARY': tokens.Keyword, - 'PRIOR': tokens.Keyword, - 'PRIVILEGES': tokens.Keyword, - 'PROCEDURAL': tokens.Keyword, - 'PROCEDURE': tokens.Keyword, - 'PUBLIC': tokens.Keyword, - - 'RAISE': tokens.Keyword, - 'READ': tokens.Keyword, - 'READS': tokens.Keyword, - 'RECHECK': tokens.Keyword, - 'RECURSIVE': tokens.Keyword, - 'REF': tokens.Keyword, - 'REFERENCES': tokens.Keyword, - 'REFERENCING': tokens.Keyword, - 'REINDEX': tokens.Keyword, - 'RELATIVE': tokens.Keyword, - 'RENAME': tokens.Keyword, - 'REPEATABLE': tokens.Keyword, - 'RESET': tokens.Keyword, - 'RESTART': tokens.Keyword, - 'RESTRICT': tokens.Keyword, - 'RESULT': tokens.Keyword, - 'RETURN': tokens.Keyword, - 'RETURNED_LENGTH': tokens.Keyword, - 'RETURNED_OCTET_LENGTH': tokens.Keyword, - 'RETURNED_SQLSTATE': tokens.Keyword, - 'RETURNS': tokens.Keyword, - 'REVOKE': tokens.Keyword, - 'RIGHT': tokens.Keyword, - 'ROLE': tokens.Keyword, - 'ROLLBACK': tokens.Keyword.DML, - 'ROLLUP': tokens.Keyword, - 'ROUTINE': tokens.Keyword, - 'ROUTINE_CATALOG': tokens.Keyword, - 'ROUTINE_NAME': tokens.Keyword, - 'ROUTINE_SCHEMA': tokens.Keyword, - 'ROW': tokens.Keyword, - 'ROWS': tokens.Keyword, - 'ROW_COUNT': tokens.Keyword, - 'RULE': tokens.Keyword, - - 'SAVE_POINT': tokens.Keyword, - 'SCALE': tokens.Keyword, - 'SCHEMA': tokens.Keyword, - 'SCHEMA_NAME': tokens.Keyword, - 'SCOPE': tokens.Keyword, - 
'SCROLL': tokens.Keyword, - 'SEARCH': tokens.Keyword, - 'SECOND': tokens.Keyword, - 'SECURITY': tokens.Keyword, - 'SELF': tokens.Keyword, - 'SENSITIVE': tokens.Keyword, - 'SERIALIZABLE': tokens.Keyword, - 'SERVER_NAME': tokens.Keyword, - 'SESSION': tokens.Keyword, - 'SESSION_USER': tokens.Keyword, - 'SETOF': tokens.Keyword, - 'SETS': tokens.Keyword, - 'SHARE': tokens.Keyword, - 'SHOW': tokens.Keyword, - 'SIMILAR': tokens.Keyword, - 'SIMPLE': tokens.Keyword, - 'SIZE': tokens.Keyword, - 'SOME': tokens.Keyword, - 'SOURCE': tokens.Keyword, - 'SPACE': tokens.Keyword, - 'SPECIFIC': tokens.Keyword, - 'SPECIFICTYPE': tokens.Keyword, - 'SPECIFIC_NAME': tokens.Keyword, - 'SQL': tokens.Keyword, - 'SQLCODE': tokens.Keyword, - 'SQLERROR': tokens.Keyword, - 'SQLEXCEPTION': tokens.Keyword, - 'SQLSTATE': tokens.Keyword, - 'SQLWARNING': tokens.Keyword, - 'STABLE': tokens.Keyword, - 'START': tokens.Keyword.DML, - 'STATE': tokens.Keyword, - 'STATEMENT': tokens.Keyword, - 'STATIC': tokens.Keyword, - 'STATISTICS': tokens.Keyword, - 'STDIN': tokens.Keyword, - 'STDOUT': tokens.Keyword, - 'STORAGE': tokens.Keyword, - 'STRICT': tokens.Keyword, - 'STRUCTURE': tokens.Keyword, - 'STYPE': tokens.Keyword, - 'SUBCLASS_ORIGIN': tokens.Keyword, - 'SUBLIST': tokens.Keyword, - 'SUBSTRING': tokens.Keyword, - 'SUM': tokens.Keyword, - 'SYMMETRIC': tokens.Keyword, - 'SYSID': tokens.Keyword, - 'SYSTEM': tokens.Keyword, - 'SYSTEM_USER': tokens.Keyword, - - 'TABLE': tokens.Keyword, - 'TABLE_NAME': tokens.Keyword, - 'TEMP': tokens.Keyword, - 'TEMPLATE': tokens.Keyword, - 'TEMPORARY': tokens.Keyword, - 'TERMINATE': tokens.Keyword, - 'THAN': tokens.Keyword, - 'TIMESTAMP': tokens.Keyword, - 'TIMEZONE_HOUR': tokens.Keyword, - 'TIMEZONE_MINUTE': tokens.Keyword, - 'TO': tokens.Keyword, - 'TOAST': tokens.Keyword, - 'TRAILING': tokens.Keyword, - 'TRANSATION': tokens.Keyword, - 'TRANSACTIONS_COMMITTED': tokens.Keyword, - 'TRANSACTIONS_ROLLED_BACK': tokens.Keyword, - 'TRANSATION_ACTIVE': tokens.Keyword, - 
'TRANSFORM': tokens.Keyword, - 'TRANSFORMS': tokens.Keyword, - 'TRANSLATE': tokens.Keyword, - 'TRANSLATION': tokens.Keyword, - 'TREAT': tokens.Keyword, - 'TRIGGER': tokens.Keyword, - 'TRIGGER_CATALOG': tokens.Keyword, - 'TRIGGER_NAME': tokens.Keyword, - 'TRIGGER_SCHEMA': tokens.Keyword, - 'TRIM': tokens.Keyword, - 'TRUE': tokens.Keyword, - 'TRUNCATE': tokens.Keyword, - 'TRUSTED': tokens.Keyword, - 'TYPE': tokens.Keyword, - - 'UNCOMMITTED': tokens.Keyword, - 'UNDER': tokens.Keyword, - 'UNENCRYPTED': tokens.Keyword, - 'UNION': tokens.Keyword, - 'UNIQUE': tokens.Keyword, - 'UNKNOWN': tokens.Keyword, - 'UNLISTEN': tokens.Keyword, - 'UNNAMED': tokens.Keyword, - 'UNNEST': tokens.Keyword, - 'UNTIL': tokens.Keyword, - 'UPPER': tokens.Keyword, - 'USAGE': tokens.Keyword, - 'USE': tokens.Keyword, - 'USER': tokens.Keyword, - 'USER_DEFINED_TYPE_CATALOG': tokens.Keyword, - 'USER_DEFINED_TYPE_NAME': tokens.Keyword, - 'USER_DEFINED_TYPE_SCHEMA': tokens.Keyword, - 'USING': tokens.Keyword, - - 'VACUUM': tokens.Keyword, - 'VALID': tokens.Keyword, - 'VALIDATOR': tokens.Keyword, - 'VALUES': tokens.Keyword, - 'VARIABLE': tokens.Keyword, - 'VERBOSE': tokens.Keyword, - 'VERSION': tokens.Keyword, - 'VIEW': tokens.Keyword, - 'VOLATILE': tokens.Keyword, - - 'WHENEVER': tokens.Keyword, - 'WITH': tokens.Keyword, - 'WITHOUT': tokens.Keyword, - 'WORK': tokens.Keyword, - 'WRITE': tokens.Keyword, - - 'YEAR': tokens.Keyword, - - 'ZONE': tokens.Keyword, - - - 'ARRAY': tokens.Name.Builtin, - 'BIGINT': tokens.Name.Builtin, - 'BINARY': tokens.Name.Builtin, - 'BIT': tokens.Name.Builtin, - 'BLOB': tokens.Name.Builtin, - 'BOOLEAN': tokens.Name.Builtin, - 'CHAR': tokens.Name.Builtin, - 'CHARACTER': tokens.Name.Builtin, - 'DATE': tokens.Name.Builtin, - 'DEC': tokens.Name.Builtin, - 'DECIMAL': tokens.Name.Builtin, - 'FLOAT': tokens.Name.Builtin, - 'INT': tokens.Name.Builtin, - 'INTEGER': tokens.Name.Builtin, - 'INTERVAL': tokens.Name.Builtin, - 'LONG': tokens.Name.Builtin, - 'NUMBER': tokens.Name.Builtin, - 
'NUMERIC': tokens.Name.Builtin, - 'REAL': tokens.Name.Builtin, - 'SERIAL': tokens.Name.Builtin, - 'SMALLINT': tokens.Name.Builtin, - 'VARCHAR': tokens.Name.Builtin, - 'VARCHAR2': tokens.Name.Builtin, - 'VARYING': tokens.Name.Builtin, - 'INT8': tokens.Name.Builtin, - 'SERIAL8': tokens.Name.Builtin, - 'TEXT': tokens.Name.Builtin, -} - - -KEYWORDS_COMMON = { - 'SELECT': tokens.Keyword.DML, - 'INSERT': tokens.Keyword.DML, - 'DELETE': tokens.Keyword.DML, - 'UPDATE': tokens.Keyword.DML, - 'REPLACE': tokens.Keyword.DML, - 'MERGE': tokens.Keyword.DML, - 'DROP': tokens.Keyword.DDL, - 'CREATE': tokens.Keyword.DDL, - 'ALTER': tokens.Keyword.DDL, - - 'WHERE': tokens.Keyword, - 'FROM': tokens.Keyword, - 'INNER': tokens.Keyword, - 'JOIN': tokens.Keyword, - 'STRAIGHT_JOIN': tokens.Keyword, - 'AND': tokens.Keyword, - 'OR': tokens.Keyword, - 'LIKE': tokens.Keyword, - 'ON': tokens.Keyword, - 'IN': tokens.Keyword, - 'SET': tokens.Keyword, - - 'BY': tokens.Keyword, - 'GROUP': tokens.Keyword, - 'ORDER': tokens.Keyword, - 'LEFT': tokens.Keyword, - 'OUTER': tokens.Keyword, - 'FULL': tokens.Keyword, - - 'IF': tokens.Keyword, - 'END': tokens.Keyword, - 'THEN': tokens.Keyword, - 'LOOP': tokens.Keyword, - 'AS': tokens.Keyword, - 'ELSE': tokens.Keyword, - 'FOR': tokens.Keyword, - - 'CASE': tokens.Keyword, - 'WHEN': tokens.Keyword, - 'MIN': tokens.Keyword, - 'MAX': tokens.Keyword, - 'DISTINCT': tokens.Keyword, -} http://git-wip-us.apache.org/repos/asf/impala/blob/49413d9c/shell/ext-py/sqlparse-0.1.14/sqlparse/lexer.py ---------------------------------------------------------------------- diff --git a/shell/ext-py/sqlparse-0.1.14/sqlparse/lexer.py b/shell/ext-py/sqlparse-0.1.14/sqlparse/lexer.py deleted file mode 100644 index b7a33ff..0000000 --- a/shell/ext-py/sqlparse-0.1.14/sqlparse/lexer.py +++ /dev/null @@ -1,350 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright (C) 2008 Andi Albrecht, [email protected] -# -# This module is part of python-sqlparse and is released under -# the BSD License: 
# http://www.opensource.org/licenses/bsd-license.php.

"""SQL Lexer"""

# This code is based on the SqlLexer in pygments.
# http://pygments.org/
# It's separated from the rest of pygments to increase performance
# and to allow some customizations.

import re
import sys

from sqlparse import tokens
from sqlparse.keywords import KEYWORDS, KEYWORDS_COMMON
from cStringIO import StringIO


class include(str):
    """Marker string naming another state whose rules are spliced in.

    ``LexerMeta._process_state`` replaces an ``include('name')`` entry with
    the processed rules of state ``name``.
    """
    pass


class combined(tuple):
    """Indicates a state combined from multiple states."""

    def __new__(cls, *args):
        return tuple.__new__(cls, args)

    def __init__(self, *args):
        # tuple.__init__ doesn't do anything
        pass


def is_keyword(value):
    """Classify *value* and return a ``(token type, value)`` pair.

    The upper-cased value is looked up in ``KEYWORDS_COMMON`` first, then in
    ``KEYWORDS``; if neither knows it the type is ``tokens.Name``. *value*
    itself is returned unchanged.
    """
    test = value.upper()
    return KEYWORDS_COMMON.get(test, KEYWORDS.get(test, tokens.Name)), value


def apply_filters(stream, filters, lexer=None):
    """
    Use this method to apply an iterable of filters to
    a stream. If lexer is given it's forwarded to the
    filter, otherwise the filter receives `None`.

    Each filter must provide ``filter(lexer, stream)``. The filters are
    chained lazily: nothing is consumed until the returned stream is
    iterated.
    """

    def _apply(filter_, stream):
        for token in filter_.filter(lexer, stream):
            yield token

    for filter_ in filters:
        stream = _apply(filter_, stream)
    return stream


class LexerMeta(type):
    """
    Metaclass for Lexer, creates the self._tokens attribute from
    self.tokens on the first instantiation.
    """

    def _process_state(cls, unprocessed, processed, state):
        """Compile the rules of *state* and return them as a list.

        *unprocessed* maps state names to raw rule lists, *processed* is the
        cache of already compiled states and is filled in place. Every
        compiled rule is a ``(match function, action, new_state)`` triple
        where ``new_state`` is ``None``, a negative int (pop that many
        states), ``'#push'`` or a tuple of states to push.

        Raises ``ValueError`` if a rule's regular expression does not
        compile.
        """
        assert type(state) is str, "wrong state name %r" % state
        assert state[0] != '#', "invalid state name %r" % state
        if state in processed:
            return processed[state]
        tokenlist = processed[state] = []
        rflags = cls.flags
        for tdef in unprocessed[state]:
            if isinstance(tdef, include):
                # it's a state reference
                assert tdef != state, "circular state reference %r" % state
                tokenlist.extend(cls._process_state(
                    unprocessed, processed, str(tdef)))
                continue

            assert type(tdef) is tuple, "wrong rule def %r" % tdef

            try:
                rex = re.compile(tdef[0], rflags).match
            except Exception as err:
                raise ValueError(("uncompilable regex %r in state"
                                  " %r of %r: %s"
                                  % (tdef[0], state, cls, err)))

            assert type(tdef[1]) is tokens._TokenType or callable(tdef[1]), \
                ('token type must be simple type or callable, not %r'
                 % (tdef[1],))

            if len(tdef) == 2:
                new_state = None
            else:
                tdef2 = tdef[2]
                if isinstance(tdef2, str):
                    # an existing state
                    if tdef2 == '#pop':
                        new_state = -1
                    elif tdef2 in unprocessed:
                        new_state = (tdef2,)
                    elif tdef2 == '#push':
                        new_state = tdef2
                    elif tdef2[:5] == '#pop:':
                        new_state = -int(tdef2[5:])
                    else:
                        assert False, 'unknown new state %r' % tdef2
                elif isinstance(tdef2, combined):
                    # combine a new state from existing ones
                    new_state = '_tmp_%d' % cls._tmpname
                    cls._tmpname += 1
                    itokens = []
                    for istate in tdef2:
                        assert istate != state, \
                            'circular state ref %r' % istate
                        itokens.extend(cls._process_state(unprocessed,
                                                          processed, istate))
                    processed[new_state] = itokens
                    new_state = (new_state,)
                elif isinstance(tdef2, tuple):
                    # push more than one state
                    # A separate loop name keeps the ``state`` argument
                    # intact for the asserts and error messages of the
                    # rules that follow this one.
                    for istate in tdef2:
                        assert (istate in unprocessed or
                                istate in ('#pop', '#push')), \
                            'unknown new state ' + istate
                    new_state = tdef2
                else:
                    assert False, 'unknown new state def %r' % tdef2
            tokenlist.append((rex, tdef[1], new_state))
        return tokenlist

    def process_tokendef(cls):
        """Compile every state in ``cls.tokens`` and return the result.

        Resets ``cls._all_tokens`` and ``cls._tmpname``; the returned dict
        (state name -> compiled rules) is also stored in
        ``cls._all_tokens[cls.__name__]``.
        """
        cls._all_tokens = {}
        cls._tmpname = 0
        processed = cls._all_tokens[cls.__name__] = {}
        for state in cls.tokens.keys():
            cls._process_state(cls.tokens, processed, state)
        return processed

    def __call__(cls, *args, **kwds):
        """Instantiate *cls*, compiling its token table on first use.

        The table is not compiled here when the class has a truthy
        ``token_variants`` attribute.
        """
        if not hasattr(cls, '_tokens'):
            cls._all_tokens = {}
            cls._tmpname = 0
            if hasattr(cls, 'token_variants') and cls.token_variants:
                # don't process yet
                pass
            else:
                cls._tokens = cls.process_tokendef()

        return type.__call__(cls, *args, **kwds)


class Lexer(object):
    """Regex-driven tokenizer for SQL text.

    ``tokens`` maps a state name to an ordered list of rules. A rule is
    ``(regex, action)`` or ``(regex, action, new_state)`` where *action* is
    a token type or a callable returning ``(token type, value)``. Rules are
    tried in order, the first match wins.
    """

    __metaclass__ = LexerMeta

    encoding = 'utf-8'
    stripall = False
    stripnl = False
    tabsize = 0
    flags = re.IGNORECASE | re.UNICODE

    tokens = {
        'root': [
            (r'--.*?(\r\n|\r|\n)', tokens.Comment.Single),
            # $ matches *before* newline, therefore we have two patterns
            # to match Comment.Single
            (r'--.*?$', tokens.Comment.Single),
            (r'(\r\n|\r|\n)', tokens.Newline),
            (r'\s+', tokens.Whitespace),
            (r'/\*', tokens.Comment.Multiline, 'multiline-comments'),
            (r':=', tokens.Assignment),
            (r'::', tokens.Punctuation),
            (r'[*]', tokens.Wildcard),
            (r'CASE\b', tokens.Keyword),  # extended CASE(foo)
            (r"`(``|[^`])*`", tokens.Name),
            (r"´(´´|[^´])*´", tokens.Name),
            (r'\$([^\W\d]\w*)?\$', tokens.Name.Builtin),
            (r'\?{1}', tokens.Name.Placeholder),
            (r'%\(\w+\)s', tokens.Name.Placeholder),
            (r'%s', tokens.Name.Placeholder),
            (r'[$:?]\w+', tokens.Name.Placeholder),
            # FIXME(andi): VALUES shouldn't be listed here
            # see https://github.com/andialbrecht/sqlparse/pull/64
            (r'VALUES', tokens.Keyword),
            (r'@[^\W\d_]\w+', tokens.Name),
            (r'[^\W\d_]\w*(?=[.(])', tokens.Name),  # see issue39
            (r'[-]?0x[0-9a-fA-F]+', tokens.Number.Hexadecimal),
            (r'[-]?[0-9]*(\.[0-9]+)?[eE][-]?[0-9]+', tokens.Number.Float),
            (r'[-]?[0-9]*\.[0-9]+', tokens.Number.Float),
            (r'[-]?[0-9]+', tokens.Number.Integer),
            (r"'(''|\\\\|\\'|[^'])*'", tokens.String.Single),
            # not a real string literal in ANSI SQL:
            (r'(""|".*?[^\\]")', tokens.String.Symbol),
            (r'(\[.*[^\]]\])', tokens.Name),
            (r'((LEFT\s+|RIGHT\s+|FULL\s+)?(INNER\s+|OUTER\s+|STRAIGHT\s+)?|(CROSS\s+|NATURAL\s+)?)?JOIN\b', tokens.Keyword),
            (r'END(\s+IF|\s+LOOP)?\b', tokens.Keyword),
            (r'NOT NULL\b', tokens.Keyword),
            (r'CREATE(\s+OR\s+REPLACE)?\b', tokens.Keyword.DDL),
            (r'(?<=\.)[^\W\d_]\w*', tokens.Name),
            (r'[^\W\d_]\w*', is_keyword),
            (r'[;:()\[\],\.]', tokens.Punctuation),
            (r'[<>=~!]+', tokens.Operator.Comparison),
            (r'[+/@#%^&|`?^-]+', tokens.Operator),
        ],
        'multiline-comments': [
            (r'/\*', tokens.Comment.Multiline, 'multiline-comments'),
            (r'\*/', tokens.Comment.Multiline, '#pop'),
            (r'[^/\*]+', tokens.Comment.Multiline),
            (r'[/*]', tokens.Comment.Multiline),
        ]}

    def __init__(self):
        self.filters = []

    def add_filter(self, filter_, **options):
        """Register *filter_* to be applied by :meth:`get_tokens`.

        If *filter_* is not already a ``sqlparse.filters.Filter`` instance
        it is called with *options* and the result is registered instead.
        """
        # NOTE(review): imported locally, presumably to avoid a circular
        # import with sqlparse.filters -- confirm before moving it.
        from sqlparse.filters import Filter
        if not isinstance(filter_, Filter):
            filter_ = filter_(**options)
        self.filters.append(filter_)

    def _decode(self, text):
        """Return *text* decoded according to ``self.encoding``.

        On Python 3 a ``str`` is returned as is (tabs are not expanded in
        that case). With encoding ``'guess'`` UTF-8 is tried first (a
        leading BOM is dropped) and latin1 is the fallback; any other
        encoding falls back to ``'unicode-escape'`` on decode errors.
        Tabs are expanded when ``self.tabsize`` is positive.
        """
        if sys.version_info[0] == 3:
            if isinstance(text, str):
                return text
        if self.encoding == 'guess':
            try:
                text = text.decode('utf-8')
                if text.startswith(u'\ufeff'):
                    text = text[len(u'\ufeff'):]
            except UnicodeDecodeError:
                text = text.decode('latin1')
        else:
            try:
                text = text.decode(self.encoding)
            except UnicodeDecodeError:
                text = text.decode('unicode-escape')

        if self.tabsize > 0:
            text = text.expandtabs(self.tabsize)
        return text

    def get_tokens(self, text, unfiltered=False):
        """
        Return an iterable of (tokentype, value) pairs generated from
        `text`. If `unfiltered` is set to `True`, the filtering mechanism
        is bypassed even if filters are defined.

        Also preprocess the text, i.e. expand tabs and strip it if
        wanted and applies registered filters.

        `text` may be a string or an object with a ``read()`` method.
        Side effect: on Python 2 a unicode `text` is re-encoded as UTF-8
        and ``self.encoding`` is set to ``'utf-8'`` to match.
        """
        if isinstance(text, basestring):
            if self.stripall:
                text = text.strip()
            elif self.stripnl:
                text = text.strip('\n')

            if sys.version_info[0] < 3 and isinstance(text, unicode):
                text = StringIO(text.encode('utf-8'))
                self.encoding = 'utf-8'
            else:
                text = StringIO(text)

        def streamer():
            for i, t, v in self.get_tokens_unprocessed(text):
                yield t, v
        stream = streamer()
        if not unfiltered:
            stream = apply_filters(stream, self.filters, self)
        return stream

    def get_tokens_unprocessed(self, stream, stack=('root',)):
        """
        Split the text read from ``stream`` into
        (position, tokentype, text) triples.

        ``stream`` must provide ``read()``. ``stack`` is the initial state
        stack (default: ``('root',)``).
        """
        pos = 0
        tokendefs = self._tokens  # see __call__, pylint:disable=E1101
        statestack = list(stack)
        statetokens = tokendefs[statestack[-1]]
        # Text classified once by a callable action keeps that type: any
        # later match with the same text reuses it, whichever rule matched.
        known_names = {}

        text = stream.read()
        text = self._decode(text)

        while True:
            for rexmatch, action, new_state in statetokens:
                m = rexmatch(text, pos)
                if m:
                    value = m.group()
                    if value in known_names:
                        yield pos, known_names[value], value
                    elif type(action) is tokens._TokenType:
                        yield pos, action, value
                    elif hasattr(action, '__call__'):
                        ttype, value = action(value)
                        known_names[value] = ttype
                        yield pos, ttype, value
                    else:
                        # Not reachable for callables: they are all handled
                        # by the branch above.
                        for item in action(self, m):
                            yield item
                    pos = m.end()
                    if new_state is not None:
                        # state transition
                        if isinstance(new_state, tuple):
                            for state in new_state:
                                if state == '#pop':
                                    statestack.pop()
                                elif state == '#push':
                                    statestack.append(statestack[-1])
                                else:
                                    statestack.append(state)
                        elif isinstance(new_state, int):
                            # pop
                            del statestack[new_state:]
                        elif new_state == '#push':
                            statestack.append(statestack[-1])
                        else:
                            assert False, "wrong state def: %r" % new_state
                        statetokens = tokendefs[statestack[-1]]
                    break
            else:
                # No rule of the current state matched at ``pos``.
                try:
                    if text[pos] == '\n':
                        # at EOL, reset state to "root"
                        pos += 1
                        statestack = ['root']
                        statetokens = tokendefs['root']
                        yield pos, tokens.Text, u'\n'
                        continue
                    yield pos, tokens.Error, text[pos]
                    pos += 1
                except IndexError:
                    # ``pos`` ran past the end of the text: done.
                    break


def tokenize(sql, encoding=None):
    """Tokenize sql.

    Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream
    of ``(token type, value)`` items.

    If *encoding* is given it replaces the lexer's default encoding.
    """
    lexer = Lexer()
    if encoding is not None:
        lexer.encoding = encoding
    return lexer.get_tokens(sql)

# http://git-wip-us.apache.org/repos/asf/impala/blob/49413d9c/shell/ext-py/sqlparse-0.1.14/sqlparse/pipeline.py
# ----------------------------------------------------------------------
# diff --git a/shell/ext-py/sqlparse-0.1.14/sqlparse/pipeline.py b/shell/ext-py/sqlparse-0.1.14/sqlparse/pipeline.py
# deleted file mode 100644
# index 34dad19..0000000
# --- a/shell/ext-py/sqlparse-0.1.14/sqlparse/pipeline.py
# +++ /dev/null
# @@ -1,31 +0,0 @@

# Copyright (C) 2011 Jesus Leganes "piranna", [email protected]
#
# This module is part of python-sqlparse and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
- -from types import GeneratorType - - -class Pipeline(list): - """Pipeline to process filters sequentially""" - - def __call__(self, stream): - """Run the pipeline - - Return a static (non generator) version of the result - """ - - # Run the stream over all the filters on the pipeline - for filter in self: - # Functions and callable objects (objects with '__call__' method) - if callable(filter): - stream = filter(stream) - - # Normal filters (objects with 'process' method) - else: - stream = filter.process(None, stream) - - # If last filter return a generator, staticalize it inside a list - if isinstance(stream, GeneratorType): - return list(stream) - return stream http://git-wip-us.apache.org/repos/asf/impala/blob/49413d9c/shell/ext-py/sqlparse-0.1.14/sqlparse/sql.py ---------------------------------------------------------------------- diff --git a/shell/ext-py/sqlparse-0.1.14/sqlparse/sql.py b/shell/ext-py/sqlparse-0.1.14/sqlparse/sql.py deleted file mode 100644 index b8e4090..0000000 --- a/shell/ext-py/sqlparse-0.1.14/sqlparse/sql.py +++ /dev/null @@ -1,639 +0,0 @@ -# -*- coding: utf-8 -*- - -"""This module contains classes representing syntactical elements of SQL.""" - -import re -import sys - -from sqlparse import tokens as T - - -class Token(object): - """Base class for all other classes in this module. - - It represents a single token and has two instance attributes: - ``value`` is the unchange value of the token and ``ttype`` is - the type of the token. 
- """ - - __slots__ = ('value', 'ttype', 'parent', 'normalized', 'is_keyword') - - def __init__(self, ttype, value): - self.value = value - if ttype in T.Keyword: - self.normalized = value.upper() - else: - self.normalized = value - self.ttype = ttype - self.is_keyword = ttype in T.Keyword - self.parent = None - - def __str__(self): - if sys.version_info[0] == 3: - return self.value - else: - return unicode(self).encode('utf-8') - - def __repr__(self): - short = self._get_repr_value() - if sys.version_info[0] < 3: - short = short.encode('utf-8') - return '<%s \'%s\' at 0x%07x>' % (self._get_repr_name(), - short, id(self)) - - def __unicode__(self): - """Returns a unicode representation of this object.""" - return self.value or '' - - def to_unicode(self): - """Returns a unicode representation of this object. - - .. deprecated:: 0.1.5 - Use ``unicode(token)`` (for Python 3: ``str(token)``) instead. - """ - return unicode(self) - - def _get_repr_name(self): - return str(self.ttype).split('.')[-1] - - def _get_repr_value(self): - raw = unicode(self) - if len(raw) > 7: - raw = raw[:6] + u'...' - return re.sub('\s+', ' ', raw) - - def flatten(self): - """Resolve subgroups.""" - yield self - - def match(self, ttype, values, regex=False): - """Checks whether the token matches the given arguments. - - *ttype* is a token type. If this token doesn't match the given token - type. - *values* is a list of possible values for this token. The values - are OR'ed together so if only one of the values matches ``True`` - is returned. Except for keyword tokens the comparison is - case-sensitive. For convenience it's ok to pass in a single string. - If *regex* is ``True`` (default is ``False``) the given values are - treated as regular expressions. 
- """ - type_matched = self.ttype is ttype - if not type_matched or values is None: - return type_matched - - if regex: - if isinstance(values, basestring): - values = set([values]) - - if self.ttype is T.Keyword: - values = set(re.compile(v, re.IGNORECASE) for v in values) - else: - values = set(re.compile(v) for v in values) - - for pattern in values: - if pattern.search(self.value): - return True - return False - - if isinstance(values, basestring): - if self.is_keyword: - return values.upper() == self.normalized - return values == self.value - - if self.is_keyword: - for v in values: - if v.upper() == self.normalized: - return True - return False - - return self.value in values - - def is_group(self): - """Returns ``True`` if this object has children.""" - return False - - def is_whitespace(self): - """Return ``True`` if this token is a whitespace token.""" - return self.ttype and self.ttype in T.Whitespace - - def within(self, group_cls): - """Returns ``True`` if this token is within *group_cls*. - - Use this method for example to check if an identifier is within - a function: ``t.within(sql.Function)``. - """ - parent = self.parent - while parent: - if isinstance(parent, group_cls): - return True - parent = parent.parent - return False - - def is_child_of(self, other): - """Returns ``True`` if this token is a direct child of *other*.""" - return self.parent == other - - def has_ancestor(self, other): - """Returns ``True`` if *other* is in this tokens ancestry.""" - parent = self.parent - while parent: - if parent == other: - return True - parent = parent.parent - return False - - -class TokenList(Token): - """A group of tokens. - - It has an additional instance attribute ``tokens`` which holds a - list of child-tokens. 
- """ - - __slots__ = ('value', 'ttype', 'tokens') - - def __init__(self, tokens=None): - if tokens is None: - tokens = [] - self.tokens = tokens - Token.__init__(self, None, self._to_string()) - - def __unicode__(self): - return self._to_string() - - def __str__(self): - str_ = self._to_string() - if sys.version_info[0] < 2: - str_ = str_.encode('utf-8') - return str_ - - def _to_string(self): - if sys.version_info[0] == 3: - return ''.join(x.value for x in self.flatten()) - else: - return ''.join(unicode(x) for x in self.flatten()) - - def _get_repr_name(self): - return self.__class__.__name__ - - def _pprint_tree(self, max_depth=None, depth=0): - """Pretty-print the object tree.""" - indent = ' ' * (depth * 2) - for idx, token in enumerate(self.tokens): - if token.is_group(): - pre = ' +-' - else: - pre = ' | ' - print '%s%s%d %s \'%s\'' % (indent, pre, idx, - token._get_repr_name(), - token._get_repr_value()) - if (token.is_group() and (max_depth is None or depth < max_depth)): - token._pprint_tree(max_depth, depth + 1) - - def _remove_quotes(self, val): - """Helper that removes surrounding quotes from strings.""" - if not val: - return val - if val[0] in ('"', '\'') and val[-1] == val[0]: - val = val[1:-1] - return val - - def get_token_at_offset(self, offset): - """Returns the token that is on position offset.""" - idx = 0 - for token in self.flatten(): - end = idx + len(token.value) - if idx <= offset <= end: - return token - idx = end - - def flatten(self): - """Generator yielding ungrouped tokens. - - This method is recursively called for all child tokens. 
- """ - for token in self.tokens: - if isinstance(token, TokenList): - for item in token.flatten(): - yield item - else: - yield token - -# def __iter__(self): -# return self -# -# def next(self): -# for token in self.tokens: -# yield token - - def is_group(self): - return True - - def get_sublists(self): -# return [x for x in self.tokens if isinstance(x, TokenList)] - for x in self.tokens: - if isinstance(x, TokenList): - yield x - - @property - def _groupable_tokens(self): - return self.tokens - - def token_first(self, ignore_whitespace=True): - """Returns the first child token. - - If *ignore_whitespace* is ``True`` (the default), whitespace - tokens are ignored. - """ - for token in self.tokens: - if ignore_whitespace and token.is_whitespace(): - continue - return token - - def token_next_by_instance(self, idx, clss): - """Returns the next token matching a class. - - *idx* is where to start searching in the list of child tokens. - *clss* is a list of classes the token should be an instance of. - - If no matching token can be found ``None`` is returned. 
- """ - if not isinstance(clss, (list, tuple)): - clss = (clss,) - - for token in self.tokens[idx:]: - if isinstance(token, clss): - return token - - def token_next_by_type(self, idx, ttypes): - """Returns next matching token by it's token type.""" - if not isinstance(ttypes, (list, tuple)): - ttypes = [ttypes] - - for token in self.tokens[idx:]: - if token.ttype in ttypes: - return token - - def token_next_match(self, idx, ttype, value, regex=False): - """Returns next token where it's ``match`` method returns ``True``.""" - if not isinstance(idx, int): - idx = self.token_index(idx) - - for n in xrange(idx, len(self.tokens)): - token = self.tokens[n] - if token.match(ttype, value, regex): - return token - - def token_not_matching(self, idx, funcs): - for token in self.tokens[idx:]: - passed = False - for func in funcs: - if func(token): - passed = True - break - - if not passed: - return token - - def token_matching(self, idx, funcs): - for token in self.tokens[idx:]: - for func in funcs: - if func(token): - return token - - def token_prev(self, idx, skip_ws=True): - """Returns the previous token relative to *idx*. - - If *skip_ws* is ``True`` (the default) whitespace tokens are ignored. - ``None`` is returned if there's no previous token. - """ - if idx is None: - return None - - if not isinstance(idx, int): - idx = self.token_index(idx) - - while idx: - idx -= 1 - if self.tokens[idx].is_whitespace() and skip_ws: - continue - return self.tokens[idx] - - def token_next(self, idx, skip_ws=True): - """Returns the next token relative to *idx*. - - If *skip_ws* is ``True`` (the default) whitespace tokens are ignored. - ``None`` is returned if there's no next token. 
- """ - if idx is None: - return None - - if not isinstance(idx, int): - idx = self.token_index(idx) - - while idx < len(self.tokens) - 1: - idx += 1 - if self.tokens[idx].is_whitespace() and skip_ws: - continue - return self.tokens[idx] - - def token_index(self, token): - """Return list index of token.""" - return self.tokens.index(token) - - def tokens_between(self, start, end, exclude_end=False): - """Return all tokens between (and including) start and end. - - If *exclude_end* is ``True`` (default is ``False``) the end token - is included too. - """ - # FIXME(andi): rename exclude_end to inlcude_end - if exclude_end: - offset = 0 - else: - offset = 1 - end_idx = self.token_index(end) + offset - start_idx = self.token_index(start) - return self.tokens[start_idx:end_idx] - - def group_tokens(self, grp_cls, tokens, ignore_ws=False): - """Replace tokens by an instance of *grp_cls*.""" - idx = self.token_index(tokens[0]) - if ignore_ws: - while tokens and tokens[-1].is_whitespace(): - tokens = tokens[:-1] - for t in tokens: - self.tokens.remove(t) - grp = grp_cls(tokens) - for token in tokens: - token.parent = grp - grp.parent = self - self.tokens.insert(idx, grp) - return grp - - def insert_before(self, where, token): - """Inserts *token* before *where*.""" - self.tokens.insert(self.token_index(where), token) - - def insert_after(self, where, token, skip_ws=True): - """Inserts *token* after *where*.""" - next_token = self.token_next(where, skip_ws=skip_ws) - if next_token is None: - self.tokens.append(token) - else: - self.tokens.insert(self.token_index(next_token), token) - - def has_alias(self): - """Returns ``True`` if an alias is present.""" - return self.get_alias() is not None - - def get_alias(self): - """Returns the alias for this identifier or ``None``.""" - kw = self.token_next_match(0, T.Keyword, 'AS') - if kw is not None: - alias = self.token_next(self.token_index(kw)) - if alias is None: - return None - else: - next_ = self.token_next_by_instance(0, 
Identifier) - if next_ is None: - next_ = self.token_next_by_type(0, T.String.Symbol) - if next_ is None: - return None - alias = next_ - if isinstance(alias, Identifier): - return alias.get_name() - return self._remove_quotes(unicode(alias)) - - def get_name(self): - """Returns the name of this identifier. - - This is either it's alias or it's real name. The returned valued can - be considered as the name under which the object corresponding to - this identifier is known within the current statement. - """ - alias = self.get_alias() - if alias is not None: - return alias - return self.get_real_name() - - def get_real_name(self): - """Returns the real name (object name) of this identifier.""" - # a.b - dot = self.token_next_match(0, T.Punctuation, '.') - if dot is None: - next_ = self.token_next_by_type(0, T.Name) - if next_ is not None: - return self._remove_quotes(next_.value) - return None - - next_ = self.token_next_by_type(self.token_index(dot), - (T.Name, T.Wildcard, T.String.Symbol)) - if next_ is None: # invalid identifier, e.g. "a." - return None - return self._remove_quotes(next_.value) - - -class Statement(TokenList): - """Represents a SQL statement.""" - - __slots__ = ('value', 'ttype', 'tokens') - - def get_type(self): - """Returns the type of a statement. - - The returned value is a string holding an upper-cased reprint of - the first DML or DDL keyword. If the first token in this group - isn't a DML or DDL keyword "UNKNOWN" is returned. - """ - first_token = self.token_first() - if first_token is None: - # An "empty" statement that either has not tokens at all - # or only whitespace tokens. - return 'UNKNOWN' - - elif first_token.ttype in (T.Keyword.DML, T.Keyword.DDL): - return first_token.normalized - - return 'UNKNOWN' - - -class Identifier(TokenList): - """Represents an identifier. - - Identifiers may have aliases or typecasts. 
- """ - - __slots__ = ('value', 'ttype', 'tokens') - - def get_parent_name(self): - """Return name of the parent object if any. - - A parent object is identified by the first occuring dot. - """ - dot = self.token_next_match(0, T.Punctuation, '.') - if dot is None: - return None - prev_ = self.token_prev(self.token_index(dot)) - if prev_ is None: # something must be verry wrong here.. - return None - return self._remove_quotes(prev_.value) - - def is_wildcard(self): - """Return ``True`` if this identifier contains a wildcard.""" - token = self.token_next_by_type(0, T.Wildcard) - return token is not None - - def get_typecast(self): - """Returns the typecast or ``None`` of this object as a string.""" - marker = self.token_next_match(0, T.Punctuation, '::') - if marker is None: - return None - next_ = self.token_next(self.token_index(marker), False) - if next_ is None: - return None - return unicode(next_) - - def get_ordering(self): - """Returns the ordering or ``None`` as uppercase string.""" - ordering = self.token_next_by_type(0, T.Keyword.Order) - if ordering is None: - return None - return ordering.value.upper() - - -class IdentifierList(TokenList): - """A list of :class:`~sqlparse.sql.Identifier`\'s.""" - - __slots__ = ('value', 'ttype', 'tokens') - - def get_identifiers(self): - """Returns the identifiers. - - Whitespaces and punctuations are not included in this generator. 
- """ - for x in self.tokens: - if not x.is_whitespace() and not x.match(T.Punctuation, ','): - yield x - - -class Parenthesis(TokenList): - """Tokens between parenthesis.""" - __slots__ = ('value', 'ttype', 'tokens') - - @property - def _groupable_tokens(self): - return self.tokens[1:-1] - - -class Assignment(TokenList): - """An assignment like 'var := val;'""" - __slots__ = ('value', 'ttype', 'tokens') - - -class If(TokenList): - """An 'if' clause with possible 'else if' or 'else' parts.""" - __slots__ = ('value', 'ttype', 'tokens') - - -class For(TokenList): - """A 'FOR' loop.""" - __slots__ = ('value', 'ttype', 'tokens') - - -class Comparison(TokenList): - """A comparison used for example in WHERE clauses.""" - __slots__ = ('value', 'ttype', 'tokens') - - @property - def left(self): - return self.tokens[0] - - @property - def right(self): - return self.tokens[-1] - - -class Comment(TokenList): - """A comment.""" - __slots__ = ('value', 'ttype', 'tokens') - - -class Where(TokenList): - """A WHERE clause.""" - __slots__ = ('value', 'ttype', 'tokens') - - -class Case(TokenList): - """A CASE statement with one or more WHEN and possibly an ELSE part.""" - - __slots__ = ('value', 'ttype', 'tokens') - - def get_cases(self): - """Returns a list of 2-tuples (condition, value). - - If an ELSE exists condition is None. 
- """ - CONDITION = 1 - VALUE = 2 - - ret = [] - mode = CONDITION - - for token in self.tokens: - # Set mode from the current statement - if token.match(T.Keyword, 'CASE'): - continue - - elif token.match(T.Keyword, 'WHEN'): - ret.append(([], [])) - mode = CONDITION - - elif token.match(T.Keyword, 'THEN'): - mode = VALUE - - elif token.match(T.Keyword, 'ELSE'): - ret.append((None, [])) - mode = VALUE - - elif token.match(T.Keyword, 'END'): - mode = None - - # First condition without preceding WHEN - if mode and not ret: - ret.append(([], [])) - - # Append token depending of the current mode - if mode == CONDITION: - ret[-1][0].append(token) - - elif mode == VALUE: - ret[-1][1].append(token) - - # Return cases list - return ret - - -class Function(TokenList): - """A function or procedure call.""" - - __slots__ = ('value', 'ttype', 'tokens') - - def get_parameters(self): - """Return a list of parameters.""" - parenthesis = self.tokens[-1] - for t in parenthesis.tokens: - if isinstance(t, IdentifierList): - return t.get_identifiers() - elif isinstance(t, Identifier) or \ - isinstance(t, Function) or \ - t.ttype in T.Literal: - return [t,] - return [] - - -class Begin(TokenList): - """A BEGIN/END block.""" - - __slots__ = ('value', 'ttype', 'tokens') http://git-wip-us.apache.org/repos/asf/impala/blob/49413d9c/shell/ext-py/sqlparse-0.1.14/sqlparse/tokens.py ---------------------------------------------------------------------- diff --git a/shell/ext-py/sqlparse-0.1.14/sqlparse/tokens.py b/shell/ext-py/sqlparse-0.1.14/sqlparse/tokens.py deleted file mode 100644 index 01a9b89..0000000 --- a/shell/ext-py/sqlparse-0.1.14/sqlparse/tokens.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (C) 2008 Andi Albrecht, [email protected] -# -# This module is part of python-sqlparse and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php. - -# The Token implementation is based on pygment's token system written -# by Georg Brandl. 
# http://pygments.org/

"""Tokens

Defines the hierarchical token types (``Token``, ``Token.Keyword``,
``Token.Keyword.DML``, ...) together with module-level shortcuts for them.
"""


class _TokenType(tuple):
    """A node in the token type hierarchy.

    Each instance is a tuple holding the path of names from the root, so
    ``Token.Keyword.DML`` is ``('Keyword', 'DML')`` and the root ``Token``
    is the empty tuple.  Child types are created on first attribute access
    and cached on their parent, which makes every type a singleton.
    """

    # Enclosing token type; stays ``None`` for the root type.
    parent = None

    def split(self):
        """Return the chain of types from the root down to this type."""
        chain = []
        node = self
        while node is not None:
            chain.append(node)
            node = node.parent
        chain.reverse()
        return chain

    def __contains__(self, val):
        """Return ``True`` if *val* is this type or one of its subtypes.

        ``None`` and values that are not tuples are never contained; they
        yield ``False`` instead of raising ``TypeError``.
        """
        if self is val:
            return True
        # A subtype's path starts with this type's own path.
        return isinstance(val, tuple) and val[:len(self)] == self

    def __getattr__(self, val):
        """Create, cache and return the child type named *val*.

        Only names starting with an uppercase letter denote child types.
        Any other name goes through normal tuple attribute lookup, which
        raises ``AttributeError`` when the attribute does not exist.
        """
        if not val or not val[0].isupper():
            return tuple.__getattribute__(self, val)
        new = _TokenType(self + (val,))
        # Cache on the instance so later lookups return the same object
        # without reaching __getattr__ again.
        setattr(self, val, new)
        new.parent = self
        return new

    def __hash__(self):
        return hash(tuple(self))

    def __repr__(self):
        # 'Token' for the root, 'Token.A.B' for nested types.
        return '.'.join(('Token',) + self)


Token = _TokenType()

# Special token types
Text = Token.Text
Whitespace = Text.Whitespace
Newline = Whitespace.Newline
Error = Token.Error
# Text that doesn't belong to this lexer (e.g. HTML in PHP)
Other = Token.Other

# Common token types for source code
Keyword = Token.Keyword
Name = Token.Name
Literal = Token.Literal
String = Literal.String
Number = Literal.Number
Punctuation = Token.Punctuation
Operator = Token.Operator
Comparison = Operator.Comparison
Wildcard = Token.Wildcard
Comment = Token.Comment
Assignment = Token.Assignment
# Earlier releases misspelled this type as ``Token.Assignement``; keep the
# old attribute pointing at the same object for backward compatibility.
Token.Assignement = Assignment

# Generic types for non-source code
Generic = Token.Generic

# String and some others are not direct children of Token.
# alias them:
Token.Token = Token
Token.String = String
Token.Number = Number

# SQL specific tokens
DML = Keyword.DML
DDL = Keyword.DDL
Command = Keyword.Command

Group = Token.Group
Group.Parenthesis = Token.Group.Parenthesis
Group.Comment = Token.Group.Comment
Group.Where = Token.Group.Where
