Author: Armin Rigo <ar...@tunes.org> Branch: py3.5-fstring-pep498 Changeset: r89721:9f329ca0b48f Date: 2017-01-24 08:50 +0100 http://bitbucket.org/pypy/pypy/changeset/9f329ca0b48f/
Log: Split the logic into its own file diff --git a/pypy/interpreter/astcompiler/astbuilder.py b/pypy/interpreter/astcompiler/astbuilder.py --- a/pypy/interpreter/astcompiler/astbuilder.py +++ b/pypy/interpreter/astcompiler/astbuilder.py @@ -1,9 +1,9 @@ from pypy.interpreter.astcompiler import ast, consts, misc from pypy.interpreter.astcompiler import asthelpers # Side effects +from pypy.interpreter.astcompiler import fstring from pypy.interpreter import error from pypy.interpreter.pyparser.pygram import syms, tokens from pypy.interpreter.pyparser.error import SyntaxError -from pypy.interpreter.pyparser import parsestring from rpython.rlib.objectmodel import always_inline, we_are_translated @@ -1191,150 +1191,6 @@ i += 3 return (i,key,value) - def _add_constant_string(self, joined_pieces, w_string, atom_node): - space = self.space - is_unicode = space.isinstance_w(w_string, space.w_unicode) - # Implement implicit string concatenation. - if joined_pieces: - prev = joined_pieces[-1] - if is_unicode and isinstance(prev, ast.Str): - w_string = space.add(prev.s, w_string) - del joined_pieces[-1] - elif not is_unicode and isinstance(prev, ast.Bytes): - w_string = space.add(prev.s, w_string) - del joined_pieces[-1] - node = ast.Str if is_unicode else ast.Bytes - joined_pieces.append(node(w_string, atom_node.get_lineno(), - atom_node.get_column())) - - def _f_constant_string(self, joined_pieces, u, atom_node): - self._add_constant_string(joined_pieces, self.space.newunicode(u), - atom_node) - - def _f_string_compile(self, source, atom_node): - # Note: a f-string is kept as a single literal up to here. - # At this point only, we recursively call the AST compiler - # on all the '{expr}' parts. The 'expr' part is not parsed - # or even tokenized together with the rest of the source code! 
- from pypy.interpreter.pyparser import pyparse - - # complain if 'source' is only whitespace or an empty string - for c in source: - if c not in ' \t\n\r\v\f': - break - else: - self.error("f-string: empty expression not allowed", atom_node) - - if self.recursive_parser is None: - self.error("internal error: parser not available for parsing " - "the expressions inside the f-string", atom_node) - source = '(%s)' % source.encode('utf-8') - - info = pyparse.CompileInfo("<fstring>", "eval", - consts.PyCF_SOURCE_IS_UTF8 | - consts.PyCF_IGNORE_COOKIE | - consts.PyCF_REFUSE_COMMENTS, - optimize=self.compile_info.optimize) - parse_tree = self.recursive_parser.parse_source(source, info) - return ast_from_node(self.space, parse_tree, info) - - def _f_string_expr(self, joined_pieces, u, start, atom_node, rec=0): - conversion = -1 # the conversion char. -1 if not specified. - format_spec = None - nested_depth = 0 # nesting level for braces/parens/brackets in exprs - p = start - while p < len(u): - ch = u[p] - p += 1 - if ch in u'[{(': - nested_depth += 1 - elif nested_depth > 0 and ch in u']})': - nested_depth -= 1 - elif nested_depth == 0 and ch in u'!:}': - # special-case '!=' - if ch == u'!' 
and p < len(u) and u[p] == u'=': - continue - break # normal way out of this loop - else: - ch = u'\x00' - # - if nested_depth > 0: - self.error("f-string: mismatched '(', '{' or '['", atom_node) - end_expression = p - 1 - if ch == u'!': - if p + 1 < len(u): - conversion = ord(u[p]) - ch = u[p + 1] - p += 2 - if conversion not in (ord('s'), ord('r'), ord('a')): - self.error("f-string: invalid conversion character: " - "expected 's', 'r', or 'a'", atom_node) - if ch == u':': - if rec >= 2: - self.error("f-string: expressions nested too deeply", atom_node) - subpieces = [] - p = self._parse_f_string(subpieces, u, p, atom_node, rec + 1) - format_spec = self._f_string_to_ast_node(subpieces, atom_node) - ch = u[p] if p >= 0 else u'\x00' - p += 1 - - if ch != u'}': - self.error("f-string: expecting '}'", atom_node) - end_f_string = p - assert end_expression >= start - expr = self._f_string_compile(u[start:end_expression], atom_node) - assert isinstance(expr, ast.Expression) - fval = ast.FormattedValue(expr.body, conversion, format_spec, - atom_node.get_lineno(), - atom_node.get_column()) - joined_pieces.append(fval) - return end_f_string - - def _parse_f_string(self, joined_pieces, u, start, atom_node, rec=0): - space = self.space - p1 = u.find(u'{', start) - prestart = start - while True: - if p1 < 0: - p1 = len(u) - p2 = u.find(u'}', start, p1) - if p2 >= 0: - self._f_constant_string(joined_pieces, u[prestart:p2], - atom_node) - pn = p2 + 1 - if pn < len(u) and u[pn] == u'}': # '}}' => single '}' - start = pn + 1 - prestart = pn - continue - return p2 # found a single '}', stop here - self._f_constant_string(joined_pieces, u[prestart:p1], atom_node) - if p1 == len(u): - return -1 # no more '{' or '}' left - pn = p1 + 1 - if pn < len(u) and u[pn] == u'{': # '{{' => single '{' - start = pn + 1 - prestart = pn - else: - assert u[p1] == u'{' - start = self._f_string_expr(joined_pieces, u, pn, - atom_node, rec) - assert u[start - 1] == u'}' - prestart = start - p1 = 
u.find(u'{', start) - - def _f_string_to_ast_node(self, joined_pieces, atom_node): - # remove empty Strs - values = [node for node in joined_pieces - if not (isinstance(node, ast.Str) and not node.s)] - if len(values) > 1: - return ast.JoinedStr(values, atom_node.get_lineno(), - atom_node.get_column()) - elif len(values) == 1: - return values[0] - else: - assert len(joined_pieces) > 0 # they are all empty strings - return joined_pieces[0] - def handle_atom(self, atom_node): first_child = atom_node.get_child(0) first_child_type = first_child.type @@ -1354,38 +1210,7 @@ first_child.get_column()) # elif first_child_type == tokens.STRING: - space = self.space - encoding = self.compile_info.encoding - joined_pieces = [] - for i in range(atom_node.num_children()): - try: - w_next, saw_f = parsestring.parsestr( - space, encoding, atom_node.get_child(i).get_value()) - except error.OperationError as e: - if not (e.match(space, space.w_UnicodeError) or - e.match(space, space.w_ValueError)): - raise - # Unicode/ValueError in literal: turn into SyntaxError - raise self.error(e.errorstr(space), atom_node) - if not saw_f: - self._add_constant_string(joined_pieces, w_next, atom_node) - else: - p = self._parse_f_string(joined_pieces, - space.unicode_w(w_next), 0, - atom_node) - if p != -1: - self.error("f-string: single '}' is not allowed", - atom_node) - if len(joined_pieces) == 1: # <= the common path - return joined_pieces[0] # ast.Str, Bytes or FormattedValue - # with more than one piece, it is a combination of Str and - # FormattedValue pieces---if there is a Bytes, then we got - # an invalid mixture of bytes and unicode literals - for node in joined_pieces: - if isinstance(node, ast.Bytes): - self.error("cannot mix bytes and nonbytes literals", - atom_node) - return self._f_string_to_ast_node(joined_pieces, atom_node) + return fstring.string_parse_literal(self, atom_node) # elif first_child_type == tokens.NUMBER: num_value = self.parse_number(first_child.get_value()) diff 
# New file added by this changeset (diff --git, new file mode 100644,
# hunk @@ -0,0 +1,185 @@):
#   pypy/interpreter/astcompiler/fstring.py
"""Turn STRING tokens, including PEP 498 f-strings, into AST nodes.

All helpers receive the AST builder explicitly as 'astbuilder'.
string_parse_literal() is the entry point; the other functions are the
pieces it is built from.
"""
from pypy.interpreter.astcompiler import ast, consts
from pypy.interpreter.pyparser import parsestring
from pypy.interpreter import error


def add_constant_string(astbuilder, joined_pieces, w_string, atom_node):
    """Append the wrapped constant 'w_string' to the list 'joined_pieces'.

    The new node is an ast.Str if 'w_string' is a unicode object and an
    ast.Bytes otherwise.  If the last piece already in the list is a
    constant of the same kind, the two are merged into one node (implicit
    string concatenation).  Position information comes from 'atom_node'.
    """
    space = astbuilder.space
    is_unicode = space.isinstance_w(w_string, space.w_unicode)
    # Implement implicit string concatenation.
    if joined_pieces:
        prev = joined_pieces[-1]
        if is_unicode and isinstance(prev, ast.Str):
            w_string = space.add(prev.s, w_string)
            del joined_pieces[-1]
        elif not is_unicode and isinstance(prev, ast.Bytes):
            w_string = space.add(prev.s, w_string)
            del joined_pieces[-1]
    node = ast.Str if is_unicode else ast.Bytes
    joined_pieces.append(node(w_string, atom_node.get_lineno(),
                              atom_node.get_column()))


def f_constant_string(astbuilder, joined_pieces, u, atom_node):
    """Wrap the unicode string 'u' and add it as a constant piece."""
    space = astbuilder.space
    add_constant_string(astbuilder, joined_pieces, space.newunicode(u),
                        atom_node)


def f_string_compile(astbuilder, source, atom_node):
    """Compile the text of one '{expr}' part and return its AST.

    'source' is the expression text without the surrounding braces.  It is
    wrapped in parentheses, parsed in "eval" mode with
    astbuilder.recursive_parser, and converted with ast_from_node().
    Empty or whitespace-only expressions, and a missing recursive parser,
    are reported through astbuilder.error().
    """
    # Note: a f-string is kept as a single literal up to here.
    # At this point only, we recursively call the AST compiler
    # on all the '{expr}' parts.  The 'expr' part is not parsed
    # or even tokenized together with the rest of the source code!
    from pypy.interpreter.pyparser import pyparse
    from pypy.interpreter.astcompiler.astbuilder import ast_from_node

    # complain if 'source' is only whitespace or an empty string
    for c in source:
        if c not in ' \t\n\r\v\f':
            break
    else:
        astbuilder.error("f-string: empty expression not allowed", atom_node)

    if astbuilder.recursive_parser is None:
        astbuilder.error("internal error: parser not available for parsing "
                         "the expressions inside the f-string", atom_node)
    source = '(%s)' % source.encode('utf-8')

    info = pyparse.CompileInfo("<fstring>", "eval",
                               consts.PyCF_SOURCE_IS_UTF8 |
                               consts.PyCF_IGNORE_COOKIE |
                               consts.PyCF_REFUSE_COMMENTS,
                               optimize=astbuilder.compile_info.optimize)
    parse_tree = astbuilder.recursive_parser.parse_source(source, info)
    return ast_from_node(astbuilder.space, parse_tree, info)


def f_string_expr(astbuilder, joined_pieces, u, start, atom_node, rec=0):
    """Parse one replacement field of the f-string 'u'.

    'start' is the index just after the opening '{'.  The expression text,
    the optional '!s' / '!r' / '!a' conversion and the optional ':spec'
    are extracted, and an ast.FormattedValue is appended to
    'joined_pieces'.  'rec' is the nesting level of format specs; a ':'
    found at level 2 or more is reported as nested too deeply.

    Returns the index in 'u' just after the closing '}'.
    """
    conversion = -1     # the conversion char.  -1 if not specified.
    format_spec = None
    nested_depth = 0    # nesting level for braces/parens/brackets in exprs
    p = start
    # NOTE(review): this scan only tracks brackets.  A '!', ':' or '}'
    # inside a quoted string in the expression, outside any bracket, ends
    # the expression here -- confirm whether quote tracking is needed.
    while p < len(u):
        ch = u[p]
        p += 1
        if ch in u'[{(':
            nested_depth += 1
        elif nested_depth > 0 and ch in u']})':
            nested_depth -= 1
        elif nested_depth == 0 and ch in u'!:}':
            # special-case '!=': it is an operator, not a conversion
            if ch == u'!' and p < len(u) and u[p] == u'=':
                continue
            break     # normal way out of this loop
    else:
        # ran off the end of 'u' without finding a terminator
        ch = u'\x00'
    #
    if nested_depth > 0:
        astbuilder.error("f-string: mismatched '(', '{' or '['", atom_node)
    end_expression = p - 1
    if ch == u'!':
        # need one conversion character plus the character following it
        if p + 1 < len(u):
            conversion = ord(u[p])
            ch = u[p + 1]
            p += 2
        if conversion not in (ord('s'), ord('r'), ord('a')):
            astbuilder.error("f-string: invalid conversion character: "
                             "expected 's', 'r', or 'a'", atom_node)
    if ch == u':':
        if rec >= 2:
            astbuilder.error("f-string: expressions nested too deeply",
                             atom_node)
        # the format spec is itself parsed like an f-string, one level deeper
        subpieces = []
        p = parse_f_string(astbuilder, subpieces, u, p, atom_node, rec + 1)
        format_spec = f_string_to_ast_node(astbuilder, subpieces, atom_node)
        # parse_f_string() returns -1 if it reached the end without a '}'
        ch = u[p] if p >= 0 else u'\x00'
        p += 1

    if ch != u'}':
        astbuilder.error("f-string: expecting '}'", atom_node)
    end_f_string = p
    assert end_expression >= start
    expr = f_string_compile(astbuilder, u[start:end_expression], atom_node)
    assert isinstance(expr, ast.Expression)
    fval = ast.FormattedValue(expr.body, conversion, format_spec,
                              atom_node.get_lineno(),
                              atom_node.get_column())
    joined_pieces.append(fval)
    return end_f_string


def parse_f_string(astbuilder, joined_pieces, u, start, atom_node, rec=0):
    """Split 'u', from index 'start', into constant and '{expr}' pieces.

    Constant text is added with f_constant_string() (with '{{' and '}}'
    each contributing a single brace) and replacement fields are handled
    by f_string_expr(); everything is appended to 'joined_pieces'.

    Returns the index of the first single '}' found outside a replacement
    field, where parsing stops, or -1 if the end of 'u' was reached.
    """
    p1 = u.find(u'{', start)
    prestart = start
    while True:
        if p1 < 0:
            p1 = len(u)
        p2 = u.find(u'}', start, p1)
        if p2 >= 0:
            f_constant_string(astbuilder, joined_pieces, u[prestart:p2],
                              atom_node)
            pn = p2 + 1
            if pn < len(u) and u[pn] == u'}':    # '}}' => single '}'
                start = pn + 1
                prestart = pn
                continue
            return p2     # found a single '}', stop here
        f_constant_string(astbuilder, joined_pieces, u[prestart:p1], atom_node)
        if p1 == len(u):
            return -1     # no more '{' or '}' left
        pn = p1 + 1
        if pn < len(u) and u[pn] == u'{':    # '{{' => single '{'
            start = pn + 1
            prestart = pn
        else:
            assert u[p1] == u'{'
            start = f_string_expr(astbuilder, joined_pieces, u, pn,
                                  atom_node, rec)
            assert u[start - 1] == u'}'
            prestart = start
        p1 = u.find(u'{', start)


def f_string_to_ast_node(astbuilder, joined_pieces, atom_node):
    """Reduce the list 'joined_pieces' to a single AST node.

    Empty ast.Str pieces are dropped.  Several remaining pieces give an
    ast.JoinedStr, a single one is returned as is, and if every piece was
    an empty string the first of them is returned.
    """
    space = astbuilder.space
    # remove empty Strs.  'node.s' is a wrapped object, so its truth value
    # is asked of the object space rather than tested with a plain 'not'.
    values = [node for node in joined_pieces
              if not (isinstance(node, ast.Str)
                      and not space.is_true(node.s))]
    if len(values) > 1:
        return ast.JoinedStr(values, atom_node.get_lineno(),
                             atom_node.get_column())
    elif len(values) == 1:
        return values[0]
    else:
        assert len(joined_pieces) > 0    # they are all empty strings
        return joined_pieces[0]


def string_parse_literal(astbuilder, atom_node):
    """Build the AST node for a run of adjacent STRING tokens.

    Every child of 'atom_node' is decoded with parsestring.parsestr();
    plain literals are concatenated and f-strings are split into their
    pieces.  A UnicodeError or ValueError raised while decoding a literal
    is reported through astbuilder.error(); other OperationErrors are
    re-raised unchanged.  Mixing bytes with non-bytes literals and a stray
    single '}' in an f-string are reported through astbuilder.error() too.
    """
    space = astbuilder.space
    encoding = astbuilder.compile_info.encoding
    joined_pieces = []
    for i in range(atom_node.num_children()):
        try:
            w_next, saw_f = parsestring.parsestr(
                space, encoding, atom_node.get_child(i).get_value())
        except error.OperationError as e:
            if not (e.match(space, space.w_UnicodeError) or
                    e.match(space, space.w_ValueError)):
                raise
            # Unicode/ValueError in literal: turn into SyntaxError
            raise astbuilder.error(e.errorstr(space), atom_node)
        if not saw_f:
            add_constant_string(astbuilder, joined_pieces, w_next, atom_node)
        else:
            p = parse_f_string(astbuilder, joined_pieces,
                               space.unicode_w(w_next), 0,
                               atom_node)
            if p != -1:
                astbuilder.error("f-string: single '}' is not allowed",
                                 atom_node)
    if len(joined_pieces) == 1:   # <= the common path
        return joined_pieces[0]   # ast.Str, Bytes or FormattedValue
    # with more than one piece, it is a combination of Str and
    # FormattedValue pieces---if there is a Bytes, then we got
    # an invalid mixture of bytes and unicode literals
    for node in joined_pieces:
        if isinstance(node, ast.Bytes):
            astbuilder.error("cannot mix bytes and nonbytes literals",
                             atom_node)
    return f_string_to_ast_node(astbuilder, joined_pieces, atom_node)

# _______________________________________________
# pypy-commit mailing list
# pypy-commit@python.org
# https://mail.python.org/mailman/listinfo/pypy-commit