Author: Armin Rigo <ar...@tunes.org>
Branch: py3.5-fstring-pep498
Changeset: r89692:533c8eeffd5b
Date: 2017-01-22 20:26 +0100
http://bitbucket.org/pypy/pypy/changeset/533c8eeffd5b/
Log: in-progress diff --git a/pypy/interpreter/astcompiler/ast.py b/pypy/interpreter/astcompiler/ast.py --- a/pypy/interpreter/astcompiler/ast.py +++ b/pypy/interpreter/astcompiler/ast.py @@ -1670,6 +1670,10 @@ return Num.from_object(space, w_node) if space.isinstance_w(w_node, get(space).w_Str): return Str.from_object(space, w_node) + if space.isinstance_w(w_node, get(space).w_FormattedValue): + return FormattedValue.from_object(space, w_node) + if space.isinstance_w(w_node, get(space).w_JoinedStr): + return JoinedStr.from_object(space, w_node) if space.isinstance_w(w_node, get(space).w_Bytes): return Bytes.from_object(space, w_node) if space.isinstance_w(w_node, get(space).w_NameConstant): @@ -2554,6 +2558,98 @@ State.ast_type('Str', 'expr', ['s']) +class FormattedValue(expr): + + def __init__(self, value, conversion, format_spec, lineno, col_offset): + self.value = value + self.conversion = conversion + self.format_spec = format_spec + expr.__init__(self, lineno, col_offset) + + def walkabout(self, visitor): + visitor.visit_FormattedValue(self) + + def mutate_over(self, visitor): + self.value = self.value.mutate_over(visitor) + if self.format_spec: + self.format_spec = self.format_spec.mutate_over(visitor) + return visitor.visit_FormattedValue(self) + + def to_object(self, space): + w_node = space.call_function(get(space).w_FormattedValue) + w_value = self.value.to_object(space) # expr + space.setattr(w_node, space.wrap('value'), w_value) + w_conversion = space.wrap(self.conversion) # int + space.setattr(w_node, space.wrap('conversion'), w_conversion) + w_format_spec = self.format_spec.to_object(space) if self.format_spec is not None else space.w_None # expr + space.setattr(w_node, space.wrap('format_spec'), w_format_spec) + w_lineno = space.wrap(self.lineno) # int + space.setattr(w_node, space.wrap('lineno'), w_lineno) + w_col_offset = space.wrap(self.col_offset) # int + space.setattr(w_node, space.wrap('col_offset'), w_col_offset) + return w_node + + 
@staticmethod + def from_object(space, w_node): + w_value = get_field(space, w_node, 'value', False) + w_conversion = get_field(space, w_node, 'conversion', True) + w_format_spec = get_field(space, w_node, 'format_spec', True) + w_lineno = get_field(space, w_node, 'lineno', False) + w_col_offset = get_field(space, w_node, 'col_offset', False) + _value = expr.from_object(space, w_value) + if _value is None: + raise_required_value(space, w_node, 'value') + _conversion = space.int_w(w_conversion) + _format_spec = expr.from_object(space, w_format_spec) + _lineno = space.int_w(w_lineno) + _col_offset = space.int_w(w_col_offset) + return FormattedValue(_value, _conversion, _format_spec, _lineno, _col_offset) + +State.ast_type('FormattedValue', 'expr', ['value', 'conversion', 'format_spec']) + + +class JoinedStr(expr): + + def __init__(self, values, lineno, col_offset): + self.values = values + expr.__init__(self, lineno, col_offset) + + def walkabout(self, visitor): + visitor.visit_JoinedStr(self) + + def mutate_over(self, visitor): + if self.values: + visitor._mutate_sequence(self.values) + return visitor.visit_JoinedStr(self) + + def to_object(self, space): + w_node = space.call_function(get(space).w_JoinedStr) + if self.values is None: + values_w = [] + else: + values_w = [node.to_object(space) for node in self.values] # expr + w_values = space.newlist(values_w) + space.setattr(w_node, space.wrap('values'), w_values) + w_lineno = space.wrap(self.lineno) # int + space.setattr(w_node, space.wrap('lineno'), w_lineno) + w_col_offset = space.wrap(self.col_offset) # int + space.setattr(w_node, space.wrap('col_offset'), w_col_offset) + return w_node + + @staticmethod + def from_object(space, w_node): + w_values = get_field(space, w_node, 'values', False) + w_lineno = get_field(space, w_node, 'lineno', False) + w_col_offset = get_field(space, w_node, 'col_offset', False) + values_w = space.unpackiterable(w_values) + _values = [expr.from_object(space, w_item) for w_item in 
values_w] + _lineno = space.int_w(w_lineno) + _col_offset = space.int_w(w_col_offset) + return JoinedStr(_values, _lineno, _col_offset) + +State.ast_type('JoinedStr', 'expr', ['values']) + + class Bytes(expr): def __init__(self, s, lineno, col_offset): @@ -3924,6 +4020,10 @@ return self.default_visitor(node) def visit_Str(self, node): return self.default_visitor(node) + def visit_FormattedValue(self, node): + return self.default_visitor(node) + def visit_JoinedStr(self, node): + return self.default_visitor(node) def visit_Bytes(self, node): return self.default_visitor(node) def visit_NameConstant(self, node): @@ -4153,6 +4253,14 @@ def visit_Str(self, node): pass + def visit_FormattedValue(self, node): + node.value.walkabout(self) + if node.format_spec: + node.format_spec.walkabout(self) + + def visit_JoinedStr(self, node): + self.visit_sequence(node.values) + def visit_Bytes(self, node): pass diff --git a/pypy/interpreter/astcompiler/astbuilder.py b/pypy/interpreter/astcompiler/astbuilder.py --- a/pypy/interpreter/astcompiler/astbuilder.py +++ b/pypy/interpreter/astcompiler/astbuilder.py @@ -1189,7 +1189,58 @@ value = self.handle_expr(node.get_child(i+2)) i += 3 return (i,key,value) - + + def _add_constant_string(self, joined_pieces, w_string, atom_node): + space = self.space + is_unicode = space.isinstance_w(w_string, space.w_unicode) + # Implement implicit string concatenation. + if joined_pieces: + prev = joined_pieces[-1] + if is_unicode and isinstance(prev, ast.Str): + w_string = space.add(prev.s, w_string) + del joined_pieces[-1] + elif not is_unicode and isinstance(prev, ast.Bytes): + w_string = space.add(prev.s, w_string) + del joined_pieces[-1] + node = ast.Str if is_unicode else ast.Bytes + joined_pieces.append(node(w_string, atom_node.get_lineno(), + atom_node.get_column())) + + def _f_string_expr(self, joined_pieces, u, start, atom_node): + # Note: a f-string is kept as a single literal up to here. 
+ # At this point only, we recursively call the AST compiler + # on all the '{expr}' parts. The 'expr' part is not parsed + # or even tokenized together with the rest of the source code! + ... + + def _parse_f_string(self, joined_pieces, w_string, atom_node): + space = self.space + u = space.unicode_w(w_string) + conversion = -1 # the conversion char. -1 if not specified. + nested_depth = 0 # nesting level for braces/parens/brackets in exprs + start = 0 + p1 = u.find(u'{') + p2 = u.find(u'}') + while p1 >= 0 or p2 >= 0: + if p1 >= 0 and (p2 < 0 or p1 < p2): + pn = p1 + 1 + if pn < len(u) and u[pn] == u'{': # '{{' => single '{' + self._add_constant_string(space.newunicode(u[start:pn])) + start = pn + 1 + else: + start = self._f_string_expr(joined_pieces, u, pn, atom_node) + p1 = u.find(u'{', start) + else: + assert p2 >= 0 and (p1 < 0 or p2 < p1) + pn = p2 + 1 + if pn < len(u) and u[pn] == u'}': # '}}' => single '}' + self._add_constant_string(space.newunicode(u[start:pn])) + start = pn + 1 + else: + self.error("unexpected '}' in f-string", atom_node) + p2 = u.find(u'}', start) + self._add_constant_string(space.newunicode(u[start:])) + def handle_atom(self, atom_node): first_child = atom_node.get_child(0) first_child_type = first_child.type @@ -1207,35 +1258,45 @@ first_child.get_column()) return ast.NameConstant(w_singleton, first_child.get_lineno(), first_child.get_column()) + # elif first_child_type == tokens.STRING: space = self.space encoding = self.compile_info.encoding - try: - sub_strings_w = [ - parsestring.parsestr( + joined_pieces = [] + for i in range(atom_node.num_children()): + try: + w_next, saw_f = parsestring.parsestr( space, encoding, atom_node.get_child(i).get_value()) - for i in range(atom_node.num_children())] - except error.OperationError as e: - if not (e.match(space, space.w_UnicodeError) or - e.match(space, space.w_ValueError)): - raise - # Unicode/ValueError in literal: turn into SyntaxError - self.error(e.errorstr(space), atom_node) - 
sub_strings_w = [] # please annotator - # Implement implicit string concatenation. - w_string = sub_strings_w[0] - for i in range(1, len(sub_strings_w)): - try: - w_string = space.add(w_string, sub_strings_w[i]) except error.OperationError as e: - if not e.match(space, space.w_TypeError): + if not (e.match(space, space.w_UnicodeError) or + e.match(space, space.w_ValueError)): raise + # Unicode/ValueError in literal: turn into SyntaxError + raise self.error(e.errorstr(space), atom_node) + if not saw_f: + self._add_constant_string(joined_pieces, w_next, atom_node) + else: + self._parse_f_string(joined_pieces, w_next, atom_node) + if len(joined_pieces) == 1: # <= the common path + return joined_pieces[0] # ast.Str, Bytes or FormattedValue + # with more than one piece, it is a combination of Str and + # FormattedValue pieces---if there is a Bytes, then we got + # an invalid mixture of bytes and unicode literals + for node in joined_pieces: + if isinstance(node, ast.Bytes): self.error("cannot mix bytes and nonbytes literals", - atom_node) - # UnicodeError in literal: turn into SyntaxError - strdata = space.isinstance_w(w_string, space.w_unicode) - node = ast.Str if strdata else ast.Bytes - return node(w_string, atom_node.get_lineno(), atom_node.get_column()) + atom_node) + # remove empty Strs + values = [node for node in joined_pieces + if not (isinstance(node, ast.Str) and not node.s)] + if len(values) > 1: + return ast.JoinedStr(values) + elif len(values) == 1: + return values[0] + else: + assert len(joined_pieces) > 0 # but all empty strings + return joined_pieces[0] + # elif first_child_type == tokens.NUMBER: num_value = self.parse_number(first_child.get_value()) return ast.Num(num_value, atom_node.get_lineno(), atom_node.get_column()) diff --git a/pypy/interpreter/astcompiler/test/test_compiler.py b/pypy/interpreter/astcompiler/test/test_compiler.py --- a/pypy/interpreter/astcompiler/test/test_compiler.py +++ b/pypy/interpreter/astcompiler/test/test_compiler.py @@ 
-1384,3 +1384,9 @@ code, blocks = generate_function_code(source, self.space) # there is a stack computation error assert blocks[0].instructions[3].arg == 0 + + def test_fstring(self): + source = """def f(x): + return f'ab{x}cd' + """ + code, blocks = generate_function_code(source, self.space) diff --git a/pypy/interpreter/astcompiler/tools/Python.asdl b/pypy/interpreter/astcompiler/tools/Python.asdl --- a/pypy/interpreter/astcompiler/tools/Python.asdl +++ b/pypy/interpreter/astcompiler/tools/Python.asdl @@ -70,6 +70,8 @@ | Call(expr func, expr* args, keyword* keywords) | Num(object n) -- a number as a PyObject. | Str(string s) -- need to specify raw, unicode, etc? + | FormattedValue(expr value, int? conversion, expr? format_spec) + | JoinedStr(expr* values) | Bytes(bytes s) -- PyPy mod. first argument name must not be value | NameConstant(singleton single) diff --git a/pypy/interpreter/pyparser/dfa_generated.py b/pypy/interpreter/pyparser/dfa_generated.py --- a/pypy/interpreter/pyparser/dfa_generated.py +++ b/pypy/interpreter/pyparser/dfa_generated.py @@ -23,7 +23,7 @@ '8': 6, '9': 6, ':': 15, ';': 15, '<': 10, '=': 14, '>': 9, '@': 14, 'A': 1, 'B': 2, 'C': 1, 'D': 1, - 'E': 1, 'F': 1, 'G': 1, 'H': 1, + 'E': 1, 'F': 2, 'G': 1, 'H': 1, 'I': 1, 'J': 1, 'K': 1, 'L': 1, 'M': 1, 'N': 1, 'O': 1, 'P': 1, 'Q': 1, 'R': 3, 'S': 1, 'T': 1, @@ -31,7 +31,7 @@ 'Y': 1, 'Z': 1, '[': 15, '\\': 19, ']': 15, '^': 14, '_': 1, '`': 15, 'a': 1, 'b': 2, 'c': 1, 'd': 1, - 'e': 1, 'f': 1, 'g': 1, 'h': 1, + 'e': 1, 'f': 2, 'g': 1, 'h': 1, 'i': 1, 'j': 1, 'k': 1, 'l': 1, 'm': 1, 'n': 1, 'o': 1, 'p': 1, 'q': 1, 'r': 3, 's': 1, 't': 1, @@ -78,14 +78,14 @@ '2': 1, '3': 1, '4': 1, '5': 1, '6': 1, '7': 1, '8': 1, '9': 1, 'A': 1, 'B': 4, 'C': 1, 'D': 1, - 'E': 1, 'F': 1, 'G': 1, 'H': 1, + 'E': 1, 'F': 4, 'G': 1, 'H': 1, 'I': 1, 'J': 1, 'K': 1, 'L': 1, 'M': 1, 'N': 1, 'O': 1, 'P': 1, 'Q': 1, 'R': 1, 'S': 1, 'T': 1, 'U': 1, 'V': 1, 'W': 1, 'X': 1, 'Y': 1, 'Z': 1, '_': 1, 'a': 1, 'b': 4, 'c': 1, 'd': 
1, 'e': 1, - 'f': 1, 'g': 1, 'h': 1, 'i': 1, + 'f': 4, 'g': 1, 'h': 1, 'i': 1, 'j': 1, 'k': 1, 'l': 1, 'm': 1, 'n': 1, 'o': 1, 'p': 1, 'q': 1, 'r': 1, 's': 1, 't': 1, 'u': 1, diff --git a/pypy/interpreter/pyparser/gendfa.py b/pypy/interpreter/pyparser/gendfa.py --- a/pypy/interpreter/pyparser/gendfa.py +++ b/pypy/interpreter/pyparser/gendfa.py @@ -152,9 +152,9 @@ return group(states, chain(states, maybe(states, groupStr(states, "rR")), - maybe(states, groupStr(states, "bB"))), + maybe(states, groupStr(states, "bBfF"))), chain(states, - maybe(states, groupStr(states, "bB")), + maybe(states, groupStr(states, "bBfF")), maybe(states, groupStr(states, "rR"))), maybe(states, groupStr(states, "uU"))) # ____________________________________________________________ diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py --- a/pypy/interpreter/pyparser/parsestring.py +++ b/pypy/interpreter/pyparser/parsestring.py @@ -5,7 +5,8 @@ def parsestr(space, encoding, s): - """Parses a string or unicode literal, and return a wrapped value. + """Parses a string or unicode literal, and return a pair + (wrapped value, f_string_flag). If encoding=None, the source string is ascii only. In other cases, the source string is in utf-8 encoding. 
@@ -23,6 +24,7 @@ rawmode = False unicode_literal = True saw_u = False + saw_f = False # string decoration handling if quote == 'b' or quote == 'B': @@ -37,6 +39,10 @@ ps += 1 quote = s[ps] rawmode = True + elif quote == 'f' or quote == 'F': + ps += 1 + quote = s[ps] + saw_f = True if not saw_u: if quote == 'r' or quote == 'R': @@ -47,6 +53,10 @@ ps += 1 quote = s[ps] unicode_literal = False + elif quote == 'f' or quote == 'F': + ps += 1 + quote = s[ps] + saw_f = True if quote != "'" and quote != '"': raise_app_valueerror(space, @@ -64,6 +74,10 @@ 'unmatched triple quotes in literal') q -= 2 + if saw_f: + # forbid any '\' inside '{' and '}' pairs + pass # XXX DO IT + if unicode_literal and not rawmode: # XXX Py_UnicodeFlag is ignored for now if encoding is None: assert 0 <= ps <= q @@ -71,7 +85,7 @@ else: substr = decode_unicode_utf8(space, s, ps, q) v = unicodehelper.decode_unicode_escape(space, substr) - return space.wrap(v) + return space.wrap(v), saw_f assert 0 <= ps <= q substr = s[ps : q] @@ -85,13 +99,13 @@ if rawmode or '\\' not in substr: if not unicode_literal: - return space.newbytes(substr) + return space.newbytes(substr), saw_f else: v = unicodehelper.decode_utf8(space, substr) - return space.wrap(v) + return space.wrap(v), saw_f v = PyString_DecodeEscape(space, substr, 'strict', encoding) - return space.newbytes(v) + return space.newbytes(v), saw_f def decode_unicode_utf8(space, s, ps, q): # ****The Python 2.7 version, producing UTF-32 escapes**** diff --git a/pypy/interpreter/pyparser/pytokenize.py b/pypy/interpreter/pyparser/pytokenize.py --- a/pypy/interpreter/pyparser/pytokenize.py +++ b/pypy/interpreter/pyparser/pytokenize.py @@ -27,10 +27,12 @@ 'R' : None, "u" : None, "U" : None, + 'f' : None, + 'F' : None, 'b' : None, 'B' : None} -for uniPrefix in ("", "b", "B"): +for uniPrefix in ("", "b", "B", "f", "F"): for rawPrefix in ("", "r", "R"): prefix_1 = uniPrefix + rawPrefix prefix_2 = rawPrefix + uniPrefix @@ -55,6 +57,11 @@ for t in ("'''", 
'"""', "r'''", 'r"""', "R'''", 'R"""', "u'''", 'u"""', "U'''", 'U"""', + "f'''", 'f"""', "F'''", 'F"""', + "fr'''", 'fr"""', "Fr'''", 'Fr"""', + "fR'''", 'fR"""', "FR'''", 'FR"""', + "rf'''", 'rf"""', "rF'''", 'rF"""', + "Rf'''", 'Rf"""', "RF'''", 'RF"""', "b'''", 'b"""', "B'''", 'B"""', "br'''", 'br"""', "Br'''", 'Br"""', "bR'''", 'bR"""', "BR'''", 'BR"""', @@ -65,6 +72,11 @@ for t in ("'", '"', "r'", 'r"', "R'", 'R"', "u'", 'u"', "U'", 'U"', + "f'", 'f"', "F'", 'F"', + "fr'", 'fr"', "Fr'", 'Fr"', + "fR'", 'fR"', "FR'", 'FR"', + "rf'", 'rf"', "rF'", 'rF"', + "Rf'", 'Rf"', "RF'", 'RF"', "b'", 'b"', "B'", 'B"', "br'", 'br"', "Br'", 'Br"', "bR'", 'bR"', "BR'", 'BR"', _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit