Author: Amaury Forgeot d'Arc <[email protected]>
Branch: SomeString-charclass
Changeset: r72437:d9ddbd244e06
Date: 2014-07-13 18:39 +0200
http://bitbucket.org/pypy/pypy/changeset/d9ddbd244e06/
Log: Replace no_nul by the more general "character kind".
diff --git a/rpython/annotator/binaryop.py b/rpython/annotator/binaryop.py
--- a/rpython/annotator/binaryop.py
+++ b/rpython/annotator/binaryop.py
@@ -9,7 +9,7 @@
SomeDict, SomeUnicodeCodePoint, SomeUnicodeString,
SomeTuple, SomeImpossibleValue, s_ImpossibleValue, SomeInstance,
SomeBuiltinMethod, SomeIterator, SomePBC, SomeNone, SomeFloat, s_None,
- SomeByteArray, SomeWeakRef, SomeSingleFloat,
+ SomeByteArray, SomeWeakRef, SomeSingleFloat, AnyChar, AsciiChar,
SomeLongFloat, SomeType, SomeConstantType, unionof, UnionError,
read_can_only_throw, add_knowntypedata,
merge_knowntypedata,)
@@ -372,12 +372,14 @@
def union((str1, str2)):
can_be_None = str1.can_be_None or str2.can_be_None
- no_nul = str1.no_nul and str2.no_nul
- return SomeString(can_be_None=can_be_None, no_nul=no_nul)
+ charkind = str1.charkind.union(str2.charkind)
+ return SomeString(can_be_None=can_be_None,
+ charkind=charkind)
def add((str1, str2)):
# propagate const-ness to help getattr(obj, 'prefix' + const_name)
- result = SomeString(no_nul=str1.no_nul and str2.no_nul)
+ charkind = str1.charkind.union(str2.charkind)
+ result = SomeString(charkind=charkind)
if str1.is_immutable_constant() and str2.is_immutable_constant():
result.const = str1.const + str2.const
return result
@@ -407,8 +409,8 @@
class __extend__(pairtype(SomeChar, SomeChar)):
def union((chr1, chr2)):
- no_nul = chr1.no_nul and chr2.no_nul
- return SomeChar(no_nul=no_nul)
+ charkind = chr1.charkind.union(chr2.charkind)
+ return SomeChar(charkind=charkind)
class __extend__(pairtype(SomeChar, SomeUnicodeCodePoint),
@@ -442,17 +444,15 @@
SomeUnicodeString))):
raise AnnotatorError(
"string formatting mixing strings and unicode not supported")
- no_nul = s_string.no_nul
+ charkind = s_string.charkind
for s_item in s_tuple.items:
- if isinstance(s_item, SomeFloat):
- pass # or s_item is a subclass, like SomeInteger
- elif (isinstance(s_item, SomeString) or
- isinstance(s_item, SomeUnicodeString)) and s_item.no_nul:
- pass
+ if isinstance(s_item, SomeFloat): # or a subclass, like SomeInteger
+ charkind = charkind.union(AsciiChar())
+ elif isinstance(s_item, (SomeString, SomeUnicodeString)):
+ charkind = charkind.union(s_item.charkind)
else:
- no_nul = False
- break
- return s_string.__class__(no_nul=no_nul)
+ charkind = AnyChar() # Be conservative
+ return s_string.__class__(charkind=charkind)
class __extend__(pairtype(SomeString, SomeObject),
@@ -616,19 +616,19 @@
class __extend__(pairtype(SomeString, SomeInteger)):
def getitem((str1, int2)):
- return SomeChar(no_nul=str1.no_nul)
+ return SomeChar(charkind=str1.charkind)
getitem.can_only_throw = []
getitem_key = getitem
def getitem_idx((str1, int2)):
- return SomeChar(no_nul=str1.no_nul)
+ return SomeChar(charkind=str1.charkind)
getitem_idx.can_only_throw = [IndexError]
getitem_idx_key = getitem_idx
- def mul((str1, int2)): # xxx do we want to support this
- return SomeString(no_nul=str1.no_nul)
+ def mul((str1, int2)):
+ return SomeString(charkind=str1.charkind)
class __extend__(pairtype(SomeUnicodeString, SomeInteger)):
def getitem((str1, int2)):
diff --git a/rpython/annotator/bookkeeper.py b/rpython/annotator/bookkeeper.py
--- a/rpython/annotator/bookkeeper.py
+++ b/rpython/annotator/bookkeeper.py
@@ -12,7 +12,8 @@
SomeBuiltin, SomePBC, SomeInteger, TLS, SomeUnicodeCodePoint,
s_None, s_ImpossibleValue, SomeBool, SomeTuple,
SomeImpossibleValue, SomeUnicodeString, SomeList, HarmlesslyBlocked,
- SomeWeakRef, SomeByteArray, SomeConstantType)
+ SomeWeakRef, SomeByteArray, SomeConstantType,
+ charkind_from_const)
from rpython.annotator.classdef import InstanceSource, ClassDef
from rpython.annotator.listdef import ListDef, ListItem
from rpython.annotator.dictdef import DictDef
@@ -230,11 +231,11 @@
else:
raise Exception("seeing a prebuilt long (value %s)" % hex(x))
elif issubclass(tp, str): # py.lib uses annotated str subclasses
- no_nul = not '\x00' in x
+ charkind = charkind_from_const(x)
if len(x) == 1:
- result = SomeChar(no_nul=no_nul)
+ result = SomeChar(charkind=charkind)
else:
- result = SomeString(no_nul=no_nul)
+ result = SomeString(charkind=charkind)
elif tp is unicode:
if len(x) == 1:
result = SomeUnicodeCodePoint()
diff --git a/rpython/annotator/listdef.py b/rpython/annotator/listdef.py
--- a/rpython/annotator/listdef.py
+++ b/rpython/annotator/listdef.py
@@ -1,4 +1,4 @@
-from rpython.annotator.model import s_ImpossibleValue
+from rpython.annotator.model import s_ImpossibleValue, s_Str0
from rpython.annotator.model import SomeList, SomeString
from rpython.annotator.model import unionof, TLS, UnionError, AnnotatorError
@@ -210,5 +210,5 @@
#else: it's fine, don't set immutable=True at all (see
# test_can_merge_immutable_list_with_regular_list)
-s_list_of_strings = SomeList(ListDef(None, SomeString(no_nul=True),
+s_list_of_strings = SomeList(ListDef(None, s_Str0,
resized = True))
diff --git a/rpython/annotator/model.py b/rpython/annotator/model.py
--- a/rpython/annotator/model.py
+++ b/rpython/annotator/model.py
@@ -217,6 +217,49 @@
self.knowntypedata = knowntypedata
+# Character classes.
+
+class AnyChar(object):
+ no_nul = False
+ _instances = {}
+
+ def __new__(cls):
+ return cls._instances[cls]
+
+ @classmethod
+ def _register(cls):
+ cls._instances[cls] = object.__new__(cls)
+
+ def __repr__(self):
+ return type(self).__name__
+
+ def __eq__(self, other):
+ if TLS.check_str_without_nul:
+ if self.no_nul != other.no_nul:
+ return False
+ return True
+
+ def union(self, other):
+ return self
+AnyChar._register()
+
+class NoNulChar(AnyChar):
+ no_nul = True
+
+ def union(self, other):
+ if other.no_nul:
+ return self
+ else:
+ return AnyChar()
+NoNulChar._register()
+
+AsciiChar = AnyChar # So far
+
+def charkind_from_const(value):
+ if '\x00' not in value:
+ return NoNulChar()
+ return AnyChar()
+
class SomeStringOrUnicode(SomeObject):
"""Base class for shared implementation of SomeString,
SomeUnicodeString and SomeByteArray.
@@ -225,36 +268,31 @@
immutable = True
can_be_None = False
- no_nul = False # No NUL character in the string.
- def __init__(self, can_be_None=False, no_nul=False):
+ def __init__(self, can_be_None=False, charkind=None):
assert type(self) is not SomeStringOrUnicode
if can_be_None:
self.can_be_None = True
- if no_nul:
- assert self.immutable #'no_nul' cannot be used with SomeByteArray
- self.no_nul = True
+ if charkind:
+ # charkind cannot be used with SomeByteArray
+ assert self.immutable
+ self.charkind = charkind
+ else:
+ self.charkind = AnyChar()
def can_be_none(self):
return self.can_be_None
- def __eq__(self, other):
- if self.__class__ is not other.__class__:
- return False
- d1 = self.__dict__
- d2 = other.__dict__
- if not TLS.check_str_without_nul:
- d1 = d1.copy()
- d1['no_nul'] = 0
- d2 = d2.copy()
- d2['no_nul'] = 0
- return d1 == d2
-
def nonnoneify(self):
- return self.__class__(can_be_None=False, no_nul=self.no_nul)
+ return self.__class__(can_be_None=False,
+ charkind=self.charkind)
def nonnulify(self):
- return self.__class__(can_be_None=self.can_be_None, no_nul=True)
+ if self.charkind == NoNulChar():
+ charkind = NoNulChar()
+ elif self.charkind == AnyChar():
+ charkind = NoNulChar()
+ return self.__class__(can_be_None=self.can_be_None, charkind=charkind)
class SomeString(SomeStringOrUnicode):
@@ -262,7 +300,7 @@
knowntype = str
def noneify(self):
- return SomeString(can_be_None=True, no_nul=self.no_nul)
+ return SomeString(can_be_None=True, charkind=self.charkind)
class SomeUnicodeString(SomeStringOrUnicode):
@@ -270,7 +308,7 @@
knowntype = unicode
def noneify(self):
- return SomeUnicodeString(can_be_None=True, no_nul=self.no_nul)
+ return SomeUnicodeString(can_be_None=True, charkind=self.charkind)
class SomeByteArray(SomeStringOrUnicode):
@@ -282,18 +320,18 @@
"Stands for an object known to be a string of length 1."
can_be_None = False
- def __init__(self, no_nul=False): # no 'can_be_None' argument here
- if no_nul:
- self.no_nul = True
+ def __init__(self, charkind=None):
+ # no 'can_be_None' argument here
+ SomeString.__init__(self, charkind=charkind)
class SomeUnicodeCodePoint(SomeUnicodeString):
"Stands for an object known to be a unicode codepoint."
can_be_None = False
- def __init__(self, no_nul=False): # no 'can_be_None' argument here
- if no_nul:
- self.no_nul = True
+ def __init__(self, charkind=False):
+ # no 'can_be_None' argument here
+ SomeUnicodeString.__init__(self, charkind=charkind)
SomeString.basestringclass = SomeString
SomeString.basecharclass = SomeChar
@@ -586,8 +624,8 @@
s_Bool = SomeBool()
s_Int = SomeInteger()
s_ImpossibleValue = SomeImpossibleValue()
-s_Str0 = SomeString(no_nul=True)
-s_Unicode0 = SomeUnicodeString(no_nul=True)
+s_Str0 = SomeString(charkind=NoNulChar)
+s_Unicode0 = SomeUnicodeString(charkind=NoNulChar)
# ____________________________________________________________
diff --git a/rpython/annotator/test/test_annrpython.py b/rpython/annotator/test/test_annrpython.py
--- a/rpython/annotator/test/test_annrpython.py
+++ b/rpython/annotator/test/test_annrpython.py
@@ -428,7 +428,7 @@
return ''.join(g(n))
s = a.build_types(f, [int])
assert s.knowntype == str
- assert s.no_nul
+ assert s.charkind.no_nul
def test_str_split(self):
a = self.RPythonAnnotator()
@@ -441,26 +441,26 @@
s = a.build_types(f, [int])
assert isinstance(s, annmodel.SomeList)
s_item = s.listdef.listitem.s_value
- assert s_item.no_nul
+ assert s_item.charkind.no_nul
def test_str_split_nul(self):
def f(n):
return n.split('\0')[0]
a = self.RPythonAnnotator()
a.translator.config.translation.check_str_without_nul = True
- s = a.build_types(f, [annmodel.SomeString(no_nul=False, can_be_None=False)])
+ s = a.build_types(f, [annmodel.SomeString()])
assert isinstance(s, annmodel.SomeString)
assert not s.can_be_None
- assert s.no_nul
+ assert s.charkind.no_nul
def g(n):
return n.split('\0', 1)[0]
a = self.RPythonAnnotator()
a.translator.config.translation.check_str_without_nul = True
- s = a.build_types(g, [annmodel.SomeString(no_nul=False, can_be_None=False)])
+ s = a.build_types(g, [annmodel.SomeString()])
assert isinstance(s, annmodel.SomeString)
assert not s.can_be_None
- assert not s.no_nul
+ assert not s.charkind.no_nul
def test_str_splitlines(self):
a = self.RPythonAnnotator()
@@ -479,8 +479,9 @@
return a_str.rstrip(' ')
else:
return a_str.lstrip(' ')
- s = a.build_types(f, [int, annmodel.SomeString(no_nul=True)])
- assert s.no_nul
+ s = a.build_types(f, [int, annmodel.SomeString(
+ charkind=annmodel.NoNulChar)])
+ assert s.charkind.no_nul
def test_str_mul(self):
a = self.RPythonAnnotator()
@@ -2016,7 +2017,7 @@
a = self.RPythonAnnotator()
s = a.build_types(f, [int])
assert s.can_be_None
- assert s.no_nul
+ assert s.charkind.no_nul
def test_str_or_None(self):
def f(a):
@@ -2032,7 +2033,7 @@
a = self.RPythonAnnotator()
s = a.build_types(f, [int])
assert s.can_be_None
- assert s.no_nul
+ assert s.charkind.no_nul
def test_emulated_pbc_call_simple(self):
def f(a,b):
@@ -2098,15 +2099,16 @@
a = self.RPythonAnnotator()
s = a.build_types(f, [])
assert isinstance(s, annmodel.SomeString)
- assert s.no_nul
+ assert s.charkind.no_nul
def test_mul_str0(self):
def f(s):
return s*10
a = self.RPythonAnnotator()
- s = a.build_types(f, [annmodel.SomeString(no_nul=True)])
+ s = a.build_types(f, [annmodel.SomeString(
+ charkind=annmodel.NoNulChar())])
assert isinstance(s, annmodel.SomeString)
- assert s.no_nul
+ assert s.charkind.no_nul
def test_getitem_str0(self):
def f(s, n):
@@ -2120,10 +2122,11 @@
a = self.RPythonAnnotator()
a.translator.config.translation.check_str_without_nul = True
- s = a.build_types(f, [annmodel.SomeString(no_nul=True),
+ s = a.build_types(f, [annmodel.SomeString(
+ charkind=annmodel.NoNulChar()),
annmodel.SomeInteger()])
assert isinstance(s, annmodel.SomeString)
- assert s.no_nul
+ assert s.charkind.no_nul
def test_non_none_and_none_with_isinstance(self):
class A(object):
@@ -3353,7 +3356,7 @@
a = self.RPythonAnnotator()
s = a.build_types(f, [str])
assert isinstance(s, annmodel.SomeString)
- assert s.no_nul
+ assert s.charkind.no_nul
def f(x):
return u'a'.replace(x, u'b')
@@ -3861,10 +3864,10 @@
return i
a = self.RPythonAnnotator()
a.translator.config.translation.check_str_without_nul = True
- s = a.build_types(f, [annmodel.SomeString(no_nul=False)])
+ s = a.build_types(f, [annmodel.SomeString(charkind=annmodel.AnyChar())])
assert isinstance(s, annmodel.SomeString)
assert s.can_be_None
- assert s.no_nul
+ assert s.charkind.no_nul
def test_no___call__(self):
class X(object):
@@ -3884,7 +3887,7 @@
a = self.RPythonAnnotator()
s = a.build_types(fn, [])
assert isinstance(s, annmodel.SomeString)
- assert s.no_nul
+ assert s.charkind.no_nul
def test_os_getenv(self):
import os
@@ -3893,7 +3896,7 @@
a = self.RPythonAnnotator()
s = a.build_types(fn, [])
assert isinstance(s, annmodel.SomeString)
- assert s.no_nul
+ assert s.charkind.no_nul
def test_base_iter(self):
class A(object):
diff --git a/rpython/annotator/unaryop.py b/rpython/annotator/unaryop.py
--- a/rpython/annotator/unaryop.py
+++ b/rpython/annotator/unaryop.py
@@ -9,7 +9,7 @@
SomeString, SomeChar, SomeList, SomeDict, SomeTuple, SomeImpossibleValue,
SomeUnicodeCodePoint, SomeInstance, SomeBuiltin, SomeBuiltinMethod,
SomeFloat, SomeIterator, SomePBC, SomeNone, SomeType, s_ImpossibleValue,
- s_Bool, s_None, unionof, add_knowntypedata,
+ s_Bool, s_None, unionof, add_knowntypedata, NoNulChar,
HarmlesslyBlocked, SomeWeakRef, SomeUnicodeString, SomeByteArray)
from rpython.annotator.bookkeeper import getbookkeeper, immutablevalue
from rpython.annotator import builtin
@@ -459,13 +459,13 @@
return SomeInteger(nonneg=True)
def method_strip(self, chr=None):
- return self.basestringclass(no_nul=self.no_nul)
+ return self.basestringclass(charkind=self.charkind)
def method_lstrip(self, chr=None):
- return self.basestringclass(no_nul=self.no_nul)
+ return self.basestringclass(charkind=self.charkind)
def method_rstrip(self, chr=None):
- return self.basestringclass(no_nul=self.no_nul)
+ return self.basestringclass(charkind=self.charkind)
def method_join(self, s_list):
if s_None.contains(s_list):
@@ -475,8 +475,8 @@
if isinstance(self, SomeUnicodeString):
return immutablevalue(u"")
return immutablevalue("")
- no_nul = self.no_nul and s_item.no_nul
- return self.basestringclass(no_nul=no_nul)
+ charkind = self.charkind.union(s_item.charkind)
+ return self.basestringclass(charkind=charkind)
def iter(self):
return SomeIterator(self)
@@ -487,23 +487,23 @@
def method_split(self, patt, max=-1):
if max == -1 and patt.is_constant() and patt.const == "\0":
- no_nul = True
+ charkind = NoNulChar
else:
- no_nul = self.no_nul
- s_item = self.basestringclass(no_nul=no_nul)
+ charkind = self.charkind
+ s_item = self.basestringclass(charkind=charkind)
return getbookkeeper().newlist(s_item)
def method_rsplit(self, patt, max=-1):
- s_item = self.basestringclass(no_nul=self.no_nul)
+ s_item = self.basestringclass(charkind=self.charkind)
return getbookkeeper().newlist(s_item)
def method_replace(self, s1, s2):
- return self.basestringclass(no_nul=self.no_nul and s2.no_nul)
+ return self.basestringclass(
+ charkind=self.charkind.union(s2.charkind))
def getslice(self, s_start, s_stop):
check_negative_slice(s_start, s_stop)
- result = self.basestringclass(no_nul=self.no_nul)
- return result
+ return self.basestringclass(charkind=self.charkind)
def op_contains(self, s_element):
if s_element.is_constant() and s_element.const == "\0":
diff --git a/rpython/rlib/rmarshal.py b/rpython/rlib/rmarshal.py
--- a/rpython/rlib/rmarshal.py
+++ b/rpython/rlib/rmarshal.py
@@ -230,16 +230,14 @@
raise ValueError("expected a string")
length = readlong(loader)
return assert_str0(readstr(loader, length))
-add_loader(annmodel.SomeString(can_be_None=False, no_nul=True),
- load_string_nonul)
+add_loader(annmodel.s_Str0, load_string_nonul)
def load_string(loader):
if readchr(loader) != TYPE_STRING:
raise ValueError("expected a string")
length = readlong(loader)
return readstr(loader, length)
-add_loader(annmodel.SomeString(can_be_None=False, no_nul=False),
- load_string)
+add_loader(annmodel.SomeString(), load_string)
def load_string_or_none_nonul(loader):
t = readchr(loader)
@@ -250,8 +248,7 @@
return None
else:
raise ValueError("expected a string or None")
-add_loader(annmodel.SomeString(can_be_None=True, no_nul=True),
- load_string_or_none_nonul)
+add_loader(annmodel.s_Str0.noneify(), load_string_or_none_nonul)
def load_string_or_none(loader):
t = readchr(loader)
@@ -262,8 +259,7 @@
return None
else:
raise ValueError("expected a string or None")
-add_loader(annmodel.SomeString(can_be_None=True, no_nul=False),
- load_string_or_none)
+add_loader(annmodel.SomeString().noneify(), load_string_or_none)
# ____________________________________________________________
#
diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py
--- a/rpython/rlib/rstring.py
+++ b/rpython/rlib/rstring.py
@@ -3,7 +3,7 @@
import sys
from rpython.annotator.model import (SomeObject, SomeString, s_None, SomeChar,
- SomeInteger, SomeUnicodeCodePoint, SomeUnicodeString, SomePBC)
+ SomeInteger, SomeUnicodeCodePoint, SomeUnicodeString, SomePBC, NoNulChar)
from rpython.rtyper.llannotation import SomePtr
from rpython.rlib import jit
from rpython.rlib.objectmodel import newlist_hint, specialize
@@ -526,11 +526,11 @@
if s_None.contains(s_obj):
return s_obj
assert isinstance(s_obj, (SomeString, SomeUnicodeString))
- if s_obj.no_nul:
+ if s_obj.charkind.no_nul:
return s_obj
new_s_obj = SomeObject.__new__(s_obj.__class__)
new_s_obj.__dict__ = s_obj.__dict__.copy()
- new_s_obj.no_nul = True
+ new_s_obj.charkind = NoNulChar()
return new_s_obj
def specialize_call(self, hop):
@@ -548,7 +548,7 @@
def compute_result_annotation(self, s_obj):
if not isinstance(s_obj, (SomeString, SomeUnicodeString)):
return s_obj
- if not s_obj.no_nul:
+ if not s_obj.charkind.no_nul:
raise ValueError("Value is not no_nul")
def specialize_call(self, hop):
diff --git a/rpython/rlib/test/test_signature.py b/rpython/rlib/test/test_signature.py
--- a/rpython/rlib/test/test_signature.py
+++ b/rpython/rlib/test/test_signature.py
@@ -119,8 +119,8 @@
@signature(types.unicode0(), returns=types.str0())
def f(u):
return 'str'
- assert getsig(f) == [model.SomeUnicodeString(no_nul=True),
- model.SomeString(no_nul=True)]
+ assert getsig(f) == [model.SomeUnicodeString.s_Str0,
+ model.SomeString.s_Unicode0]
def test_ptr():
policy = LowLevelAnnotatorPolicy()
diff --git a/rpython/rlib/types.py b/rpython/rlib/types.py
--- a/rpython/rlib/types.py
+++ b/rpython/rlib/types.py
@@ -36,11 +36,14 @@
def unicode0():
- return model.SomeUnicodeString(no_nul=True)
+ return model.s_Unicode0()
def str(can_be_None=False):
- return model.SomeString(can_be_None=can_be_None)
+ if can_be_None:
+ return model.SomeString().noneify()
+ else:
+ return model.SomeString()
def bytearray():
@@ -48,7 +51,7 @@
def str0():
- return model.SomeString(no_nul=True)
+ return model.s_str0
def char():
diff --git a/rpython/rtyper/module/ll_os_environ.py b/rpython/rtyper/module/ll_os_environ.py
--- a/rpython/rtyper/module/ll_os_environ.py
+++ b/rpython/rtyper/module/ll_os_environ.py
@@ -181,8 +181,7 @@
register_external(r_envitems, [], [(str0, str0)],
export_name='ll_os.ll_os_envitems',
llimpl=envitems_llimpl)
-register_external(r_getenv, [str0],
- annmodel.SomeString(can_be_None=True, no_nul=True),
+register_external(r_getenv, [str0], str0.noneify(),
export_name='ll_os.ll_os_getenv',
llimpl=getenv_llimpl)
register_external(r_putenv, [str0, str0], annmodel.s_None,
diff --git a/rpython/rtyper/test/test_extfunc.py b/rpython/rtyper/test/test_extfunc.py
--- a/rpython/rtyper/test/test_extfunc.py
+++ b/rpython/rtyper/test/test_extfunc.py
@@ -138,7 +138,7 @@
assert isinstance(s, annmodel.SomeString)
def test_str0(self):
- str0 = annmodel.SomeString(no_nul=True)
+ str0 = annmodel.s_Str0
def os_open(s):
pass
register_external(os_open, [str0], None)
@@ -156,7 +156,7 @@
a.build_types(g, [str0]) # Does not raise
def test_list_of_str0(self):
- str0 = annmodel.SomeString(no_nul=True)
+ str0 = annmodel.s_Str0
def os_execve(l):
pass
register_external(os_execve, [[str0]], None)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit