Author: Armin Rigo <[email protected]>
Branch:
Changeset: r85137:418b05f95db5
Date: 2016-06-13 19:07 +0200
http://bitbucket.org/pypy/pypy/changeset/418b05f95db5/
Log: Improve CPython compatibility of 'is'. Now 'x is y' is guaranteed to
return True if x == y and x, y are of the same built-in type and are:
* empty strings; empty unicode strings
* single-character (unicode) strings
* empty tuples
This is in addition to all other special cases (ints, etc.)
diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py
--- a/pypy/objspace/std/bytesobject.py
+++ b/pypy/objspace/std/bytesobject.py
@@ -18,6 +18,7 @@
from pypy.objspace.std.unicodeobject import (
decode_object, unicode_from_encoded_object,
unicode_from_string, getdefaultencoding)
+from pypy.objspace.std.util import IDTAG_SPECIAL, IDTAG_SHIFT
class W_AbstractBytesObject(W_Root):
@@ -30,12 +31,26 @@
return True
if self.user_overridden_class or w_other.user_overridden_class:
return False
- return space.str_w(self) is space.str_w(w_other)
+ s1 = space.str_w(self)
+ s2 = space.str_w(w_other)
+ if len(s2) > 1:
+ return s1 is s2
+ else: # strings of len <= 1 are unique-ified
+ return s1 == s2
def immutable_unique_id(self, space):
if self.user_overridden_class:
return None
- return space.wrap(compute_unique_id(space.str_w(self)))
+ s = space.str_w(self)
+ if len(s) > 1:
+ uid = compute_unique_id(s)
+ else: # strings of len <= 1 are unique-ified
+ if len(s) == 1:
+ base = ord(s[0]) # base values 0-255
+ else:
+ base = 256 # empty string: base value 256
+ uid = (base << IDTAG_SHIFT) | IDTAG_SPECIAL
+ return space.wrap(uid)
def unicode_w(self, space):
# Use the default encoding.
diff --git a/pypy/objspace/std/test/test_obj.py b/pypy/objspace/std/test/test_obj.py
--- a/pypy/objspace/std/test/test_obj.py
+++ b/pypy/objspace/std/test/test_obj.py
@@ -186,17 +186,28 @@
def test_id_on_strs(self):
if self.appdirect:
skip("cannot run this test as apptest")
- u = u"a"
- assert id(self.unwrap_wrap_unicode(u)) == id(u)
- s = "a"
- assert id(self.unwrap_wrap_str(s)) == id(s)
+ for u in [u"", u"a", u"aa"]:
+ assert id(self.unwrap_wrap_unicode(u)) == id(u)
+ s = str(u)
+ assert id(self.unwrap_wrap_str(s)) == id(s)
+ #
+ assert id('') == (256 << 4) | 11 # always
+ assert id(u'') == (257 << 4) | 11
+ assert id('a') == (ord('a') << 4) | 11
+ assert id(u'\u1234') == ((~0x1234) << 4) | 11
+
+ def test_id_of_tuples(self):
+ l = []
+ x = (l,)
+ assert id(x) != id((l,)) # no caching at all
+ if self.appdirect:
+ skip("cannot run this test as apptest")
+ assert id(()) == (258 << 4) | 11 # always
def test_identity_vs_id_primitives(self):
- if self.cpython_apptest:
- skip("cpython behaves differently")
import sys
- l = range(-10, 10)
- for i in range(10):
+ l = range(-10, 10, 2)
+ for i in [0, 1, 3]:
l.append(float(i))
l.append(i + 0.1)
l.append(long(i))
@@ -206,18 +217,13 @@
l.append(i - 1j)
l.append(1 + i * 1j)
l.append(1 - i * 1j)
- s = str(i)
- l.append(s)
- u = unicode(s)
- l.append(u)
+ l.append((i,))
l.append(-0.0)
l.append(None)
l.append(True)
l.append(False)
- s = "s"
- l.append(s)
- s = u"s"
- l.append(s)
+ l.append(())
+ l.append(tuple([]))
for i, a in enumerate(l):
for b in l[i:]:
@@ -228,21 +234,18 @@
def test_identity_vs_id_str(self):
if self.appdirect:
skip("cannot run this test as apptest")
- import sys
- l = range(-10, 10)
- for i in range(10):
- s = str(i)
+ l = []
+ def add(s, u):
l.append(s)
l.append(self.unwrap_wrap_str(s))
- u = unicode(s)
+ l.append(s[:1] + s[1:])
l.append(u)
l.append(self.unwrap_wrap_unicode(u))
- s = "s"
- l.append(s)
- l.append(self.unwrap_wrap_str(s))
- s = u"s"
- l.append(s)
- l.append(self.unwrap_wrap_unicode(s))
+ l.append(u[:1] + u[1:])
+ for i in range(3, 18):
+ add(str(i), unicode(i))
+ add("s", u"s")
+ add("", u"")
for i, a in enumerate(l):
for b in l[i:]:
diff --git a/pypy/objspace/std/tupleobject.py b/pypy/objspace/std/tupleobject.py
--- a/pypy/objspace/std/tupleobject.py
+++ b/pypy/objspace/std/tupleobject.py
@@ -9,7 +9,7 @@
from pypy.interpreter.typedef import TypeDef
from pypy.objspace.std.sliceobject import (W_SliceObject, unwrap_start_stop,
normalize_simple_slice)
-from pypy.objspace.std.util import negate
+from pypy.objspace.std.util import negate, IDTAG_SPECIAL, IDTAG_SHIFT
from rpython.rlib import jit
from rpython.rlib.debug import make_sure_not_resized
from rpython.rlib.rarithmetic import intmask
@@ -38,6 +38,23 @@
class W_AbstractTupleObject(W_Root):
__slots__ = ()
+ def is_w(self, space, w_other):
+ if not isinstance(w_other, W_AbstractTupleObject):
+ return False
+ if self is w_other:
+ return True
+ if self.user_overridden_class or w_other.user_overridden_class:
+ return False
+ # empty tuples are unique-ified
+ return 0 == w_other.length() == self.length()
+
+ def immutable_unique_id(self, space):
+ if self.user_overridden_class or self.length() > 0:
+ return None
+ # empty tuple: base value 258
+ uid = (258 << IDTAG_SHIFT) | IDTAG_SPECIAL
+ return space.wrap(uid)
+
def __repr__(self):
"""representation for debugging purposes"""
reprlist = [repr(w_item) for w_item in self.tolist()]
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -18,6 +18,7 @@
from pypy.objspace.std.basestringtype import basestring_typedef
from pypy.objspace.std.formatting import mod_format
from pypy.objspace.std.stringmethods import StringMethods
+from pypy.objspace.std.util import IDTAG_SPECIAL, IDTAG_SHIFT
__all__ = ['W_UnicodeObject', 'wrapunicode', 'plain_str2unicode',
'encode_object', 'decode_object', 'unicode_from_object',
@@ -52,12 +53,26 @@
return True
if self.user_overridden_class or w_other.user_overridden_class:
return False
- return space.unicode_w(self) is space.unicode_w(w_other)
+ s1 = space.unicode_w(self)
+ s2 = space.unicode_w(w_other)
+ if len(s2) > 1:
+ return s1 is s2
+ else: # strings of len <= 1 are unique-ified
+ return s1 == s2
def immutable_unique_id(self, space):
if self.user_overridden_class:
return None
- return space.wrap(compute_unique_id(space.unicode_w(self)))
+ s = space.unicode_w(self)
+ if len(s) > 1:
+ uid = compute_unique_id(s)
+ else: # strings of len <= 1 are unique-ified
+ if len(s) == 1:
+ base = ~ord(s[0]) # negative base values
+ else:
+ base = 257 # empty unicode string: base value 257
+ uid = (base << IDTAG_SHIFT) | IDTAG_SPECIAL
+ return space.wrap(uid)
def str_w(self, space):
return space.str_w(space.str(self))
diff --git a/pypy/objspace/std/util.py b/pypy/objspace/std/util.py
--- a/pypy/objspace/std/util.py
+++ b/pypy/objspace/std/util.py
@@ -9,6 +9,11 @@
IDTAG_FLOAT = 5
IDTAG_COMPLEX = 7
IDTAG_METHOD = 9
+IDTAG_SPECIAL = 11 # -1 - (-maxunicode-1): unichar
+ # 0 - 255: char
+ # 256: empty string
+ # 257: empty unicode
+ # 258: empty tuple
CMP_OPS = dict(lt='<', le='<=', eq='==', ne='!=', gt='>', ge='>=')
BINARY_BITWISE_OPS = {'and': '&', 'lshift': '<<', 'or': '|', 'rshift': '>>',
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit