Author: Amaury Forgeot d'Arc <[email protected]>
Branch: py3k
Changeset: r51318:c863145453cb
Date: 2011-12-22 10:44 +0100
http://bitbucket.org/pypy/pypy/changeset/c863145453cb/
Log: Implement str.isidentifier()
diff --git a/pypy/objspace/std/test/test_unicodeobject.py
b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -162,6 +162,13 @@
assert "!Brown Fox".istitle() == True
assert "Brow&&&&N Fox".istitle() == True
assert "!Brow&&&&n Fox".istitle() == False
+
+ def test_isidentifier(self):  # exercises str.isidentifier() on valid and invalid names
+ assert "".isidentifier() is False  # empty string
+ assert "a4".isidentifier() is True  # digit after the first character
+ assert "_var".isidentifier() is True  # leading underscore
+ assert "_!var".isidentifier() is False  # contains '!'
+ assert "3abc".isidentifier() is False  # leading digit
def test_capitalize(self):
assert "brown fox".capitalize() == "Brown fox"
diff --git a/pypy/objspace/std/unicodeobject.py
b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -279,6 +279,27 @@
previous_is_cased = False
return space.newbool(cased)
+def unicode_isidentifier__Unicode(space, w_unicode):  # str.isidentifier(): returns space.w_True or space.w_False
+ v = w_unicode._value
+ if len(v) == 0:  # the empty string is never an identifier
+ return space.w_False
+
+ # PEP 3131 says that the first character must be in XID_Start and
+ # subsequent characters in XID_Continue, and for the ASCII range,
+ # the 2.x rules apply (i.e. start with letters and underscore,
+ # continue with letters, digits, underscore). However, given the
+ # current definition of XID_Start and XID_Continue, it is
+ # sufficient to check just for these, except that _ must be
+ # allowed as starting an identifier.
+ first = v[0]
+ if not (unicodedb.isxidstart(ord(first)) or first == u'_'):
+ return space.w_False
+
+ for i in range(1, len(v)):  # every character after the first must satisfy isxidcontinue
+ if not unicodedb.isxidcontinue(ord(v[i])):
+ return space.w_False
+ return space.w_True
+
def _strip(space, w_self, w_chars, left, right):
"internal function called by str_xstrip methods"
u_self = w_self._value
diff --git a/pypy/objspace/std/unicodetype.py b/pypy/objspace/std/unicodetype.py
--- a/pypy/objspace/std/unicodetype.py
+++ b/pypy/objspace/std/unicodetype.py
@@ -107,6 +107,10 @@
' characters in S are uppercase and there is\nat'
' least one cased character in S, False'
' otherwise.')
+unicode_isidentifier = SMM('isidentifier', 1,  # NOTE(review): presumably declares the 'isidentifier' multimethod taking 1 argument (the string itself) -- confirm against SMM
+ doc='S.isidentifier() -> bool\n\nReturn True if S is'
+ ' a valid identifier according\nto the language'
+ ' definition.')
unicode_join = SMM('join', 2,
doc='S.join(sequence) -> unicode\n\nReturn a string'
' which is the concatenation of the strings in'
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit