Author: Philip Jenvey <[email protected]>
Branch: length-hint
Changeset: r57747:117775ce3778
Date: 2012-10-02 11:46 -0700
http://bitbucket.org/pypy/pypy/changeset/117775ce3778/
Log: hook length_hinting into map/filter/zip
diff --git a/pypy/module/__builtin__/app_functional.py
b/pypy/module/__builtin__/app_functional.py
--- a/pypy/module/__builtin__/app_functional.py
+++ b/pypy/module/__builtin__/app_functional.py
@@ -2,6 +2,9 @@
Plain Python definition of the builtin functions oriented towards
functional programming.
"""
+from __future__ import with_statement
+import operator
+from __pypy__ import resizelist_hint
# ____________________________________________________________
@@ -66,38 +69,69 @@
if num_collections == 1:
if none_func:
return list(collections[0])
- else:
- # Special case for the really common case of a single collection,
- # this can be eliminated if we could unroll that loop that creates
- # `args` based on whether or not len(collections) was constant
- result = []
- for item in collections[0]:
+ # Special case for the really common case of a single collection,
+ # this can be eliminated if we could unroll that loop that creates
+ # `args` based on whether or not len(collections) was constant
+ seq = collections[0]
+ with _managed_newlist_hint(operator._length_hint(seq, 0)) as result:
+ for item in seq:
result.append(func(item))
return result
+
+ # Gather the iterators (pair of (iter, has_finished)) and guess the
+ # result length (the max of the input lengths)
+ iterators = []
+ max_hint = 0
+ for seq in collections:
+ iterators.append((iter(seq), False))
+ max_hint = max(max_hint, operator._length_hint(seq, 0))
+
+ with _managed_newlist_hint(max_hint) as result:
+ while True:
+ cont = False
+ args = []
+ for idx, (iterator, has_finished) in enumerate(iterators):
+ val = None
+ if not has_finished:
+ try:
+ val = next(iterator)
+ except StopIteration:
+ iterators[idx] = (None, True)
+ else:
+ cont = True
+ args.append(val)
+ args = tuple(args)
+ if cont:
+ if none_func:
+ result.append(args)
+ else:
+ result.append(func(*args))
+ else:
+ return result
+
+def _newlist_hint(length_hint):
+ """Return an empty list with an underlying capacity of length_hint"""
result = []
- # Pair of (iterator, has_finished)
- iterators = [(iter(seq), False) for seq in collections]
- while True:
- cont = False
- args = []
- for idx, (iterator, has_finished) in enumerate(iterators):
- val = None
- if not has_finished:
- try:
- val = next(iterator)
- except StopIteration:
- iterators[idx] = (None, True)
- else:
- cont = True
- args.append(val)
- args = tuple(args)
- if cont:
- if none_func:
- result.append(args)
- else:
- result.append(func(*args))
- else:
- return result
+ resizelist_hint(result, length_hint)
+ return result
+
+def _managed_newlist_hint(length_hint):
+ """Context manager returning a _newlist_hint upon entry.
+
+ Upon exit the list's underlying capacity will be cut back to match
+ its length if necessary (incase the initial length_hint was too
+ large).
+ """
+ # hack: can't import contextlib.contextmanager at the global level
+ from contextlib import contextmanager
+ @contextmanager
+ def _do_managed_newlist_hint(length_hint):
+ result = _newlist_hint(length_hint)
+ yield result
+ extended = len(result)
+ if extended < length_hint:
+ resizelist_hint(result, extended)
+ return _do_managed_newlist_hint(length_hint)
sentinel = object()
@@ -135,17 +169,18 @@
return _filter_string(func, seq, unicode)
elif isinstance(seq, tuple):
return _filter_tuple(func, seq)
- result = []
- for item in seq:
- if func(item):
- result.append(item)
+ with _managed_newlist_hint(operator._length_hint(seq, 0)) as result:
+ for item in seq:
+ if func(item):
+ result.append(item)
return result
def _filter_string(func, string, str_type):
if func is bool and type(string) is str_type:
return string
- result = []
- for i in range(len(string)):
+ length = len(string)
+ result = _newlist_hint(length)
+ for i in range(length):
# You must call __getitem__ on the strings, simply iterating doesn't
# work :/
item = string[i]
@@ -156,8 +191,9 @@
return str_type().join(result)
def _filter_tuple(func, seq):
- result = []
- for i in range(len(seq)):
+ length = len(seq)
+ result = _newlist_hint(length)
+ for i in range(length):
# Again, must call __getitem__, at least there are tests.
item = seq[i]
if func(item):
@@ -172,11 +208,21 @@
in length to the length of the shortest argument sequence."""
if not sequences:
return []
- result = []
- iterators = [iter(seq) for seq in sequences]
- while True:
- try:
- items = [next(it) for it in iterators]
- except StopIteration:
- return result
- result.append(tuple(items))
+
+ # Gather the iterators and guess the result length (the min of the
+ # input lengths)
+ iterators = []
+ min_hint = -1
+ for seq in sequences:
+ iterators.append(iter(seq))
+ hint = operator._length_hint(seq, min_hint)
+ if min_hint == -1 or hint < min_hint:
+ min_hint = hint
+
+ with _managed_newlist_hint(min_hint) as result:
+ while True:
+ try:
+ items = [next(it) for it in iterators]
+ except StopIteration:
+ return result
+ result.append(tuple(items))
diff --git a/pypy/module/__pypy__/__init__.py b/pypy/module/__pypy__/__init__.py
--- a/pypy/module/__pypy__/__init__.py
+++ b/pypy/module/__pypy__/__init__.py
@@ -43,6 +43,7 @@
'do_what_I_mean' : 'interp_magic.do_what_I_mean',
'list_strategy' : 'interp_magic.list_strategy',
'validate_fd' : 'interp_magic.validate_fd',
+ 'resizelist_hint' : 'interp_magic.resizelist_hint',
'newdict' : 'interp_dict.newdict',
'dictstrategy' : 'interp_dict.dictstrategy',
}
diff --git a/pypy/module/__pypy__/interp_magic.py
b/pypy/module/__pypy__/interp_magic.py
--- a/pypy/module/__pypy__/interp_magic.py
+++ b/pypy/module/__pypy__/interp_magic.py
@@ -1,7 +1,8 @@
from pypy.interpreter.baseobjspace import ObjSpace, W_Root
from pypy.interpreter.error import OperationError, wrap_oserror
from pypy.interpreter.gateway import unwrap_spec
-from pypy.rlib.objectmodel import we_are_translated
+from pypy.rlib.objectmodel import resizelist_hint, we_are_translated
+from pypy.objspace.std.listobject import W_ListObject
from pypy.objspace.std.typeobject import MethodCache
from pypy.objspace.std.mapdict import IndexCache
from pypy.rlib import rposix
@@ -75,7 +76,6 @@
return space.wrap(42)
def list_strategy(space, w_list):
- from pypy.objspace.std.listobject import W_ListObject
if isinstance(w_list, W_ListObject):
return space.wrap(w_list.strategy._applevel_repr)
else:
@@ -95,3 +95,10 @@
space.wrap('cp%d' % rwin32.GetConsoleCP()),
space.wrap('cp%d' % rwin32.GetConsoleOutputCP()),
])
+
+@unwrap_spec(sizehint=int)
+def resizelist_hint(space, w_iterable, sizehint):
+ if not isinstance(w_iterable, W_ListObject):
+ raise OperationError(space.w_TypeError,
+ space.wrap("arg 1 must be a 'list'"))
+ w_iterable._resize_hint(sizehint)
diff --git a/pypy/module/operator/__init__.py b/pypy/module/operator/__init__.py
--- a/pypy/module/operator/__init__.py
+++ b/pypy/module/operator/__init__.py
@@ -37,7 +37,7 @@
'sub', 'truediv', 'truth', 'xor',
'iadd', 'iand', 'iconcat', 'idiv', 'ifloordiv',
'ilshift', 'imod', 'imul', 'ior', 'ipow', 'irepeat',
- 'irshift', 'isub', 'itruediv', 'ixor']
+ 'irshift', 'isub', 'itruediv', 'ixor', '_length_hint']
interpleveldefs = {}
diff --git a/pypy/module/operator/interp_operator.py
b/pypy/module/operator/interp_operator.py
--- a/pypy/module/operator/interp_operator.py
+++ b/pypy/module/operator/interp_operator.py
@@ -1,4 +1,5 @@
from pypy.interpreter.error import OperationError
+from pypy.interpreter.gateway import unwrap_spec
def index(space, w_a):
return space.index(w_a)
@@ -240,3 +241,8 @@
return space.inplace_mul(w_obj1, w_obj2)
+# _length_hint (to be length_hint in 3.4)
+
+@unwrap_spec(default=int)
+def _length_hint(space, w_iterable, default):
+ return space.wrap(space.length_hint(w_iterable, default))
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit