Rémi Lapeyre <remi.lape...@henki.fr> added the comment: Hi what do you think of a patch to this effect that would speed up operations without changing the current semantics?
diff --git a/Lib/collections/__init__.py b/Lib/collections/__init__.py index cff75a48d6..fe5d5b2dca 100644 --- a/Lib/collections/__init__.py +++ b/Lib/collections/__init__.py @@ -561,6 +561,7 @@ class Counter(dict): if len(args) > 1: raise TypeError('expected at most 1 arguments, got %d' % len(args)) super(Counter, self).__init__() + self._nonpositives = [] self.update(*args, **kwds) def __missing__(self, key): @@ -652,6 +653,12 @@ class Counter(dict): self[elem] = count + self_get(elem, 0) else: super(Counter, self).update(iterable) # fast path when counter is empty + for elem, count in self.items(): + try: + if not count > 0: + self._nonpositives.append(elem) + except TypeError: + pass else: _count_elements(self, iterable) if kwds: @@ -818,9 +825,10 @@ class Counter(dict): def _keep_positive(self): '''Internal method to strip elements with a negative or zero count''' - nonpositive = [elem for elem, count in self.items() if not count > 0] - for elem in nonpositive: - del self[elem] + for elem in self._nonpositives: + if not self[elem] > 0: + del self[elem] + self._nonpositives.clear() return self def __iadd__(self, other): @@ -833,7 +841,10 @@ class Counter(dict): ''' for elem, count in other.items(): - self[elem] += count + count = self[elem] + count + self[elem] = count + if not count > 0: + self._nonpositives.append(elem) return self._keep_positive() def __isub__(self, other): @@ -846,7 +857,10 @@ class Counter(dict): ''' for elem, count in other.items(): - self[elem] -= count + count = self[elem] - count + self[elem] = count + if not count > 0: + del self[elem] return self._keep_positive() def __ior__(self, other): @@ -858,10 +872,11 @@ class Counter(dict): Counter({'b': 3, 'c': 2, 'a': 1}) ''' - for elem, other_count in other.items(): - count = self[elem] - if other_count > count: - self[elem] = other_count + for elem, count in other.items(): + count = max(count, self[elem]) + self[elem] = count + if not count > 0: + self._nonpositives.append(elem) return self._keep_positive() def __iand__(self, other): @@ -874,9 +889,10 @@ class Counter(dict): ''' for elem, count in self.items(): - other_count = other[elem] - if other_count < count: - self[elem] = other_count + count = min(other[elem], count) + self[elem] = count + if not count > 0: + self._nonpositives.append(elem) return self._keep_positive() It seems to give great improvements for the case Nikita Smetanin gave: ➜ cpython git:(speed-up-Counter) ✗ ./python.exe tests.py Setup took: 9.101021380999999 Computation took: 1.634299999864197e-05 ➜ cpython git:(speed-up-Counter) ✗ python3 tests.py Setup took: 11.615437155 Computation took: 0.34183154800000004 Is there a performance test suite for Counter I could use to check this does not add regressions in some cases? ---------- _______________________________________ Python tracker <rep...@bugs.python.org> <https://bugs.python.org/issue36380> _______________________________________ _______________________________________________ Python-bugs-list mailing list Unsubscribe: https://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com