jenkins-bot has submitted this change. (
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1191851?usp=email )
Change subject: [IMPR] add union_generators function to tools.itertools
......................................................................
[IMPR] add union_generators function to tools.itertools
This can be used to merge sorted generators like in pagereferences()
Change-Id: Ibc1c2ef362a0703717b3331afb10cc238b61c9c8
---
M pywikibot/tools/itertools.py
1 file changed, 55 insertions(+), 6 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/tools/itertools.py b/pywikibot/tools/itertools.py
index e5911ba..d802c56 100644
--- a/pywikibot/tools/itertools.py
+++ b/pywikibot/tools/itertools.py
@@ -4,19 +4,25 @@
in :mod:`backports`
"""
#
-# (C) Pywikibot team, 2008-2024
+# (C) Pywikibot team, 2008-2025
#
# Distributed under the terms of the MIT license.
#
from __future__ import annotations
import collections
+import heapq
import itertools
from contextlib import suppress
-from itertools import chain, zip_longest
from typing import Any
-from pywikibot.backports import Generator, batched
+from pywikibot.backports import (
+ Callable,
+ Generator,
+ Iterable,
+ Iterator,
+ batched,
+)
from pywikibot.logging import debug
from pywikibot.tools import deprecated
@@ -27,6 +33,7 @@
'islice_with_ellipsis',
'itergroup',
'roundrobin_generators',
+ 'union_generators',
)
@@ -90,6 +97,47 @@
yield marker
def union_generators(*iterables: Iterable[Any],
                     key: Callable[[Any], Any] | None = None,
                     reverse: bool = False) -> Iterator[Any]:
    """Generator of union of sorted iterables.

    Yield all items from the input iterables in sorted order, removing
    duplicates. The input iterables must already be sorted according to
    the same *key* and direction. For descending direction, *reverse*
    must be ``True``. The generator will yield each element only once,
    even if it appears in multiple iterables. This behaves similarly to:

        sorted(set(itertools.chain(*iterables)), key=key, reverse=reverse)

    but is memory-efficient since it processes items lazily: the inputs
    are merged on the fly and runs of duplicates are skipped without
    being collected in memory, so the iterables may be arbitrarily long.

    Sample:

    >>> list(union_generators([1, 2, 3, 4], [3, 4, 5], [2, 6]))
    [1, 2, 3, 4, 5, 6]
    >>> list(union_generators([4, 3, 2, 1], [5, 4, 3], [6, 2], reverse=True))
    [6, 5, 4, 3, 2, 1]

    .. versionadded:: 10.6

    .. note::
       All input iterables must be sorted consistently. *reverse* must
       be set to ``True`` only if the iterables are sorted in descending
       order. For simple concatenation without duplicate removal, use
       :pylib:`itertools.chain<itertools#itertools.chain>` instead.

    .. note::
       Duplicates are detected on the merged stream by comparing
       adjacent items. If *key* is given, two items count as duplicates
       when their *key* values are equal, even if the items themselves
       differ; only the first item of each such run is yielded.

    :param iterables: Sorted iterables to merge.
    :param key: Optional key function to compare elements. If ``None``,
        items are compared directly.
    :param reverse: Whether the input iterables are sorted in descending
        order.
    :return: Generator yielding all unique items in sorted order.
    """
    # heapq.merge is itself lazy; nothing is consumed until iteration.
    merged = heapq.merge(*iterables, key=key, reverse=reverse)
    # Equal items are adjacent after merging, so each groupby group is one
    # run of duplicates. A group always holds at least one item, hence
    # next() cannot raise here. Taking only the first item avoids building
    # a list of the whole run; groupby discards the rest of the run when
    # it advances to the next group.
    return (next(group)
            for _, group in itertools.groupby(merged, key=key))
+
+
def intersect_generators(*iterables, allow_duplicates: bool = False):
"""Generator of intersect iterables.
@@ -155,7 +203,7 @@
# Get items from iterables in a round-robin way.
sentinel = object()
- for items in zip_longest(*iterables, fillvalue=sentinel):
+ for items in itertools.zip_longest(*iterables, fillvalue=sentinel):
for index, item in enumerate(items):
if item is sentinel:
@@ -184,7 +232,8 @@
# a subset of active iterables.
if len(active_iterables) < n_gen:
cached_iterables = set(
- chain.from_iterable(v.keys() for v in cache.values()))
+ itertools.chain.from_iterable(v.keys()
+ for v in cache.values()))
if cached_iterables <= active_iterables:
return
@@ -210,7 +259,7 @@
sentinel = object()
return (item
for item in itertools.chain.from_iterable(
- zip_longest(*iterables, fillvalue=sentinel))
+ itertools.zip_longest(*iterables, fillvalue=sentinel))
if item is not sentinel)
--
To view, visit
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1191851?usp=email
To unsubscribe, or for help writing mail filters, visit
https://gerrit.wikimedia.org/r/settings?usp=email
Gerrit-MessageType: merged
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Ibc1c2ef362a0703717b3331afb10cc238b61c9c8
Gerrit-Change-Number: 1191851
Gerrit-PatchSet: 5
Gerrit-Owner: Xqt <[email protected]>
Gerrit-Reviewer: Xqt <[email protected]>
Gerrit-Reviewer: jenkins-bot
Gerrit-CC: Matěj Suchánek <[email protected]>
_______________________________________________
Pywikibot-commits mailing list -- [email protected]
To unsubscribe send an email to [email protected]