Script 'mail_helper' called by obssrc

Hello community,

here is the log from the commit of package python-elasticsearch-dsl for openSUSE:Factory checked in at 2024-01-21 23:08:41

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-elasticsearch-dsl (Old)
 and      /work/SRC/openSUSE:Factory/.python-elasticsearch-dsl.new.16006 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-elasticsearch-dsl" Sun Jan 21 23:08:41 2024 rev:9 rq:1140106 version:8.12.0 Changes: -------- --- /work/SRC/openSUSE:Factory/python-elasticsearch-dsl/python-elasticsearch-dsl.changes 2023-11-27 22:45:24.942350224 +0100 +++ /work/SRC/openSUSE:Factory/.python-elasticsearch-dsl.new.16006/python-elasticsearch-dsl.changes 2024-01-21 23:09:01.870981527 +0100 @@ -1,0 +2,9 @@ +Sat Jan 20 12:39:22 UTC 2024 - Dirk Müller <dmuel...@suse.com> + +- update to 8.12.0: + * Added Search.knn() method + * Added Search.rank() method (undocumented as it still is in + technical preview) + * Fixed importing collapse from dictionary + +------------------------------------------------------------------- Old: ---- elasticsearch-dsl-8.11.0.tar.gz New: ---- elasticsearch-dsl-8.12.0.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-elasticsearch-dsl.spec ++++++ --- /var/tmp/diff_new_pack.LD6Tza/_old 2024-01-21 23:09:03.307033874 +0100 +++ /var/tmp/diff_new_pack.LD6Tza/_new 2024-01-21 23:09:03.323034457 +0100 @@ -1,7 +1,7 @@ # # spec file for package python-elasticsearch-dsl # -# Copyright (c) 2023 SUSE LLC +# Copyright (c) 2024 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -16,15 +16,18 @@ # +%{?sle15_python_module_pythons} Name: python-elasticsearch-dsl -Version: 8.11.0 +Version: 8.12.0 Release: 0 Summary: Python client for Elasticsearch License: Apache-2.0 Group: Development/Languages/Python URL: https://github.com/elasticsearch/elasticsearch-dsl-py Source: https://github.com/elastic/elasticsearch-dsl-py/archive/refs/tags/v%{version}.tar.gz#/elasticsearch-dsl-%{version}.tar.gz +BuildRequires: %{python_module pip} BuildRequires: %{python_module setuptools} +BuildRequires: %{python_module wheel} BuildRequires: fdupes BuildRequires: python-rpm-macros # gh#elastic/elasticsearch-dsl-py#1569 @@ -47,10 +50,10 @@ %autosetup -p1 -n elasticsearch-dsl-py-%{version} %build -%python_build +%pyproject_wheel %install -%python_install +%pyproject_install %python_expand %fdupes %{buildroot}%{$python_sitelib} %check @@ -59,6 +62,6 @@ %files %{python_files} %doc AUTHORS Changelog.rst README %license LICENSE -%{python_sitelib}/elasticsearch_dsl/ -%{python_sitelib}/elasticsearch_dsl-%{version}*-info/ +%{python_sitelib}/elasticsearch_dsl +%{python_sitelib}/elasticsearch_dsl-%{version}.dist-info ++++++ elasticsearch-dsl-8.11.0.tar.gz -> elasticsearch-dsl-8.12.0.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/elasticsearch-dsl-py-8.11.0/.gitignore new/elasticsearch-dsl-py-8.12.0/.gitignore --- old/elasticsearch-dsl-py-8.11.0/.gitignore 2023-11-13 12:54:53.000000000 +0100 +++ new/elasticsearch-dsl-py-8.12.0/.gitignore 2024-01-19 12:11:44.000000000 +0100 @@ -14,3 +14,6 @@ venv .idea .pytest_cache + +# sample code for GitHub issues +issues diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/elasticsearch-dsl-py-8.11.0/Changelog.rst new/elasticsearch-dsl-py-8.12.0/Changelog.rst --- old/elasticsearch-dsl-py-8.11.0/Changelog.rst 2023-11-13 12:54:53.000000000 +0100 +++ new/elasticsearch-dsl-py-8.12.0/Changelog.rst 2024-01-19 12:11:44.000000000 +0100 @@ -3,11 +3,22 @@ Changelog ========= +8.12.0 (2024-01-18) +------------------- + +* Added ``Search.knn()`` method (`#1691`_) +* Added ``Search.rank()`` method (undocumented as it still is 
in technical preview) (`#1692`_) +* Fixed importing collapse from dictionary (`#1689`_) + +.. _#1689: https://github.com/elastic/elasticsearch-dsl-py/pull/1689 +.. _#1691: https://github.com/elastic/elasticsearch-dsl-py/pull/1691 +.. _#1692: https://github.com/elastic/elasticsearch-dsl-py/pull/1692 + 8.11.0 (2023-11-13) ------------------- * Added support for Python 3.12 (`#1680`_) -* Added support for Search.collase() (`#1649`_, contributed by `@qcoumes`_) +* Added ``Search.collapse()`` (`#1649`_, contributed by `@qcoumes`_) .. _@qcoumes: https://github.com/qcoumes .. _#1680: https://github.com/elastic/elasticsearch-dsl-py/pull/1680 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/elasticsearch-dsl-py-8.11.0/docs/search_dsl.rst new/elasticsearch-dsl-py-8.12.0/docs/search_dsl.rst --- old/elasticsearch-dsl-py-8.11.0/docs/search_dsl.rst 2023-11-13 12:54:53.000000000 +0100 +++ new/elasticsearch-dsl-py-8.12.0/docs/search_dsl.rst 2024-01-19 12:11:44.000000000 +0100 @@ -14,6 +14,8 @@ * aggregations + * k-nearest neighbor searches + * sort * pagination @@ -352,6 +354,31 @@ done in-place (does not return a copy). +K-Nearest Neighbor Searches +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To issue a kNN search, use the ``.knn()`` method: + +.. code:: python + + s = Search() + vector = get_embedding("search text") + + s = s.knn( + field="embedding", + k=5, + num_candidates=10, + query_vector=vector + ) + +The ``field``, ``k`` and ``num_candidates`` arguments can be given as +positional or keyword arguments and are required. In addition to these, +``query_vector`` or ``query_vector_builder`` must be given as well. + +The ``.knn()`` method can be invoked multiple times to include multiple kNN +searches in the request. + + Sorting ~~~~~~~ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/elasticsearch-dsl-py-8.11.0/elasticsearch_dsl/__init__.py new/elasticsearch-dsl-py-8.12.0/elasticsearch_dsl/__init__.py --- old/elasticsearch-dsl-py-8.11.0/elasticsearch_dsl/__init__.py 2023-11-13 12:54:53.000000000 +0100 +++ new/elasticsearch-dsl-py-8.12.0/elasticsearch_dsl/__init__.py 2024-01-19 12:11:44.000000000 +0100 @@ -84,7 +84,7 @@ from .utils import AttrDict, AttrList, DslBase from .wrappers import Range -VERSION = (8, 11, 0) +VERSION = (8, 12, 0) __version__ = VERSION __versionstr__ = ".".join(map(str, VERSION)) __all__ = [ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/elasticsearch-dsl-py-8.11.0/elasticsearch_dsl/search.py new/elasticsearch-dsl-py-8.12.0/elasticsearch_dsl/search.py --- old/elasticsearch-dsl-py-8.11.0/elasticsearch_dsl/search.py 2023-11-13 12:54:53.000000000 +0100 +++ new/elasticsearch-dsl-py-8.12.0/elasticsearch_dsl/search.py 2024-01-19 12:11:44.000000000 +0100 @@ -24,7 +24,7 @@ from .aggs import A, AggBase from .connections import get_connection from .exceptions import IllegalOperation -from .query import Bool, Q +from .query import Bool, Q, Query from .response import Hit, Response from .utils import AttrDict, DslBase, recursive_to_dict @@ -120,7 +120,6 @@ self._doc_type = [] self._doc_type_map = {} - self._collapse = {} if isinstance(doc_type, (tuple, list)): self._doc_type.extend(doc_type) elif isinstance(doc_type, collections.abc.Mapping): @@ -294,7 +293,6 @@ s = self.__class__( using=self._using, index=self._index, doc_type=self._doc_type ) - s._collapse = self._collapse.copy() s._doc_type_map = self._doc_type_map.copy() s._extra = self._extra.copy() s._params = 
self._params.copy() @@ -320,6 +318,8 @@ self.aggs = AggsProxy(self) self._sort = [] + self._knn = [] + self._rank = {} self._collapse = {} self._source = None self._highlight = {} @@ -408,6 +408,9 @@ s = super()._clone() s._response_class = self._response_class + s._knn = [knn.copy() for knn in self._knn] + s._rank = self._rank.copy() + s._collapse = self._collapse.copy() s._sort = self._sort[:] s._source = copy.copy(self._source) if self._source is not None else None s._highlight = self._highlight.copy() @@ -446,6 +449,14 @@ self.aggs._params = { "aggs": {name: A(value) for (name, value) in aggs.items()} } + if "knn" in d: + self._knn = d.pop("knn") + if isinstance(self._knn, dict): + self._knn = [self._knn] + if "rank" in d: + self._rank = d.pop("rank") + if "collapse" in d: + self._collapse = d.pop("collapse") if "sort" in d: self._sort = d.pop("sort") if "_source" in d: @@ -493,6 +504,85 @@ s._script_fields.update(kwargs) return s + def knn( + self, + field, + k, + num_candidates, + query_vector=None, + query_vector_builder=None, + boost=None, + filter=None, + similarity=None, + ): + """ + Add a k-nearest neighbor (kNN) search. + + :arg field: the name of the vector field to search against + :arg k: number of nearest neighbors to return as top hits + :arg num_candidates: number of nearest neighbor candidates to consider per shard + :arg query_vector: the vector to search for + :arg query_vector_builder: A dictionary indicating how to build a query vector + :arg boost: A floating-point boost factor for kNN scores + :arg filter: query to filter the documents that can match + :arg similarity: the minimum similarity required for a document to be considered a match, as a float value + + Example:: + + s = Search() + s = s.knn(field='embedding', k=5, num_candidates=10, query_vector=vector, + filter=Q('term', category='blog'))) + """ + s = self._clone() + s._knn.append( + { + "field": field, + "k": k, + "num_candidates": num_candidates, + } + ) + if query_vector is None and query_vector_builder is None: + raise ValueError("one of query_vector and query_vector_builder is required") + if query_vector is not None and query_vector_builder is not None: + raise ValueError( + "only one of query_vector and query_vector_builder must be given" + ) + if query_vector is not None: + s._knn[-1]["query_vector"] = query_vector + if query_vector_builder is not None: + s._knn[-1]["query_vector_builder"] = query_vector_builder + if boost is not None: + s._knn[-1]["boost"] = boost + if filter is not None: + if isinstance(filter, Query): + s._knn[-1]["filter"] = filter.to_dict() + else: + s._knn[-1]["filter"] = filter + if similarity is not None: + s._knn[-1]["similarity"] = similarity + return s + + def rank(self, rrf=None): + """ + Defines a method for combining and ranking results sets from a combination + of searches. Requires a minimum of 2 results sets. + + :arg rrf: Set to ``True`` or an options dictionary to set the rank method to reciprocal rank fusion (RRF). + + Example:: + s = Search() + s = s.query('match', content='search text') + s = s.knn(field='embedding', k=5, num_candidates=10, query_vector=vector) + s = s.rank(rrf=True) + + Note: This option is in technical preview and may change in the future. The syntax will likely change before GA. + """ + s = self._clone() + s._rank = {} + if rrf is not None and rrf is not False: + s._rank["rrf"] = {} if rrf is True else rrf + return s + def source(self, fields=None, **kwargs): """ Selectively control how the _source field is returned. 
@@ -676,6 +766,15 @@ if self.query: d["query"] = self.query.to_dict() + if self._knn: + if len(self._knn) == 1: + d["knn"] = self._knn[0] + else: + d["knn"] = self._knn + + if self._rank: + d["rank"] = self._rank + # count request doesn't care for sorting and other things if not count: if self.post_filter: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/elasticsearch-dsl-py-8.11.0/setup.py new/elasticsearch-dsl-py-8.12.0/setup.py --- old/elasticsearch-dsl-py-8.11.0/setup.py 2023-11-13 12:54:53.000000000 +0100 +++ new/elasticsearch-dsl-py-8.12.0/setup.py 2024-01-19 12:11:44.000000000 +0100 @@ -19,7 +19,7 @@ from setuptools import find_packages, setup -VERSION = (8, 11, 0) +VERSION = (8, 12, 0) __version__ = VERSION __versionstr__ = ".".join(map(str, VERSION)) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/elasticsearch-dsl-py-8.11.0/tests/test_search.py new/elasticsearch-dsl-py-8.12.0/tests/test_search.py --- old/elasticsearch-dsl-py-8.11.0/tests/test_search.py 2023-11-13 12:54:53.000000000 +0100 +++ new/elasticsearch-dsl-py-8.12.0/tests/test_search.py 2024-01-19 12:11:44.000000000 +0100 @@ -234,6 +234,72 @@ assert s._doc_type_map == {} +def test_knn(): + s = search.Search() + + with raises(TypeError): + s.knn() + with raises(TypeError): + s.knn("field") + with raises(TypeError): + s.knn("field", 5) + with raises(ValueError): + s.knn("field", 5, 100) + with raises(ValueError): + s.knn("field", 5, 100, query_vector=[1, 2, 3], query_vector_builder={}) + + s = s.knn("field", 5, 100, query_vector=[1, 2, 3]) + assert { + "knn": { + "field": "field", + "k": 5, + "num_candidates": 100, + "query_vector": [1, 2, 3], + } + } == s.to_dict() + + s = s.knn( + k=4, + num_candidates=40, + boost=0.8, + field="name", + query_vector_builder={ + "text_embedding": {"model_id": "foo", "model_text": "search text"} + }, + ) + assert { + "knn": [ + { + "field": "field", + "k": 5, + "num_candidates": 100, + "query_vector": [1, 2, 3], + }, + { + "field": "name", + "k": 4, + "num_candidates": 40, + "query_vector_builder": { + "text_embedding": {"model_id": "foo", "model_text": "search text"} + }, + "boost": 0.8, + }, + ] + } == s.to_dict() + + +def test_rank(): + s = search.Search() + s.rank(rrf=False) + assert {} == s.to_dict() + + s = s.rank(rrf=True) + assert {"rank": {"rrf": {}}} == s.to_dict() + + s = s.rank(rrf={"window_size": 50, "rank_constant": 20}) + assert {"rank": {"rrf": {"window_size": 50, "rank_constant": 20}}} == s.to_dict() + + def test_sort(): s = search.Search() s = s.sort("fielda", "-fieldb") @@ -544,10 +610,12 @@ s = search.Search() s.update_from_dict({"indices_boost": [{"important-documents": 2}]}) s.update_from_dict({"_source": ["id", "name"]}) + s.update_from_dict({"collapse": {"field": "user_id"}}) assert { "indices_boost": [{"important-documents": 2}], "_source": ["id", "name"], + "collapse": {"field": "user_id"}, } == s.to_dict()
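
For reference, a minimal sketch of how the Search.knn() and Search.rank() additions in 8.12.0 fit together, assembled from the documentation and tests shown in the diff above. The index name, embedding size, and filter value are illustrative placeholders, and rank() is still in technical preview, so its syntax may change:

    from elasticsearch_dsl import Q, Search

    # Placeholder query vector; in practice this comes from an embedding model.
    vector = [0.1] * 384

    s = Search(index="blog-posts")  # index name is a placeholder
    s = s.query("match", content="search text")

    # field, k and num_candidates are required; one of query_vector or
    # query_vector_builder must also be supplied. Each call returns a clone.
    s = s.knn(
        field="embedding",
        k=5,
        num_candidates=10,
        query_vector=vector,
        filter=Q("term", category="blog"),
    )

    # Combine the query and kNN result sets with reciprocal rank fusion
    # (technical preview).
    s = s.rank(rrf=True)

    print(s.to_dict())
    # {'query': {'match': {'content': 'search text'}},
    #  'knn': {'field': 'embedding', 'k': 5, 'num_candidates': 10,
    #          'query_vector': [0.1, ...],
    #          'filter': {'term': {'category': 'blog'}}},
    #  'rank': {'rrf': {}}}

Inspecting the generated request body with to_dict() needs no cluster; executing the search additionally requires a configured Elasticsearch connection.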