This is an automated email from the ASF dual-hosted git repository.
wenjin272 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/flink-agents.git
The following commit(s) were added to refs/heads/main by this push:
new 1181f69f [doc] Update document for vector store. (#654)
1181f69f is described below
commit 1181f69f3dde16612e34e35fc7bdcfe443daf753
Author: Wenjin Xie <[email protected]>
AuthorDate: Mon May 11 10:43:45 2026 +0800
[doc] Update document for vector store. (#654)
---
docs/content/docs/development/vector_stores.md | 443 ++++++++++++++-----------
1 file changed, 241 insertions(+), 202 deletions(-)
diff --git a/docs/content/docs/development/vector_stores.md
b/docs/content/docs/development/vector_stores.md
index 4b730a88..5909e71a 100644
--- a/docs/content/docs/development/vector_stores.md
+++ b/docs/content/docs/development/vector_stores.md
@@ -40,8 +40,8 @@ In Flink Agents, vector stores are essential for:
- **Semantic Similarity**: Comparing and ranking documents by meaning rather
than keywords
### Concepts
-* **Document**: Document is the abstraction that represents a piece of text
and associated metadata.
-* **Collection**: Collection is the abstraction that represents a set of
documents. It corresponds to different concept for different vector store
specification, like index in Elasticsearch and collection in Chroma.
+* **Document**: Document is the abstraction that represents a piece of text
and associated metadata. A document may also carry a pre-computed `embedding`
vector and a `score` populated by query results.
+* **Filter DSL**: A unified, equality-only metadata filter dialect shared by
`query`, `get`, and `delete`. The DSL covers only the subset every supported
backend can honour (equality matching), so callers don't need to know each
store's native operators. See the [Filter DSL](#filter-dsl) section below for
details.
## How to use
@@ -97,7 +97,10 @@ Vector stores use structured query objects for consistent
interfaces:
# Create a semantic search query
query = VectorStoreQuery(
query_text="What is Apache Flink Agents?",
- limit=3
+ limit=3,
+ collection_name="my_collection", # optional: defaults to the
store's collection
+ filters={"category": "docs"}, # optional: unified equality filter
+ extra_args={"where_document": {...}}, # optional: backend-specific
parameters
)
```
@@ -106,10 +109,20 @@ query = VectorStoreQuery(
{{< tab "Java" >}}
```java
-// Create a semantic search query
+// Simple semantic-search query (defaults to default collection, no filter)
VectorStoreQuery query = new VectorStoreQuery(
"What is Apache Flink Agents?", // query text
- 3 // limit
+ 3 // limit
+);
+
+// Query with filters and explicit collection
+VectorStoreQuery filteredQuery = new VectorStoreQuery(
+ VectorStoreQueryMode.SEMANTIC,
+ "What is Apache Flink Agents?",
+ 3,
+ "my_collection",
+ Map.of("category", "docs"), // unified equality filter
+ Map.of() // extraArgs (backend-specific)
);
```
@@ -127,6 +140,8 @@ The `VectorStoreQueryResult` contains:
- **content**: The actual text content of the document
- **metadata**: Associated metadata (source, category, timestamp, etc.)
- **id**: Unique identifier of the document (if available)
+ - **embedding**: The pre-computed embedding vector (if available)
+ - **score**: Similarity / distance score against the query (only populated
by query results; `null` for non-query operations such as `get`). Semantics —
distance vs. similarity, metric — are implementation-specific; consult each
store's documentation.
{{< tabs "Query Results" >}}
@@ -152,13 +167,12 @@ VectorStoreQueryResult result = vectorStore.query(query);
### Manage collections
-User can dynamic create, get or delete collections in agent execution:
-* `get_or_create_collection`: Get a collection by name, create if not exists.
User can provide additional metadatas.
-* `get_collection`: Get a collection by name. The collection must be created
by flink-agents before.
-* `delete_collection`: Delete a collection by name.
+For vector stores that implement `CollectionManageableVectorStore`, you can
create or delete collections during agent execution:
+* `create_collection_if_not_exists` / `createCollectionIfNotExists`: Create
the collection if it doesn't already exist; no-op otherwise. Backend-specific
options (e.g. Chroma's `metadata`, Pinecone's `dimension` / `metric`) can be
passed via `**kwargs` / `kwargs`. Unknown keys are ignored.
+* `delete_collection` / `deleteCollection`: Delete a collection by name.
{{< hint info >}}
-Collection level operations is only supported for vector store that implements
`CollectionManageableVectorStore`. Currently, Chroma and Elasticsearch.
+Collection-level operations are only supported for vector stores that
implement `CollectionManageableVectorStore`. Among the built-in providers,
Chroma (Python), Elasticsearch (Java) and OpenSearch (Java) implement this
interface.
{{< /hint >}}
{{< tabs "Collection level operations" >}}
@@ -169,15 +183,14 @@ Collection level operations is only supported for vector
store that implements `
# get the vector store from runner context
vector_store: CollectionManageableVectorStore =
ctx.get_resource("vector_store", ResourceType.VECTOR_STORE)
-# create a collection
-collection: Collection = vector_store.get_or_create_collection("my_collection"
, metadata={"key1": "value1", "key2": "value2"})
-# get the collection
-collection: Collection = vector_store.get_collection("my_collection")
-# get the collection metadata
-metadata = collection.metadata
+# create a collection (no-op if it already exists)
+vector_store.create_collection_if_not_exists(
+ "my_collection",
+ metadata={"key1": "value1", "key2": "value2"}, # backend-specific,
ignored if unsupported
+)
# delete the collection
-vector_store.delete_collection("my_collection)
+vector_store.delete_collection("my_collection")
```
{{< /tab >}}
@@ -190,13 +203,10 @@ CollectionManageableVectorStore vectorStore =
(CollectionManageableVectorStore)
ctx.getResource("vector_store", ResourceType.VECTOR_STORE);
-// create a collection
-Collection collection = vectorStore.getOrCreateCollection(
- "my_collection", Map.of("key1", "value1", "key2", "value2"));
-// get the collection
-collection = vectorStore.getCollection("my_collection");
-// get the collection metadata
-Map<String, Object> metadata = collection.getMetadata();
+// create a collection (no-op if it already exists)
+vectorStore.createCollectionIfNotExists(
+ "my_collection",
+ Map.of("key1", "value1", "key2", "value2")); // backend-specific,
ignored if unsupported
// delete the collection
vectorStore.deleteCollection("my_collection");
@@ -208,13 +218,14 @@ vectorStore.deleteCollection("my_collection");
### Manage documents
-User can dynamic add, get or delete documents in agent execution:
-* `add`: Add documents to a collection. If document ID is not specified, will
generate random ID for each document.
-* `get`: Get documents from a collection by IDs. If no IDs are provided, get
all documents.
-* `delete`: Delete documents from a collection by IDs. If no IDs are provided,
delete all documents.
+You can add, update, get, or delete documents during agent execution:
+* `add`: Add documents to a collection. If a document has no `id`, the
implementation generates one. Documents whose `embedding` field is `None` /
`null` are auto-embedded by the configured embedding model.
+* `update`: Update existing documents in place. Identity is read from
`Document.id` — every document must have its `id` set; unlike `add`, `update`
does not generate ids.
+* `get`: Retrieve documents from a collection. When `ids` is provided, only
those documents are returned. Otherwise up to `limit` documents matching
`filters` are returned (default `limit=100`; pass `None` / `null` for
unbounded).
+* `delete`: Delete documents from a collection by `ids` or `filters`. When
neither is provided, all documents in the collection are deleted.
{{< hint info >}}
-If collection name is not specified, the document level operations will apply
to the default collection configured by vector store initialization parameters.
+If `collection_name` / `collection` is not specified, document-level
operations apply to the default collection configured at vector-store
initialization.
{{< /hint >}}
{{< tabs "Document level operations" >}}
@@ -223,23 +234,33 @@ If collection name is not specified, the document level
operations will apply to
```python
# get the vector store from runner context
-store: CollectionManageableVectorStore = ctx.get_resource("vector_store",
ResourceType.VECTOR_STORE)
+vector_store: CollectionManageableVectorStore =
ctx.get_resource("vector_store", ResourceType.VECTOR_STORE)
-# create or get a collection
-collection: Collection = vector_store.get_or_create_collection("my_collection"
, metadata={"key1": "value1", "key2": "value2"})
+# ensure the collection exists (no-op if it already does)
+vector_store.create_collection_if_not_exists("my_collection")
-# add documents to the collection
-documents = [Document(id="doc1", content="the first doc", metadata={"key":
"value1"}),
+# add documents to the collection (embeddings are auto-computed from `content`)
+documents = [Document(id="doc1", content="the first doc", metadata={"key":
"value1"}),
Document(id="doc2", content="the second doc", metadata={"key":
"value2"})]
vector_store.add(documents=documents, collection_name="my_collection")
+# update documents in place — every document must already have its `id` set
+vector_store.update(
+ documents=[Document(id="doc1", content="rewritten first doc",
metadata={"key": "value1"})],
+ collection_name="my_collection",
+)
+
# get documents by IDs
-doc: List[Document] = vector_store.get(ids="doc2",
collectioin_name="my_collection")
-# get all documents
-doc: List[Document] = vector_store.get(collectioin_name="my_collection")
+docs: List[Document] = vector_store.get(ids="doc2",
collection_name="my_collection")
+# get documents matching a metadata filter (limit defaults to 100; pass None
for unbounded)
+docs = vector_store.get(filters={"key": "value1"},
collection_name="my_collection")
+# get all documents (bounded by `limit`, defaults to 100)
+docs = vector_store.get(collection_name="my_collection")
# delete documents by IDs
vector_store.delete(ids=["doc1", "doc2"], collection_name="my_collection")
+# delete documents matching a metadata filter
+vector_store.delete(filters={"key": "value1"}, collection_name="my_collection")
# delete all documents
vector_store.delete(collection_name="my_collection")
```
@@ -251,39 +272,57 @@ vector_store.delete(collection_name="my_collection")
```java
// get the vector store from runner context
BaseVectorStore vectorStore =
- (BaseVectorStore)
- ctx.getResource("vectorStore",
ResourceType.VECTOR_STORE);
-// create or get a collection
-Collection collection = ((CollectionManageableVectorStore) vectorStore)
- .getOrCreateCollection("my_collection", Map.of("key1", "value1",
"key2", "value2"));
+ (BaseVectorStore) ctx.getResource("vectorStore",
ResourceType.VECTOR_STORE);
+
+// ensure the collection exists (no-op if it already does)
+((CollectionManageableVectorStore) vectorStore)
+ .createCollectionIfNotExists("my_collection", Map.of());
-// add documents to the collection
+// add documents to the collection (embeddings are auto-computed from
`content`)
List<Document> documents = List.of(
- new Document(
- "the first doc.",
- Map.of("key", "value1"),
- "doc1"),
- new Document(
- "the second doc",
- Map.of("key", "value2"),
- "doc2"));
-vectorStore.add(documents, "my_collection", Collections.emptyMap());
-
-// get documents by IDs
-List<Document> docs = vectorStore.get(List.of("doc1"), "my_collection",
Collections.emptyMap());
-// get all documents
-docs = vectorStore.get(null, "my_collection", Collections.emptyMap());
+ new Document("the first doc.", Map.of("key", "value1"), "doc1"),
+ new Document("the second doc", Map.of("key", "value2"), "doc2"));
+vectorStore.add(documents, "my_collection", Map.of());
+
+// update documents in place — every document must already have its `id` set
+vectorStore.update(
+ List.of(new Document("rewritten first doc", Map.of("key", "value1"),
"doc1")),
+ "my_collection",
+ Map.of());
+
+// get documents by IDs (convenience overloads avoid passing nulls)
+List<Document> docs = vectorStore.getByIds(List.of("doc1"), "my_collection");
+// get documents matching a metadata filter
+docs = vectorStore.getByFilters(Map.of("key", "value1"));
+// full signature — pass `limit=null` for unbounded
+docs = vectorStore.get(null, "my_collection", Map.of("key", "value1"), 100,
Map.of());
// delete documents by IDs
-vectorStore.delete(List.of("doc1", "doc2"), "my_collection",
Collections.emptyMap());
-// delete all documents
-vectorStore.delete(null, "my_collection", Collections.emptyMap());
+vectorStore.deleteByIds(List.of("doc1", "doc2"), "my_collection");
+// delete documents matching a metadata filter
+vectorStore.deleteByFilters(Map.of("key", "value1"));
+// delete all documents in a collection
+vectorStore.delete(null, "my_collection", null, Map.of());
```
{{< /tab >}}
{{< /tabs >}}
+#### Filter DSL
+
+`query`, `get`, and `delete` all accept the same unified `filters` map. The
dialect intentionally covers only the subset every backend supports — equality
matching — so callers don't have to know each store's native operators.
+
+```text
+# Equality — "field equals value":
+{"field": value}
+
+# Multiple top-level keys are implicitly AND-ed:
+{"user_id": "u1", "run_id": "r1"}
+```
+
+`None` / `null` means "no filter". Richer operators (ranges, set membership,
OR, NOT, etc.) are out of scope here. Callers needing backend-specific
operators should pass them through `extra_args` (Python
`VectorStoreQuery.extra_args` or `**kwargs`) or `extraArgs` (Java) — for
example, ChromaDB's native `where` dict. Implementations that receive an
unsupported operator via `filters` raise `NotImplementedError` (Python) or
`UnsupportedOperationException` (Java).
+
### Usage Example
Here's how to define and use vector stores in your agent:
@@ -432,8 +471,8 @@ Chroma is currently supported in the Python API only. To
use Chroma from Java ag
| `tenant` | str | `"default_tenant"` | ChromaDB tenant for multi-tenancy
support |
| `database` | str | `"default_database"` | ChromaDB database name |
| `collection` | str | `"flink_agents_chroma_collection"` | Name of the
ChromaDB collection to use |
-| `collection_metadata` | dict | `{}` | Metadata for the collection |
-| `create_collection_if_not_exists` | bool | `True` | Whether to create the
collection if it doesn't exist |
+| `collection_metadata` | dict | `{}` | Metadata for the collection (applied
only when the read / write paths auto-create it) |
+| `auto_create_collection` | bool | `True` | Whether read / write paths
auto-create the collection when it's missing |
#### Usage Example
@@ -471,7 +510,7 @@ class MyAgent(Agent):
embedding_model="openai_embedding",
persist_directory="/path/to/chroma/data", # For persistent storage
collection="my_documents",
- create_collection_if_not_exists=True
+ auto_create_collection=True
# Or use other modes:
# "host": "localhost", "port": 8000 # For server mode
# "api_key": "your-chroma-cloud-key" # For cloud mode
@@ -573,9 +612,7 @@ Elasticsearch is currently supported in the Java API only.
To use Elasticsearch
| `api_key_secret` | str | None | API key secret for
authentication |
{{< hint warning >}}
-For index not create by flink-agents, the index must have a `dense_tensor`
field, and user must specify the filed name by `vector_field`.
-
-And, the index can't be accessed by collection level operations due to
Elasticsearch does not support store index metadata natively.
+For an index not created by flink-agents, the index must already contain a
`dense_vector` field, and the user must specify its name via `vector_field`.
{{< /hint >}}
#### Usage Example
@@ -742,7 +779,7 @@ If you want to use vector stores not offered by the
built-in providers, you can
### BaseVectorStore
-The base class handles text-to-vector conversion and provides the high-level
add and query interface. You only need to implement the core search
functionality and other basic document level operations.
+The base class handles text-to-vector conversion and provides the high-level
`add`, `update`, and `query` interfaces. You only need to implement the public
document-level operations (`get` / `delete`) and the
pre-computed-embedding hooks (`_query_embedding` / `_add_embedding` /
`_update_embedding` in Python; `queryEmbedding` / `addEmbedding` /
`updateEmbedding` in Java).
{{< tabs "Custom Vector Store" >}}
@@ -757,34 +794,29 @@ class MyVectorStore(BaseVectorStore):
# Return vector store-specific configuration
# These parameters are merged with query-specific parameters
return {"index": "my_index", ...}
-
- @override
- def size(self, collection_name: str | None = None) -> int:
- """Get the size of the collection in vector store.
-
- Args:
- collection_name: The target collection. If not provided, use
defualt collection.
- """
- size = ...
- return size
@override
def get(
self,
ids: str | List[str] | None = None,
collection_name: str | None = None,
+ filters: Dict[str, Any] | None = None,
+ limit: int | None = 100,
**kwargs: Any,
) -> List[Document]:
- """Retrieve documents from the vector store by its ID.
+ """Retrieve documents from the vector store.
- Args:
- ids: Unique identifier of the documents to retrieve. If not
provided, get all documents.
- collection_name: The collection name of the documents to retrieve.
- If not provided, use defualt collection.
- **kwargs: Vector store specific parameters (offset, limit, filter
etc.)
+ When ``ids`` is provided, the ``ids`` list itself bounds the result
size
+ and ``limit`` is effectively ignored. Without ``ids``, up to ``limit``
+ documents matching ``filters`` (or all, when no filter is set) are
+ returned. ``limit=None`` means unbounded.
- Returns:
- Document object if found, None otherwise
+ Args:
+ ids: Unique identifier(s) of the documents to retrieve.
+ collection_name: Target collection. If not provided, use the
default collection.
+ filters: Metadata filter in the unified DSL (equality only);
``None`` = no filter.
+ limit: Maximum number of documents to return. Defaults to 100;
pass ``None`` for unbounded.
+ **kwargs: Vector store-specific parameters (offset, etc.).
"""
documents: List[Document] = ...
return documents
@@ -794,36 +826,42 @@ class MyVectorStore(BaseVectorStore):
self,
ids: str | List[str] | None = None,
collection_name: str | None = None,
+ filters: Dict[str, Any] | None = None,
**kwargs: Any,
) -> None:
- """Delete documents in the vector store by its IDs.
+ """Delete documents in the vector store.
Args:
- ids: Unique identifier of the documents to delete. If not
provided, delete all documents.
- collection_name: The collection name of the documents belong to.
- If not provided, use defualt collection.
- **kwargs: Vector store specific parameters (filter etc.)
+ ids: Unique identifier(s) of the documents to delete. If neither
``ids``
+ nor ``filters`` is provided, all documents in the collection
are deleted.
+ collection_name: Target collection. If not provided, use the
default collection.
+ filters: Metadata filter in the unified DSL (equality only);
``None`` = no filter.
+ **kwargs: Vector store-specific parameters.
"""
- # delete the documents
+ # delete the documents
pass
@override
- def query_embedding(self, embedding: List[float], limit: int = 10,
**kwargs: Any) -> List[Document]:
- """Perform vector search using pre-computed embedding.
+ def _query_embedding(
+ self,
+ embedding: List[float],
+ limit: int = 10,
+ collection_name: str | None = None,
+ filters: Dict[str, Any] | None = None,
+ **kwargs: Any,
+ ) -> List[Document]:
+ """Perform vector search using a pre-computed embedding.
Args:
- embedding: Pre-computed embedding vector for semantic search
- limit: Maximum number of results to return (default: 10)
- collection_name: The collection to apply the query.
- If not provided, use default collection.
- **kwargs: Vector store-specific parameters (filters, distance
metrics, etc.)
-
- Returns:
- List of documents matching the search criteria
+ embedding: Pre-computed embedding vector for semantic search.
+ limit: Maximum number of results to return (default: 10).
+ collection_name: Target collection. If not provided, use the
default collection.
+ filters: Metadata filter in the unified DSL (equality only);
``None`` = no filter.
+ **kwargs: Vector store-specific parameters (distance metrics,
etc.).
"""
documents: List[Document] = ...
return documents
-
+
@override
def _add_embedding(
self,
@@ -835,17 +873,34 @@ class MyVectorStore(BaseVectorStore):
"""Add documents with pre-computed embeddings to the vector store.
Args:
- documents: Documents with embeddings to add to the vector store
- collection_name: The collection name of the documents to add.
- If not provided, use default collection.
- **kwargs: Vector store-specific parameters (collection, namespace,
etc.)
+ documents: Documents (with ``embedding`` populated) to add.
+ collection_name: Target collection. If not provided, use the
default collection.
+ **kwargs: Vector store-specific parameters.
Returns:
- List of document IDs that were added to the vector store
+ List of document IDs that were added.
"""
# add the documents
ids: List[str] = ...
return ids
+
+ @override
+ def _update_embedding(
+ self,
+ *,
+ documents: List[Document],
+ collection_name: str | None = None,
+ **kwargs: Any,
+ ) -> None:
+ """Update documents with pre-computed embeddings. Identity is read
from ``Document.id``.
+
+ Args:
+ documents: Documents carrying ``id`` plus the new content /
metadata / embedding.
+ collection_name: Target collection. If not provided, use the
default collection.
+ **kwargs: Vector store-specific parameters.
+ """
+ # update the documents
+ pass
```
{{< /tab >}}
@@ -869,33 +924,27 @@ public class MyVectorStore extends BaseVectorStore {
kwargs.put("index", "my_index");
return kwargs;
}
-
- /**
- * Get the size of the collection in vector store.
- *
- * @param collection The name of the collection to count. If is null,
count the default
- * collection.
- * @return The documents count in the collection.
- */
- @Override
- public long size(@Nullable String collection) throws Exception {
- size = ...;
- return size;
- }
/**
* Retrieve documents from the vector store.
*
- * @param ids The ids of the documents. If is null, get all the documents
or first n documents
- * according to implementation specific limit.
- * @param collection The name of the collection to be retrieved. If is
null, retrieve the
- * default collection.
- * @param extraArgs Additional arguments.
- * @return List of documents retrieved.
+ * <p>When {@code ids} is provided, the {@code ids} list itself bounds the
result size
+ * and {@code limit} is effectively ignored. Without {@code ids}, up to
{@code limit}
+ * documents matching {@code filters} (or all, when no filter is set) are
returned.
+ *
+ * @param ids The ids of the documents. If null, retrieve documents
matching {@code filters}.
+ * @param collection Target collection. If null, retrieve from the default
collection.
+ * @param filters Metadata filter in the unified DSL (equality only);
{@code null} = no filter.
+ * @param limit Maximum number of documents to return. Defaults to
100; pass {@code null} for unbounded.
+ * @param extraArgs Additional arguments.
*/
@Override
public List<Document> get(
- @Nullable List<String> ids, @Nullable String collection,
Map<String, Object> extraArgs)
+ @Nullable List<String> ids,
+ @Nullable String collection,
+ @Nullable Map<String, Object> filters,
+ @Nullable Integer limit,
+ Map<String, Object> extraArgs)
throws IOException {
List<Document> documents = ...;
return documents;
@@ -904,14 +953,17 @@ public class MyVectorStore extends BaseVectorStore {
/**
* Delete documents in the vector store.
*
- * @param ids The ids of the documents. If is null, delete all the
documents.
- * @param collection The name of the collection the documents belong to.
If is null, use the
- * default collection.
- * @param extraArgs Additional arguments.
+ * @param ids The ids of the documents. If null, delete documents
matching {@code filters}; if both are null, delete all documents in the collection.
+ * @param collection Target collection. If null, use the default
collection.
+ * @param filters Metadata filter in the unified DSL (equality only);
{@code null} = no filter.
+ * @param extraArgs Additional arguments.
*/
@Override
public void delete(
- @Nullable List<String> ids, @Nullable String collection,
Map<String, Object> extraArgs)
+ @Nullable List<String> ids,
+ @Nullable String collection,
+ @Nullable Map<String, Object> filters,
+ Map<String, Object> extraArgs)
throws IOException {
// delete the documents
}
@@ -919,36 +971,53 @@ public class MyVectorStore extends BaseVectorStore {
/**
* Performs vector search using a pre-computed embedding.
*
- * @param embedding The embedding vector to search with
- * @param limit Maximum number of results to return
- * @param collection The collection to query to. If is null, query the
default collection.
- * @param args Additional arguments for the vector search
- * @return List of documents matching the query embedding
+ * @param embedding The embedding vector to search with.
+ * @param limit Maximum number of results to return.
+ * @param collection Target collection. If null, query the default
collection.
+ * @param filters Metadata filter in the unified DSL (equality only);
{@code null} = no filter.
+ * @param args Additional arguments for the vector search.
*/
@Override
- protected List<Document> queryEmbedding(
- float[] embedding, int limit, @Nullable String collection,
Map<String, Object> args) {
+ public List<Document> queryEmbedding(
+ float[] embedding,
+ int limit,
+ @Nullable String collection,
+ @Nullable Map<String, Object> filters,
+ Map<String, Object> args) {
List<Document> documents = ...;
return documents;
}
/**
- * Add documents with pre-computed embedding to vector store.
+ * Add documents with pre-computed embeddings to the vector store.
*
- * @param documents The documents to be added.
- * @param collection The name of the collection to add to. If is null, add
to the default
- * collection.
- * @param extraArgs Additional arguments.
+ * @param documents Documents (with embeddings populated) to add.
+ * @param collection Target collection. If null, add to the default
collection.
+ * @param extraArgs Additional arguments.
* @return IDs of the added documents.
*/
@Override
- protected List<String> addEmbedding(
+ public List<String> addEmbedding(
List<Document> documents, @Nullable String collection, Map<String,
Object> extraArgs)
throws IOException {
// add the documents
List<String> ids = ...;
return ids;
}
+
+ /**
+ * Update documents with pre-computed embeddings. Identity is read from
{@link Document#getId()}.
+ *
+ * @param documents Documents carrying id plus the new content / metadata
/ embedding.
+ * @param collection Target collection. If null, use the default
collection.
+ * @param extraArgs Additional arguments.
+ */
+ @Override
+ public void updateEmbedding(
+ List<Document> documents, @Nullable String collection, Map<String,
Object> extraArgs)
+ throws IOException {
+ // update the documents
+ }
}
```
@@ -958,7 +1027,7 @@ public class MyVectorStore extends BaseVectorStore {
### CollectionManageableVectorStore
-For vector store which support collection level operations, user can implement
follow methods additionally.
+For vector stores that support collection-level management, additionally
implement the following methods:
{{< tabs "Custom Vector Store support Collection" >}}
@@ -968,46 +1037,31 @@ For vector store which support collection level
operations, user can implement f
class MyVectorStore(CollectionManageableVectorStore):
# Add your custom configuration fields here
- # implementation for `BaseVectoStore` method.
-
- @override
- def get_or_create_collection(
- self, name: str, metadata: Dict[str, Any] | None = None
- ) -> Collection:
- """Get a collection, or create it if it doesn't exist.
-
- Args:
- name: Name of the collection
- metadata: Metadata of the collection
- Returns:
- The retrieved or created collection
- """
- collection: Collection = ...
- return collection
+ # implementation for `BaseVectorStore` methods (see above).
@override
- def get_collection(self, name: str) -> Collection:
- """Get a collection, raise an exception if it doesn't exist.
+ def create_collection_if_not_exists(self, name: str, **kwargs: Any) ->
None:
+ """Create the collection if it doesn't already exist; no-op otherwise.
Args:
- name: Name of the collection
- Returns:
- The retrieved collection
+ name: Name of the collection.
+ **kwargs: Backend-specific options applied only when the collection
+ is created (e.g. Chroma's ``metadata`` dict, Pinecone's
+ ``dimension`` / ``metric``). Document which keys are
recognized;
+ unknown keys should be ignored.
"""
- collection: Collection = ...
- return collection
+ # create the collection if missing
+ pass
@override
- def delete_collection(self, name: str) -> Collection:
+ def delete_collection(self, name: str) -> None:
"""Delete a collection.
Args:
- name: Name of the collection
- Returns:
- The deleted collection
+ name: Name of the collection.
"""
- collection: Collection = ...
- return collection
+ # delete the collection
+ pass
```
{{< /tab >}}
@@ -1016,46 +1070,31 @@ class MyVectorStore(CollectionManageableVectorStore):
```java
public class MyVectorStore extends BaseVectorStore
- implements CollectionManageableVectorStore{
+ implements CollectionManageableVectorStore {
// Add your custom configuration fields here
- // implementation for `BaseVectoStore` method.
-
- /**
- * Get a collection, or create it if it doesn't exist.
- *
- * @param name The name of the collection to get or create.
- * @param metadata The metadata of the collection.
- * @return The retrieved or created collection.
- */
- @override
- public Collection getOrCreateCollection(String name, Map<String, Object>
metadata) throws Exception {
- Collection collection = ...;
- return collection;
- }
+ // implementation for `BaseVectorStore` methods (see above).
/**
- * Get a collection by name.
+ * Create the collection if it doesn't already exist; no-op otherwise.
*
- * @param name The name of the collection to get.
- * @return The retrieved collection.
+ * @param name The name of the collection.
+ * @param kwargs Backend-specific options applied only when the collection
is created.
+ * Document which keys are recognized; unknown keys should
be ignored.
*/
- @override
- public Collection getCollection(String name) throws Exception {
- Collection collection = ...;
- return collection;
+ @Override
+ public void createCollectionIfNotExists(String name, Map<String, Object>
kwargs) throws Exception {
+ // create the collection if missing
}
/**
* Delete a collection by name.
*
* @param name The name of the collection to delete.
- * @return The deleted collection.
*/
- @override
- public Collection deleteCollection(String name) throws Exception {
- Collection collection = ...;
- return collection;
+ @Override
+ public void deleteCollection(String name) throws Exception {
+ // delete the collection
}
}
```