mcolomerc commented on code in PR #250: URL: https://github.com/apache/flink-agents/pull/250#discussion_r2450720999
########## api/src/main/java/org/apache/flink/agents/api/embedding/model/BaseEmbeddingModelConnection.java: ########## @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.agents.api.embedding.model; + +import org.apache.flink.agents.api.resource.Resource; +import org.apache.flink.agents.api.resource.ResourceDescriptor; +import org.apache.flink.agents.api.resource.ResourceType; + +import java.util.List; +import java.util.Map; +import java.util.function.BiFunction; + +/** + * Abstraction of embedding model connection. + * + * <p>Responsible for managing embedding model service connection configurations, such as Service + * address, API key, Connection timeout, Model name, Authentication information, etc. + * + * <p>This class follows the parameter pattern where additional configuration options can be passed + * through a Map<String, Object> parameters argument. Common parameters include: + * + * <ul> + * <li>model - The model name to use for embeddings + * <li>encoding_format - The format for encoding (e.g., "float", "base64") + * <li>timeout - Request timeout in milliseconds + * <li>batch_size - Maximum number of texts to process in a single request + * </ul> + */ +public abstract class BaseEmbeddingModelConnection extends Resource { + + public BaseEmbeddingModelConnection( + ResourceDescriptor descriptor, BiFunction<String, ResourceType, Resource> getResource) { + super(descriptor, getResource); + } + + @Override + public ResourceType getResourceType() { + return ResourceType.EMBEDDING_MODEL_CONNECTION; + } + + /** + * Generate embeddings for a single text input. + * + * @param text The input text to generate embeddings for + * @param parameters Additional parameters to configure the embedding request. Common parameters + * include: - "dimensions" (Integer): The desired embedding dimension (model-dependent) - + * "model" (String): The specific model variant to use - "encoding_format" (String): The + * format for encoding (e.g., "float", "base64") - "timeout" (Integer): Request timeout in + * milliseconds + * @return An array of floating-point values representing the text embeddings. The length of the + * array corresponds to the requested dimension or the model's default. + */ + public abstract float[] embed(String text, Map<String, Object> parameters); + + /** + * Generate embeddings for multiple text inputs. + * + * @param texts The list of input texts to generate embeddings for + * @param parameters Additional parameters to configure the embedding request. Common parameters + * include: - "dimensions" (Integer): The desired embedding dimension (model-dependent) - + * "model" (String): The specific model variant to use - "encoding_format" (String): The + * format for encoding (e.g., "float", "base64") - "batch_size" (Integer): Maximum number of + * texts to process in a single request - "timeout" (Integer): Request timeout in + * milliseconds + * @return A list of arrays, each containing floating-point values representing the text + * embeddings. The length of each array corresponds to the requested dimension or the + * model's default. + */ + public abstract List<float[]> embed(List<String> texts, Map<String, Object> parameters); + + /** + * Get the default embedding dimension for this model when no specific dimension is requested. + * This provides a fallback dimension that the model supports by default. + * + * @return The default embedding dimension + */ + public abstract int getDefaultEmbeddingDimension(); + + /** + * Get the range of supported embedding dimensions for this model. This helps clients understand + * what dimension values can be passed in parameters. + * + * @return An array where [0] is the minimum supported dimension and [1] is the maximum, or null + * if the model only supports fixed dimensions + */ + public int[] getSupportedDimensionRange() { + // Default implementation: only supports the default dimension + int defaultDim = getDefaultEmbeddingDimension(); + return new int[] {defaultDim, defaultDim}; + } Review Comment: _Get the default embedding dimension for this model when no specific dimension is requested. * This provides a fallback dimension that the model supports by default._ It's not very common to change vector dimension in your pipeline, without changing the model, so it's quite related to the model used, so the implementation is making it as default by model Ollama embedding models: nomic-embed-text → outputs vectors of length 768 mxbai-embed-large → outputs vectors of length 1024 all-minilm → outputs vectors of length 384 OpenAI text-embedding-3-large --> 3072 If you try to use nomic-embed-text (768) but initialize your index or retrieval system with dimension 1024, you’ll get errors such as: _ValueError: dimension mismatch: expected 1024 but got 768_ Those are the reasons of managing the dimension, the `OllamaEmbeddingModelConnection` is managing and setting dimension for the given Ollama model, avoiding a wrong setting error. We can change it and set the dimension in `@EmbeddingModelSetup`, and trust the user it uses the correct dimension for the model, but for me it make sense to set it only once as default, not for every request, because is not going to change. Do you have an example of changing the dimension when building a RAG? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
