This is an automated email from the ASF dual-hosted git repository.
acosentino pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/camel.git
The following commit(s) were added to refs/heads/main by this push:
new 7e3c1428614 CAMEL-20739 - Camel-Pinecone: Add a datatype for
transforming langchain embeddings in Pinecone objects (#14061)
7e3c1428614 is described below
commit 7e3c1428614ca6ae5193dfa017fc08db6902a849
Author: Andrea Cosentino <[email protected]>
AuthorDate: Mon May 6 14:41:27 2024 +0200
CAMEL-20739 - Camel-Pinecone: Add a datatype for transforming langchain
embeddings in Pinecone objects (#14061)
Signed-off-by: Andrea Cosentino <[email protected]>
---
.../apache/camel/catalog/transformers.properties | 1 +
.../catalog/transformers/pinecone-embeddings.json | 14 ++
.../camel-ai/camel-langchain4j-embeddings/pom.xml | 5 +
...Chain4jEmbeddingsComponentPineconeTargetIT.java | 149 +++++++++++++++++++++
.../org/apache/camel/transformer.properties | 7 +
.../apache/camel/transformer/pinecone-embeddings | 2 +
.../camel/transformer/pinecone-embeddings.json | 14 ++
.../PineconeEmbeddingsDataTypeTransformer.java | 45 +++++++
8 files changed, 237 insertions(+)
diff --git
a/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/transformers.properties
b/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/transformers.properties
index 3b404011535..4efe6eae08f 100644
---
a/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/transformers.properties
+++
b/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/transformers.properties
@@ -27,6 +27,7 @@ google-sheets-stream-application-cloudevents
google-storage-application-cloudevents
http-application-cloudevents
milvus-embeddings
+pinecone-embeddings
protobuf-binary
protobuf-x-java-object
protobuf-x-struct
diff --git
a/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/transformers/pinecone-embeddings.json
b/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/transformers/pinecone-embeddings.json
new file mode 100644
index 00000000000..42b2bf34e7b
--- /dev/null
+++
b/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/transformers/pinecone-embeddings.json
@@ -0,0 +1,14 @@
+{
+ "transformer": {
+ "kind": "transformer",
+ "name": "pinecone:embeddings",
+ "title": "Pinecone (Embeddings)",
+ "description": "Prepares the message to become an object writable by
Pinecone component",
+ "deprecated": false,
+ "javaType":
"org.apache.camel.component.pinecone.transform.PineconeEmbeddingsDataTypeTransformer",
+ "groupId": "org.apache.camel",
+ "artifactId": "camel-pinecone",
+ "version": "4.6.0-SNAPSHOT"
+ }
+}
+
diff --git a/components/camel-ai/camel-langchain4j-embeddings/pom.xml
b/components/camel-ai/camel-langchain4j-embeddings/pom.xml
index 06793560c05..79bb7cf9192 100644
--- a/components/camel-ai/camel-langchain4j-embeddings/pom.xml
+++ b/components/camel-ai/camel-langchain4j-embeddings/pom.xml
@@ -69,6 +69,11 @@
<artifactId>camel-milvus</artifactId>
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>org.apache.camel</groupId>
+ <artifactId>camel-pinecone</artifactId>
+ <scope>test</scope>
+ </dependency>
<dependency>
<groupId>org.apache.camel</groupId>
<artifactId>camel-test-junit5</artifactId>
diff --git
a/components/camel-ai/camel-langchain4j-embeddings/src/test/java/org/apache/camel/component/langchain4j/embeddings/LangChain4jEmbeddingsComponentPineconeTargetIT.java
b/components/camel-ai/camel-langchain4j-embeddings/src/test/java/org/apache/camel/component/langchain4j/embeddings/LangChain4jEmbeddingsComponentPineconeTargetIT.java
new file mode 100644
index 00000000000..9e5b3c51621
--- /dev/null
+++
b/components/camel-ai/camel-langchain4j-embeddings/src/test/java/org/apache/camel/component/langchain4j/embeddings/LangChain4jEmbeddingsComponentPineconeTargetIT.java
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.langchain4j.embeddings;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import dev.langchain4j.model.embedding.AllMiniLmL6V2EmbeddingModel;
+import io.pinecone.unsigned_indices_model.QueryResponseWithUnsignedIndices;
+import org.apache.camel.CamelContext;
+import org.apache.camel.Exchange;
+import org.apache.camel.RoutesBuilder;
+import org.apache.camel.builder.RouteBuilder;
+import org.apache.camel.component.pinecone.PineconeVectorDb;
+import org.apache.camel.component.pinecone.PineconeVectorDbAction;
+import org.apache.camel.spi.DataType;
+import org.apache.camel.test.junit5.CamelTestSupport;
+import org.junit.jupiter.api.*;
+import org.junit.jupiter.api.condition.EnabledIfSystemProperties;
+import org.junit.jupiter.api.condition.EnabledIfSystemProperty;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+// Must be run manually against a real Pinecone account: it creates, writes to, queries and deletes an index.
+// Provide your own Pinecone API token using -Dpinecone.token (the test is disabled when the property is not set).
+@EnabledIfSystemProperties({
+        @EnabledIfSystemProperty(named = "pinecone.token", matches = ".*", disabledReason = "Pinecone token not provided"),
+})
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
+@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
+public class LangChain4jEmbeddingsComponentPineconeTargetIT extends CamelTestSupport {
+    public static final long POINT_ID = 8;
+    public static final String PINECONE_URI = "pinecone:embeddings?token={{pinecone.token}}";
+
+    // Index used by every step; the @Order values make the steps run as create -> upsert -> query -> delete.
+    private static final String INDEX_NAME = "embeddings";
+    // Dimension the index is created with; also the length of the random query vector.
+    private static final int VECTOR_DIMENSION = 384;
+
+    @Override
+    protected CamelContext createCamelContext() throws Exception {
+        CamelContext context = super.createCamelContext();
+
+        // Registered under a fixed name so that it can be looked up from the Camel registry.
+        context.getRegistry().bind("embedding-model", new AllMiniLmL6V2EmbeddingModel());
+
+        return context;
+    }
+
+    @Test
+    @Order(1)
+    public void createServerlessIndex() {
+
+        Exchange result = fluentTemplate.to(PINECONE_URI)
+                .withHeader(PineconeVectorDb.Headers.ACTION, PineconeVectorDbAction.CREATE_SERVERLESS_INDEX)
+                .withBody("hello")
+                .withHeader(PineconeVectorDb.Headers.INDEX_NAME, INDEX_NAME)
+                .withHeader(PineconeVectorDb.Headers.COLLECTION_SIMILARITY_METRIC, "cosine")
+                .withHeader(PineconeVectorDb.Headers.COLLECTION_DIMENSION, VECTOR_DIMENSION)
+                .withHeader(PineconeVectorDb.Headers.COLLECTION_CLOUD, "aws")
+                .withHeader(PineconeVectorDb.Headers.COLLECTION_CLOUD_REGION, "us-east-1")
+                .request(Exchange.class);
+
+        assertThat(result).isNotNull();
+        assertThat(result.getException()).isNull();
+    }
+
+    @Test
+    @Order(2)
+    public void upsert() {
+
+        // Goes through direct:in so the text body is embedded and transformed before reaching Pinecone.
+        // Note that the route sets ACTION and INDEX_ID itself, replacing the values passed here.
+        Exchange result = fluentTemplate.to("direct:in")
+                .withHeader(PineconeVectorDb.Headers.ACTION, PineconeVectorDbAction.UPSERT)
+                .withBody("hi")
+                .withHeader(PineconeVectorDb.Headers.INDEX_NAME, INDEX_NAME)
+                .withHeader(PineconeVectorDb.Headers.INDEX_ID, "elements")
+                .request(Exchange.class);
+
+        assertThat(result).isNotNull();
+        assertThat(result.getException()).isNull();
+    }
+
+    @Test
+    @Order(3)
+    public void queryByVector() {
+
+        List<Float> elements = generateFloatVector();
+
+        // NOTE(review): top-k is 384, the same number as the vector dimension - presumably coincidental, confirm.
+        Exchange result = fluentTemplate.to(PINECONE_URI)
+                .withHeader(PineconeVectorDb.Headers.ACTION, PineconeVectorDbAction.QUERY)
+                .withBody(elements)
+                .withHeader(PineconeVectorDb.Headers.INDEX_NAME, INDEX_NAME)
+                .withHeader(PineconeVectorDb.Headers.QUERY_TOP_K, 384)
+                .request(Exchange.class);
+
+        assertThat(result).isNotNull();
+        assertThat(result.getException()).isNull();
+
+        // Check the type first so an unexpected body fails as an assertion rather than a ClassCastException.
+        Object body = result.getMessage().getBody();
+        assertThat(body).isInstanceOf(QueryResponseWithUnsignedIndices.class);
+        assertThat(((QueryResponseWithUnsignedIndices) body).getMatchesList()).isNotNull();
+    }
+
+    @Test
+    @Order(4)
+    public void deleteIndex() {
+
+        Exchange result = fluentTemplate.to(PINECONE_URI)
+                .withHeader(PineconeVectorDb.Headers.ACTION, PineconeVectorDbAction.DELETE_INDEX)
+                .withBody("test")
+                .withHeader(PineconeVectorDb.Headers.INDEX_NAME, INDEX_NAME)
+                .request(Exchange.class);
+
+        assertThat(result).isNotNull();
+        assertThat(result.getException()).isNull();
+    }
+
+    @Override
+    protected RoutesBuilder createRouteBuilder() {
+        return new RouteBuilder() {
+            @Override
+            public void configure() {
+                // text body -> embedding -> pinecone:embeddings data type -> Pinecone upsert
+                from("direct:in")
+                        .to("langchain4j-embeddings:test")
+                        .setHeader(PineconeVectorDb.Headers.ACTION).constant(PineconeVectorDbAction.UPSERT)
+                        .setHeader(PineconeVectorDb.Headers.INDEX_ID).constant(POINT_ID)
+                        .transform(new DataType("pinecone:embeddings"))
+                        .to(PINECONE_URI);
+            }
+        };
+    }
+
+    /**
+     * Builds a query vector of {@link #VECTOR_DIMENSION} random floats.
+     *
+     * The values are unseeded and differ between runs; the query test only asserts on the response type and on
+     * the match list being non-null, not on the matches themselves.
+     */
+    private List<Float> generateFloatVector() {
+        Random random = new Random();
+        List<Float> vector = new ArrayList<>(VECTOR_DIMENSION);
+        for (int i = 0; i < VECTOR_DIMENSION; i++) {
+            vector.add(random.nextFloat());
+        }
+        return vector;
+    }
+}
diff --git
a/components/camel-pinecone/src/generated/resources/META-INF/services/org/apache/camel/transformer.properties
b/components/camel-pinecone/src/generated/resources/META-INF/services/org/apache/camel/transformer.properties
new file mode 100644
index 00000000000..a89223e117e
--- /dev/null
+++
b/components/camel-pinecone/src/generated/resources/META-INF/services/org/apache/camel/transformer.properties
@@ -0,0 +1,7 @@
+# Generated by camel build tools - do NOT edit this file!
+transformers=pinecone:embeddings
+groupId=org.apache.camel
+artifactId=camel-pinecone
+version=4.6.0-SNAPSHOT
+projectName=Camel :: Pinecone
+projectDescription=Camel Pinecone support
diff --git
a/components/camel-pinecone/src/generated/resources/META-INF/services/org/apache/camel/transformer/pinecone-embeddings
b/components/camel-pinecone/src/generated/resources/META-INF/services/org/apache/camel/transformer/pinecone-embeddings
new file mode 100644
index 00000000000..026f84b6c31
--- /dev/null
+++
b/components/camel-pinecone/src/generated/resources/META-INF/services/org/apache/camel/transformer/pinecone-embeddings
@@ -0,0 +1,2 @@
+# Generated by camel build tools - do NOT edit this file!
+class=org.apache.camel.component.pinecone.transform.PineconeEmbeddingsDataTypeTransformer
diff --git
a/components/camel-pinecone/src/generated/resources/META-INF/services/org/apache/camel/transformer/pinecone-embeddings.json
b/components/camel-pinecone/src/generated/resources/META-INF/services/org/apache/camel/transformer/pinecone-embeddings.json
new file mode 100644
index 00000000000..42b2bf34e7b
--- /dev/null
+++
b/components/camel-pinecone/src/generated/resources/META-INF/services/org/apache/camel/transformer/pinecone-embeddings.json
@@ -0,0 +1,14 @@
+{
+ "transformer": {
+ "kind": "transformer",
+ "name": "pinecone:embeddings",
+ "title": "Pinecone (Embeddings)",
+ "description": "Prepares the message to become an object writable by
Pinecone component",
+ "deprecated": false,
+ "javaType":
"org.apache.camel.component.pinecone.transform.PineconeEmbeddingsDataTypeTransformer",
+ "groupId": "org.apache.camel",
+ "artifactId": "camel-pinecone",
+ "version": "4.6.0-SNAPSHOT"
+ }
+}
+
diff --git
a/components/camel-pinecone/src/main/java/org/apache/camel/component/pinecone/transform/PineconeEmbeddingsDataTypeTransformer.java
b/components/camel-pinecone/src/main/java/org/apache/camel/component/pinecone/transform/PineconeEmbeddingsDataTypeTransformer.java
new file mode 100644
index 00000000000..c3d4329bbbe
--- /dev/null
+++
b/components/camel-pinecone/src/main/java/org/apache/camel/component/pinecone/transform/PineconeEmbeddingsDataTypeTransformer.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.camel.component.pinecone.transform;
+
+import java.util.UUID;
+
+import dev.langchain4j.data.embedding.Embedding;
+import org.apache.camel.Message;
+import org.apache.camel.component.pinecone.PineconeVectorDb;
+import org.apache.camel.spi.DataType;
+import org.apache.camel.spi.DataTypeTransformer;
+import org.apache.camel.spi.Transformer;
+
+/**
+ * Prepares a message that carries a LangChain4j {@link Embedding} so that it can be sent to the Pinecone component.
+ *
+ * The embedding is read from the {@code CamelLangChain4jEmbeddingsVector} header and its vector, as a list, becomes
+ * the message body. The {@link PineconeVectorDb.Headers#INDEX_NAME} and {@link PineconeVectorDb.Headers#INDEX_ID}
+ * headers are filled in with defaults ({@code "embeddings"} and a random {@link UUID}) only when the message does
+ * not already carry them.
+ */
+@DataTypeTransformer(name = "pinecone:embeddings",
+                     description = "Prepares the message to become an object writable by Pinecone component")
+public class PineconeEmbeddingsDataTypeTransformer extends Transformer {
+
+    /** Name of the header the embedding is read from. */
+    private static final String EMBEDDING_VECTOR_HEADER = "CamelLangChain4jEmbeddingsVector";
+
+    /** Index name applied when the message has no {@link PineconeVectorDb.Headers#INDEX_NAME} header. */
+    private static final String DEFAULT_INDEX_NAME = "embeddings";
+
+    /**
+     * Replaces the message body with the embedding vector and ensures the index name and id headers are present.
+     *
+     * @param  message               the message to transform; must carry the embedding header
+     * @param  fromType              the source data type (not used)
+     * @param  toType                the target data type (not used)
+     * @throws IllegalStateException if the embedding header is missing or cannot be converted to an
+     *                               {@link Embedding}
+     */
+    @Override
+    public void transform(Message message, DataType fromType, DataType toType) {
+        Embedding embedding = message.getHeader(EMBEDDING_VECTOR_HEADER, Embedding.class);
+        if (embedding == null) {
+            // Fail with an actionable message instead of a bare NullPointerException from vectorAsList().
+            throw new IllegalStateException(
+                    "The pinecone:embeddings transformer requires a LangChain4j Embedding in header '"
+                                            + EMBEDDING_VECTOR_HEADER + "'");
+        }
+
+        // Keep an index name or id that the route has already set; only fall back to the defaults when absent.
+        if (message.getHeader(PineconeVectorDb.Headers.INDEX_NAME) == null) {
+            message.setHeader(PineconeVectorDb.Headers.INDEX_NAME, DEFAULT_INDEX_NAME);
+        }
+        if (message.getHeader(PineconeVectorDb.Headers.INDEX_ID) == null) {
+            message.setHeader(PineconeVectorDb.Headers.INDEX_ID, UUID.randomUUID());
+        }
+
+        message.setBody(embedding.vectorAsList());
+    }
+}