cpoerschke commented on code in PR #1435:
URL: https://github.com/apache/solr/pull/1435#discussion_r1136807166
##########
solr/core/src/java/org/apache/solr/schema/DenseVectorField.java:
##########
@@ -46,19 +50,17 @@
*/
public class DenseVectorField extends FloatPointField {
public static final String HNSW_ALGORITHM = "hnsw";
-
public static final String DEFAULT_KNN_ALGORITHM = HNSW_ALGORITHM;
static final String KNN_VECTOR_DIMENSION = "vectorDimension";
static final String KNN_SIMILARITY_FUNCTION = "similarityFunction";
-
static final String KNN_ALGORITHM = "knnAlgorithm";
static final String HNSW_MAX_CONNECTIONS = "hnswMaxConnections";
static final String HNSW_BEAM_WIDTH = "hnswBeamWidth";
-
+ static final String VECTOR_ENCODING = "vectorEncoding";
+ private final VectorEncoding DEFAULT_VECTOR_ENCODING =
VectorEncoding.FLOAT32;
+ private final VectorSimilarityFunction DEFAULT_SIMILARITY =
VectorSimilarityFunction.EUCLIDEAN;
Review Comment:
minor: keep `DEFAULT_SIMILARITY` at its current location adjacent to
`similarityFunction`
##########
solr/core/src/java/org/apache/solr/schema/DenseVectorField.java:
##########
@@ -106,19 +114,22 @@ public void init(IndexSchema schema, Map<String, String>
args) {
args.remove(KNN_SIMILARITY_FUNCTION);
this.knnAlgorithm = args.getOrDefault(KNN_ALGORITHM,
DEFAULT_KNN_ALGORITHM);
-
args.remove(KNN_ALGORITHM);
+ this.vectorEncoding =
+ ofNullable(args.get(VECTOR_ENCODING))
+ .map(value ->
VectorEncoding.valueOf(value.toUpperCase(Locale.ROOT)))
+ .orElse(DEFAULT_VECTOR_ENCODING);
+ ;
+
+ args.remove(VECTOR_ENCODING);
Review Comment:
minor (consistency with existing style)
```suggestion
.orElse(DEFAULT_VECTOR_ENCODING);
args.remove(VECTOR_ENCODING);
```
##########
solr/core/src/java/org/apache/solr/schema/DenseVectorField.java:
##########
@@ -179,24 +206,37 @@ public List<IndexableField> createFields(SchemaField
field, Object value) {
+ "', expected format:'[f1, f2, f3...fn]' e.g. [1.0, 3.4, 5.6]",
Review Comment:
subjective: if byte encoding is used might it be confusing to see floats in
the example here?
```suggestion
+ "', expected format:'[f1, f2, f3...fn]'",
```
##########
solr/core/src/java/org/apache/solr/schema/DenseVectorField.java:
##########
@@ -165,10 +180,22 @@ public void checkSchemaField(final SchemaField field)
throws SolrException {
@Override
public List<IndexableField> createFields(SchemaField field, Object value) {
- ArrayList<IndexableField> fields = new ArrayList<>();
- float[] parsedVector;
try {
- parsedVector = parseVector(value);
+ ArrayList<IndexableField> fields = new ArrayList<>();
+ VectorValue vectorValue = new VectorValue(value);
+ if (field.indexed()) {
+ fields.add(createField(field, vectorValue));
+ }
+ if (field.stored()) {
+ if (vectorEncoding.equals(VectorEncoding.FLOAT32)) {
+ for (float vectorElement : vectorValue.getFloatVector()) {
+ fields.add(getStoredField(field, vectorElement));
+ }
+ } else {
+ fields.add(new StoredField(field.getName(),
vectorValue.getByteVector()));
+ }
Review Comment:
```suggestion
} else if (vectorEncoding.equals(VectorEncoding.BYTE)) {
fields.add(new StoredField(field.getName(),
vectorValue.getByteVector()));
} else {
should not happen, throw something (mentioning the unsupported
vectorEncoding value)
}
```
##########
solr/core/src/java/org/apache/solr/schema/DenseVectorField.java:
##########
@@ -179,24 +206,37 @@ public List<IndexableField> createFields(SchemaField
field, Object value) {
+ "', expected format:'[f1, f2, f3...fn]' e.g. [1.0, 3.4, 5.6]",
e);
}
+ }
- if (field.indexed()) {
- fields.add(createField(field, parsedVector));
- }
- if (field.stored()) {
- fields.ensureCapacity(parsedVector.length + 1);
- for (float vectorElement : parsedVector) {
- fields.add(getStoredField(field, vectorElement));
- }
+ @Override
+ public IndexableField createField(SchemaField field, Object vectorValue) {
+ if (vectorValue == null) return null;
+ VectorValue typedVectorValue = (VectorValue) vectorValue;
+ if (vectorEncoding.equals(VectorEncoding.BYTE)) {
+ return new KnnVectorField(
+ field.getName(), typedVectorValue.getByteVector(),
similarityFunction);
+ } else {
+ return new KnnVectorField(
+ field.getName(), typedVectorValue.getFloatVector(),
similarityFunction);
Review Comment:
```suggestion
} else if (vectorEncoding.equals(VectorEncoding.FLOAT31)) {
return new KnnVectorField(
field.getName(), typedVectorValue.getFloatVector(),
similarityFunction);
} else {
return null; // or throw something
```
##########
solr/core/src/java/org/apache/solr/schema/DenseVectorField.java:
##########
@@ -179,24 +206,37 @@ public List<IndexableField> createFields(SchemaField
field, Object value) {
+ "', expected format:'[f1, f2, f3...fn]' e.g. [1.0, 3.4, 5.6]",
e);
}
+ }
- if (field.indexed()) {
- fields.add(createField(field, parsedVector));
- }
- if (field.stored()) {
- fields.ensureCapacity(parsedVector.length + 1);
- for (float vectorElement : parsedVector) {
- fields.add(getStoredField(field, vectorElement));
- }
+ @Override
+ public IndexableField createField(SchemaField field, Object vectorValue) {
+ if (vectorValue == null) return null;
+ VectorValue typedVectorValue = (VectorValue) vectorValue;
+ if (vectorEncoding.equals(VectorEncoding.BYTE)) {
+ return new KnnVectorField(
+ field.getName(), typedVectorValue.getByteVector(),
similarityFunction);
+ } else {
+ return new KnnVectorField(
+ field.getName(), typedVectorValue.getFloatVector(),
similarityFunction);
}
- return fields;
}
@Override
- public IndexableField createField(SchemaField field, Object parsedVector) {
- if (parsedVector == null) return null;
- float[] typedVector = (float[]) parsedVector;
- return new KnnVectorField(field.getName(), typedVector,
similarityFunction);
+ public Object toObject(IndexableField f) {
+ if (vectorEncoding.equals(VectorEncoding.BYTE)) {
+ BytesRef bytesRef = f.binaryValue();
+ if (bytesRef != null) {
+ List<Number> ret = new ArrayList<>();
Review Comment:
Wondering if the required capacity of the array is knowable at this point
e.g. based on dimension maybe?
```suggestion
List<Number> ret = new ArrayList<>(dimension);
```
##########
solr/core/src/test/org/apache/solr/schema/DenseVectorFieldTest.java:
##########
@@ -458,4 +495,128 @@ public void
query_functionQueryUsage_shouldThrowException() throws Exception {
deleteCore();
}
}
+
+ @Test
+ public void denseVectorField_shouldBePresentAfterAtomicUpdate() throws
Exception {
+ try {
+ initCore("solrconfig.xml", "schema-densevector.xml");
+ SolrInputDocument doc = new SolrInputDocument();
+ doc.addField("id", "0");
+ doc.addField("vector", Arrays.asList(1.1, 2.2, 3.3, 4.4));
+ doc.addField("vector_byte_encoding", Arrays.asList(5.5, 6.6, 7.7, 8.8));
+ doc.addField("string_field", "test");
+
+ assertU(adoc(doc));
+ assertU(commit());
+
+ assertJQ(
+ req("q", "id:0", "fl", "*"),
+ "/response/docs/[0]/vector==[1.1,2.2,3.3,4.4]",
+ "/response/docs/[0]/vector_byte_encoding==[5,6,7,8]",
+ "/response/docs/[0]/string_field==test");
+
+ SolrInputDocument updateDoc = new SolrInputDocument();
+ updateDoc.addField("id", "0");
+ updateDoc.addField("string_field", ImmutableMap.of("set", "other test"));
+ assertU(adoc(updateDoc));
+ assertU(commit());
+
+ assertJQ(
+ req("q", "id:0", "fl", "*"),
+ "/response/docs/[0]/vector==[1.1,2.2,3.3,4.4]",
+ "/response/docs/[0]/vector_byte_encoding==[5,6,7,8]",
+ "/response/docs/[0]/string_field=='other test'");
+
+ } finally {
+ deleteCore();
+ }
+ }
+
+ @Test
+ public void denseVectorFieldOnAtomicUpdate_shouldBeUpdatedCorrectly() throws
Exception {
+ try {
+ initCore("solrconfig.xml", "schema-densevector.xml");
+ SolrInputDocument doc = new SolrInputDocument();
+ doc.addField("id", "0");
+ doc.addField("vector", Arrays.asList(1.1, 2.2, 3.3, 4.4));
+ doc.addField("vector_byte_encoding", Arrays.asList(5.5, 6.6, 7.7, 8.8));
Review Comment:
```suggestion
doc.addField("vector_byte_encoding", Arrays.asList(5, 6, 7, 8));
```
##########
solr/core/src/test/org/apache/solr/schema/DenseVectorFieldTest.java:
##########
@@ -458,4 +495,128 @@ public void
query_functionQueryUsage_shouldThrowException() throws Exception {
deleteCore();
}
}
+
+ @Test
+ public void denseVectorField_shouldBePresentAfterAtomicUpdate() throws
Exception {
+ try {
+ initCore("solrconfig.xml", "schema-densevector.xml");
+ SolrInputDocument doc = new SolrInputDocument();
+ doc.addField("id", "0");
+ doc.addField("vector", Arrays.asList(1.1, 2.2, 3.3, 4.4));
+ doc.addField("vector_byte_encoding", Arrays.asList(5.5, 6.6, 7.7, 8.8));
Review Comment:
subjective: decimals removal already tested elsewhere?
```suggestion
doc.addField("vector_byte_encoding", Arrays.asList(5, 6, 7, 8));
```
##########
solr/core/src/test-files/solr/collection1/conf/schema-densevector.xml:
##########
@@ -20,11 +20,19 @@
<schema name="schema-densevector" version="1.0">
<fieldType name="string" class="solr.StrField" multiValued="true"/>
- <fieldType name="knn_vector" class="solr.DenseVectorField"
vectorDimension="4" similarityFunction="cosine"/>
-
+ <fieldType name="knn_vector" class="solr.DenseVectorField"
vectorDimension="4" similarityFunction="cosine" />
+ <fieldType name="plong" class="solr.LongPointField"
useDocValuesAsStored="false"/>
+
+ <fieldType name="knn_vector_byte_encoding" class="solr.DenseVectorField"
vectorDimension="4" similarityFunction="cosine" vectorEncoding="BYTE"/>
+ <fieldType name="knn_vector_esplicit_float32_encoding"
class="solr.DenseVectorField" vectorDimension="4" similarityFunction="cosine"
vectorEncoding="FLOAT32"/>
Review Comment:
```suggestion
<fieldType name="knn_vector_explicit_float32_encoding"
class="solr.DenseVectorField" vectorDimension="4" similarityFunction="cosine"
vectorEncoding="FLOAT32"/>
```
or it seems to be unused field type actually?
```suggestion
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]