yuqi1129 commented on code in PR #9097:
URL: https://github.com/apache/gravitino/pull/9097#discussion_r2563766360
##########
catalogs/catalog-lakehouse-generic/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceTableOperations.java:
##########
@@ -240,43 +259,135 @@ private org.apache.arrow.vector.types.pojo.Schema
convertColumnsToArrowSchema(Co
return new org.apache.arrow.vector.types.pojo.Schema(fields);
}
- private void addLanceIndex(Table table, List<Index> addedIndexes) {
- String location = table.properties().get(Table.PROPERTY_LOCATION);
+ private TableChange[] modifyAddIndex(TableChange[] tableChanges, List<Index>
addIndex) {
+ int indexCount = 0;
+ for (int i = 0; i < tableChanges.length; i++) {
+ TableChange change = tableChanges[i];
+ if (change instanceof TableChange.AddIndex) {
+ Index index = addIndex.get(indexCount++);
+ tableChanges[i] =
+ new TableChange.AddIndex(
+ index.type(), index.name(), index.fieldNames(),
index.properties());
+ }
+ }
+
+ return Arrays.stream(tableChanges)
+ .map(
+ change -> {
+ if (change instanceof TableChange.AddIndex) {
+ TableChange.AddIndex addIndexChange = (TableChange.AddIndex)
change;
+ // Here we can modify the index properties if needed.
+ return new TableChange.AddIndex(
+ addIndexChange.getType(),
+ addIndexChange.getName(),
+ addIndexChange.getFieldNames(),
+ addIndexChange.getProperties());
+ } else {
+ return change;
+ }
+ })
+ .toArray(TableChange[]::new);
+ }
+
+ private List<Index> addLanceIndex(String location, List<Index> addedIndexes)
{
+ List<Index> newIndexes = Lists.newArrayList();
try (Dataset dataset = Dataset.open(location, new RootAllocator())) {
- // For Lance, we only support adding indexes, so in fact, we can't
handle drop index here.
for (Index index : addedIndexes) {
- IndexType indexType = IndexType.valueOf(index.type().name());
- IndexParams indexParams = getIndexParamsByIndexType(indexType);
-
+ IndexType indexType = getIndexType(index);
+ IndexParams indexParams = generateIndexParams(index);
dataset.createIndex(
Arrays.stream(index.fieldNames())
- .map(field -> String.join(".", field))
+ .map(fieldPath -> String.join(".", fieldPath))
.collect(Collectors.toList()),
indexType,
- Optional.of(index.name()),
+ Optional.ofNullable(index.name()),
indexParams,
- true);
+ false);
+
+ // Currently lance only supports single-field indexes, so we can use
the first field name.
+ // Another point is that we need to ensure the index name is not null
in Gravitino, so we
+ // generate a name if it's null as Lance will generate a name
automatically.
+ String lanceIndexName =
+ index.name() == null ? index.fieldNames()[0][0] + "_idx" :
index.name();
+ newIndexes.add(
+ Indexes.of(index.type(), lanceIndexName, index.fieldNames(),
index.properties()));
}
- } catch (Exception e) {
- throw new RuntimeException(
- "Failed to add indexes to Lance dataset at location " + location, e);
+
+ return newIndexes;
}
}
- private IndexParams getIndexParamsByIndexType(IndexType indexType) {
+ private IndexType getIndexType(Index index) {
+ IndexType indexType = IndexType.valueOf(index.type().name());
+ return switch (indexType) {
+ // API only supports these index types for now, but there are more
index types in Lance.
+ case SCALAR, BTREE, INVERTED, BITMAP -> indexType;
+ // According to real test, we need to map IVF_SQ/IVF_PQ/IVF_HNSW_SQ to
VECTOR type in Lance,
+ // or it will throw exception. For more, please refer to
+ // https://github.com/lancedb/lance/issues/5182#issuecomment-3524372490
+ case IVF_FLAT, IVF_PQ, IVF_HNSW_SQ -> IndexType.VECTOR;
+ default -> throw new IllegalArgumentException("Unsupported index type: "
+ indexType);
+ };
+ }
+
+ private IndexParams generateIndexParams(Index index) {
+ IndexType indexType = IndexType.valueOf(index.type().name());
+
+ String configJson = index.properties().get(LANCE_INDEX_CONFIG_KEY);
+ CreateTableIndexRequest request;
+ try {
+ request = JsonUtil.mapper().readValue(configJson,
CreateTableIndexRequest.class);
+ } catch (Exception e) {
+ throw new IllegalArgumentException("Lance index config is invalid", e);
+ }
+
+ IndexParams.Builder builder = IndexParams.builder();
switch (indexType) {
- case SCALAR:
- return IndexParams.builder().build();
- case VECTOR:
- // TODO make these parameters configurable
- int numberOfDimensions = 3; // this value should be determined
dynamically based on the data
- // Add properties to Index to set this value.
- return IndexParams.builder()
- .setVectorIndexParams(
- VectorIndexParams.ivfPq(2, 8, numberOfDimensions,
DistanceType.L2, 2))
- .build();
- default:
- throw new IllegalArgumentException("Unsupported index type: " +
indexType);
+ case SCALAR, BTREE, INVERTED, BITMAP -> builder.setScalarIndexParams(
+ ScalarIndexParams.create(indexType.name()));
+
+ case IVF_FLAT -> builder.setVectorIndexParams(
+ new VectorIndexParams.Builder(new IvfBuildParams.Builder().build())
+ .setDistanceType(toLanceDistanceType(request.getMetricType()))
+ .build());
+ case IVF_PQ -> builder.setVectorIndexParams(
+ new VectorIndexParams.Builder(new IvfBuildParams.Builder().build())
+ .setDistanceType(toLanceDistanceType(request.getMetricType()))
+ .setPqParams(
+ new PQBuildParams.Builder()
+ .setNumSubVectors(1) // others use default value.
+ .build())
+ .build());
+
+ case IVF_SQ -> builder.setVectorIndexParams(
+ new VectorIndexParams.Builder(new IvfBuildParams.Builder().build())
+ .setDistanceType(toLanceDistanceType(request.getMetricType()))
+ .setSqParams(new SQBuildParams.Builder().build())
+ .build());
+
+ case IVF_HNSW_SQ -> builder.setVectorIndexParams(
+ new VectorIndexParams.Builder(new IvfBuildParams.Builder().build())
+ .setDistanceType(toLanceDistanceType(request.getMetricType()))
+ .setHnswParams(new HnswBuildParams.Builder().build())
+ .build());
+ default -> throw new IllegalArgumentException("Unsupported index type: "
+ indexType);
Review Comment:
```java
case SCALAR, BTREE, INVERTED, BITMAP -> builder.setScalarIndexParams(
ScalarIndexParams.create(indexType.name()));
```
The code is as shown above, so what is the problem?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]