Copilot commented on code in PR #6959:
URL: https://github.com/apache/paimon/pull/6959#discussion_r2666939036


##########
paimon-faiss-jni/src/main/native/paimon_faiss_jni.cpp:
##########
@@ -0,0 +1,464 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "paimon_faiss_jni.h"
+
+#include <faiss/Index.h>
+#include <faiss/IndexFlat.h>
+#include <faiss/IndexHNSW.h>
+#include <faiss/IndexIVF.h>
+#include <faiss/IndexIDMap.h>
+#include <faiss/index_factory.h>
+#include <faiss/index_io.h>
+#include <faiss/impl/io.h>
+
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+#include <cstring>
+#include <string>
+#include <sstream>
+#include <stdexcept>
+#include <vector>
+
+// Helper macros for exception handling.
+//
+// Wrap the body of every JNI entry point in FAISS_TRY ... FAISS_CATCH(env) so
+// that no C++ exception propagates into the JVM. A caught exception is raised
+// on the Java side as org.apache.paimon.faiss.FaissException.
+//
+// If a Java exception is already pending when the C++ exception is caught, the
+// pending one is left in place: FindClass and ThrowNew are not among the JNI
+// functions that may be called while an exception is pending.
+#define FAISS_TRY try {
+#define FAISS_CATCH(env) \
+    } catch (const std::exception& e) { \
+        if (!(env)->ExceptionCheck()) { \
+            jclass exceptionClass = (env)->FindClass("org/apache/paimon/faiss/FaissException"); \
+            if (exceptionClass != nullptr) { \
+                (env)->ThrowNew(exceptionClass, e.what()); \
+            } \
+        } \
+    } catch (...) { \
+        if (!(env)->ExceptionCheck()) { \
+            jclass exceptionClass = (env)->FindClass("org/apache/paimon/faiss/FaissException"); \
+            if (exceptionClass != nullptr) { \
+                (env)->ThrowNew(exceptionClass, "Unknown native exception"); \
+            } \
+        } \
+    }
+
+// Helper function to convert a jstring to a std::string.
+//
+// Returns an empty string when jstr is null. The characters are copied out of
+// the buffer returned by GetStringUTFChars (modified UTF-8) and the buffer is
+// released before returning.
+//
+// Throws std::runtime_error if the JVM cannot provide the characters.
+static std::string jstringToString(JNIEnv* env, jstring jstr) {
+    if (jstr == nullptr) {
+        return "";
+    }
+    const char* chars = env->GetStringUTFChars(jstr, nullptr);
+    if (chars == nullptr) {
+        // GetStringUTFChars failed and left a Java exception pending. Clear it so
+        // the failure is reported through the C++ exception path instead of
+        // constructing a std::string from a null pointer.
+        env->ExceptionClear();
+        throw std::runtime_error("Failed to read Java string contents");
+    }
+    std::string result(chars);
+    env->ReleaseStringUTFChars(jstr, chars);
+    return result;
+}
+
+// Helper function to get index pointer from handle
+static faiss::Index* getIndex(jlong handle) {
+    return reinterpret_cast<faiss::Index*>(handle);
+}
+
+// Helper to get IVF index
+static faiss::IndexIVF* getIndexIVF(jlong handle) {
+    faiss::Index* index = getIndex(handle);
+    
+    // Try direct cast
+    faiss::IndexIVF* ivf = dynamic_cast<faiss::IndexIVF*>(index);
+    if (ivf != nullptr) {
+        return ivf;
+    }
+    
+    // Try through IDMap wrapper
+    faiss::IndexIDMap* idmap = dynamic_cast<faiss::IndexIDMap*>(index);
+    if (idmap != nullptr) {
+        ivf = dynamic_cast<faiss::IndexIVF*>(idmap->index);
+        if (ivf != nullptr) {
+            return ivf;
+        }
+    }
+    
+    throw std::runtime_error("Index is not an IVF index");
+}
+
+// Helper to get HNSW index
+static faiss::IndexHNSW* getIndexHNSW(jlong handle) {
+    faiss::Index* index = getIndex(handle);
+    
+    // Try direct cast
+    faiss::IndexHNSW* hnsw = dynamic_cast<faiss::IndexHNSW*>(index);
+    if (hnsw != nullptr) {
+        return hnsw;
+    }
+    
+    // Try through IDMap wrapper
+    faiss::IndexIDMap* idmap = dynamic_cast<faiss::IndexIDMap*>(index);
+    if (idmap != nullptr) {
+        hnsw = dynamic_cast<faiss::IndexHNSW*>(idmap->index);
+        if (hnsw != nullptr) {
+            return hnsw;
+        }
+    }
+    
+    throw std::runtime_error("Index is not an HNSW index");
+}
+
+// Range search result wrapper
+struct RangeSearchResultWrapper {
+    faiss::RangeSearchResult result;
+    int nq;
+    
+    RangeSearchResultWrapper(int nq_) : result(nq_), nq(nq_) {}
+};
+
+// ==================== Index Factory ====================
+
+JNIEXPORT jlong JNICALL 
Java_org_apache_paimon_faiss_FaissNative_indexFactoryCreate
+  (JNIEnv* env, jclass, jint dimension, jstring description, jint metricType) {
+    FAISS_TRY
+        std::string desc = jstringToString(env, description);
+        faiss::MetricType metric = (metricType == 0) ? faiss::METRIC_L2 : 
faiss::METRIC_INNER_PRODUCT;
+        faiss::Index* index = faiss::index_factory(dimension, desc.c_str(), 
metric);
+        return reinterpret_cast<jlong>(index);
+    FAISS_CATCH(env)
+    return 0;
+}
+
+// ==================== Index Operations ====================
+
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_indexDestroy
+  (JNIEnv* env, jclass, jlong handle) {
+    FAISS_TRY
+        delete getIndex(handle);
+    FAISS_CATCH(env)
+}
+
+JNIEXPORT jint JNICALL 
Java_org_apache_paimon_faiss_FaissNative_indexGetDimension
+  (JNIEnv* env, jclass, jlong handle) {
+    FAISS_TRY
+        return static_cast<jint>(getIndex(handle)->d);
+    FAISS_CATCH(env)
+    return 0;
+}
+
+JNIEXPORT jlong JNICALL Java_org_apache_paimon_faiss_FaissNative_indexGetCount
+  (JNIEnv* env, jclass, jlong handle) {
+    FAISS_TRY
+        return static_cast<jlong>(getIndex(handle)->ntotal);
+    FAISS_CATCH(env)
+    return 0;
+}
+
+JNIEXPORT jboolean JNICALL 
Java_org_apache_paimon_faiss_FaissNative_indexIsTrained
+  (JNIEnv* env, jclass, jlong handle) {
+    FAISS_TRY
+        return getIndex(handle)->is_trained ? JNI_TRUE : JNI_FALSE;
+    FAISS_CATCH(env)
+    return JNI_FALSE;
+}
+
+JNIEXPORT jint JNICALL 
Java_org_apache_paimon_faiss_FaissNative_indexGetMetricType
+  (JNIEnv* env, jclass, jlong handle) {
+    FAISS_TRY
+        faiss::MetricType metric = getIndex(handle)->metric_type;
+        return (metric == faiss::METRIC_L2) ? 0 : 1;
+    FAISS_CATCH(env)
+    return 0;
+}
+
+// Trains the index on n vectors stored contiguously in `vectors`.
+//
+// Raises FaissException (via FAISS_CATCH) if the handle or array is null, if n
+// is negative, if the array holds fewer than n * d floats, or if Faiss itself
+// fails. The Java array is never modified.
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_indexTrain
+  (JNIEnv* env, jclass, jlong handle, jlong n, jfloatArray vectors) {
+    FAISS_TRY
+        faiss::Index* index = getIndex(handle);
+        if (index == nullptr) {
+            throw std::invalid_argument("Index handle is null");
+        }
+        if (vectors == nullptr) {
+            throw std::invalid_argument("Vector array must not be null");
+        }
+        // Guard against reading past the end of the Java array. Dividing instead
+        // of multiplying avoids overflow in n * d.
+        const jlong available = env->GetArrayLength(vectors);
+        if (n < 0 || index->d <= 0 || n > available / index->d) {
+            throw std::invalid_argument("Vector array is shorter than n * dimension");
+        }
+
+        jfloat* vectorData = env->GetFloatArrayElements(vectors, nullptr);
+        if (vectorData == nullptr) {
+            return;  // a Java exception is already pending
+        }
+        try {
+            index->train(n, vectorData);
+        } catch (...) {
+            // Release the pinned/copied array before the exception is translated.
+            env->ReleaseFloatArrayElements(vectors, vectorData, JNI_ABORT);
+            throw;
+        }
+        env->ReleaseFloatArrayElements(vectors, vectorData, JNI_ABORT);
+    FAISS_CATCH(env)
+}
+
+// Adds n vectors stored contiguously in `vectors` to the index.
+//
+// Raises FaissException (via FAISS_CATCH) if the handle or array is null, if n
+// is negative, if the array holds fewer than n * d floats, or if Faiss itself
+// fails. The Java array is never modified.
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_indexAdd
+  (JNIEnv* env, jclass, jlong handle, jlong n, jfloatArray vectors) {
+    FAISS_TRY
+        faiss::Index* index = getIndex(handle);
+        if (index == nullptr) {
+            throw std::invalid_argument("Index handle is null");
+        }
+        if (vectors == nullptr) {
+            throw std::invalid_argument("Vector array must not be null");
+        }
+        // Guard against reading past the end of the Java array. Dividing instead
+        // of multiplying avoids overflow in n * d.
+        const jlong available = env->GetArrayLength(vectors);
+        if (n < 0 || index->d <= 0 || n > available / index->d) {
+            throw std::invalid_argument("Vector array is shorter than n * dimension");
+        }
+
+        jfloat* vectorData = env->GetFloatArrayElements(vectors, nullptr);
+        if (vectorData == nullptr) {
+            return;  // a Java exception is already pending
+        }
+        try {
+            index->add(n, vectorData);
+        } catch (...) {
+            // Release the pinned/copied array before the exception is translated.
+            env->ReleaseFloatArrayElements(vectors, vectorData, JNI_ABORT);
+            throw;
+        }
+        env->ReleaseFloatArrayElements(vectors, vectorData, JNI_ABORT);
+    FAISS_CATCH(env)
+}
+
+// Adds n vectors with caller-supplied ids to the index.
+//
+// `vectors` must hold at least n * d floats and `ids` at least n longs.
+// Raises FaissException (via FAISS_CATCH) on null arguments, on undersized
+// arrays, or if Faiss itself fails. Neither Java array is modified.
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_indexAddWithIds
+  (JNIEnv* env, jclass, jlong handle, jlong n, jfloatArray vectors, jlongArray ids) {
+    FAISS_TRY
+        faiss::Index* index = getIndex(handle);
+        if (index == nullptr) {
+            throw std::invalid_argument("Index handle is null");
+        }
+        if (vectors == nullptr || ids == nullptr) {
+            throw std::invalid_argument("Vector and id arrays must not be null");
+        }
+        const jlong availableFloats = env->GetArrayLength(vectors);
+        const jlong availableIds = env->GetArrayLength(ids);
+        if (n < 0 || index->d <= 0 || n > availableFloats / index->d) {
+            throw std::invalid_argument("Vector array is shorter than n * dimension");
+        }
+        if (n > availableIds) {
+            throw std::invalid_argument("Id array is shorter than n");
+        }
+
+        // Copy the ids into a Faiss-typed buffer first and release the Java array
+        // right away, so only one JNI array is held while Faiss runs.
+        std::vector<faiss::idx_t> faissIds(static_cast<size_t>(n));
+        jlong* idData = env->GetLongArrayElements(ids, nullptr);
+        if (idData == nullptr) {
+            return;  // a Java exception is already pending
+        }
+        for (jlong i = 0; i < n; i++) {
+            faissIds[static_cast<size_t>(i)] = static_cast<faiss::idx_t>(idData[i]);
+        }
+        env->ReleaseLongArrayElements(ids, idData, JNI_ABORT);
+
+        jfloat* vectorData = env->GetFloatArrayElements(vectors, nullptr);
+        if (vectorData == nullptr) {
+            return;  // a Java exception is already pending
+        }
+        try {
+            index->add_with_ids(n, vectorData, faissIds.data());
+        } catch (...) {
+            env->ReleaseFloatArrayElements(vectors, vectorData, JNI_ABORT);
+            throw;
+        }
+        env->ReleaseFloatArrayElements(vectors, vectorData, JNI_ABORT);
+    FAISS_CATCH(env)
+}
+
+// Runs a k-nearest-neighbour search for n query vectors.
+//
+// `queries` must hold at least n * d floats; `distances` and `labels` must each
+// hold at least n * k elements and receive the results in their first n * k
+// slots. Raises FaissException (via FAISS_CATCH) on null or undersized arrays,
+// on a negative n or non-positive k, or if Faiss itself fails.
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_indexSearch
+  (JNIEnv* env, jclass, jlong handle, jlong n, jfloatArray queries, jint k,
+   jfloatArray distances, jlongArray labels) {
+    FAISS_TRY
+        faiss::Index* index = getIndex(handle);
+        if (index == nullptr) {
+            throw std::invalid_argument("Index handle is null");
+        }
+        if (queries == nullptr || distances == nullptr || labels == nullptr) {
+            throw std::invalid_argument("Query, distance and label arrays must not be null");
+        }
+        if (n < 0 || k <= 0) {
+            throw std::invalid_argument("n must be non-negative and k must be positive");
+        }
+        // Validate by division so that n * d and n * k cannot overflow.
+        const jlong queryLen = env->GetArrayLength(queries);
+        const jlong distLen = env->GetArrayLength(distances);
+        const jlong labelLen = env->GetArrayLength(labels);
+        if (index->d <= 0 || n > queryLen / index->d) {
+            throw std::invalid_argument("Query array is shorter than n * dimension");
+        }
+        if (n > distLen / k || n > labelLen / k) {
+            throw std::invalid_argument("Distance and label arrays must hold n * k elements");
+        }
+        // n * k is bounded by a Java array length here, so it fits in jsize.
+        const jsize total = static_cast<jsize>(n * k);
+
+        // Search into native buffers; the output arrays are only touched once
+        // the search has succeeded.
+        std::vector<float> nativeDistances(static_cast<size_t>(total));
+        std::vector<faiss::idx_t> faissLabels(static_cast<size_t>(total));
+        std::vector<jlong> javaLabels(static_cast<size_t>(total));
+
+        jfloat* queryData = env->GetFloatArrayElements(queries, nullptr);
+        if (queryData == nullptr) {
+            return;  // a Java exception is already pending
+        }
+        try {
+            index->search(n, queryData, k, nativeDistances.data(), faissLabels.data());
+        } catch (...) {
+            env->ReleaseFloatArrayElements(queries, queryData, JNI_ABORT);
+            throw;
+        }
+        env->ReleaseFloatArrayElements(queries, queryData, JNI_ABORT);
+
+        // Copy the labels back as jlong values.
+        for (jsize i = 0; i < total; i++) {
+            javaLabels[static_cast<size_t>(i)] = static_cast<jlong>(faissLabels[static_cast<size_t>(i)]);
+        }
+        env->SetFloatArrayRegion(distances, 0, total, nativeDistances.data());
+        env->SetLongArrayRegion(labels, 0, total, javaLabels.data());
+    FAISS_CATCH(env)
+}
+
+// Runs a range search for n query vectors and returns a handle to a
+// heap-allocated RangeSearchResultWrapper.
+//
+// The caller owns the returned handle and must release it through
+// rangeSearchResultDestroy. Returns 0 with a Java exception pending on failure;
+// in that case no wrapper is leaked.
+JNIEXPORT jlong JNICALL Java_org_apache_paimon_faiss_FaissNative_indexRangeSearch
+  (JNIEnv* env, jclass, jlong handle, jlong n, jfloatArray queries, jfloat radius) {
+    FAISS_TRY
+        faiss::Index* index = getIndex(handle);
+        if (index == nullptr) {
+            throw std::invalid_argument("Index handle is null");
+        }
+        if (queries == nullptr) {
+            throw std::invalid_argument("Query array must not be null");
+        }
+        // Bounding n by the Java array length also guarantees that the narrowing
+        // cast to int below cannot truncate.
+        const jlong queryLen = env->GetArrayLength(queries);
+        if (n < 0 || index->d <= 0 || n > queryLen / index->d) {
+            throw std::invalid_argument("Query array is shorter than n * dimension");
+        }
+
+        jfloat* queryData = env->GetFloatArrayElements(queries, nullptr);
+        if (queryData == nullptr) {
+            return 0;  // a Java exception is already pending
+        }
+
+        RangeSearchResultWrapper* wrapper = nullptr;
+        try {
+            wrapper = new RangeSearchResultWrapper(static_cast<int>(n));
+            index->range_search(n, queryData, radius, &wrapper->result);
+        } catch (...) {
+            // Free both the wrapper and the query buffer before the exception is
+            // translated; ownership has not been handed to Java yet.
+            delete wrapper;
+            env->ReleaseFloatArrayElements(queries, queryData, JNI_ABORT);
+            throw;
+        }
+
+        env->ReleaseFloatArrayElements(queries, queryData, JNI_ABORT);
+        return reinterpret_cast<jlong>(wrapper);
+    FAISS_CATCH(env)
+    return 0;
+}
+
+// Removes the vectors whose ids are listed in `ids` and returns how many were
+// removed, as reported by Faiss.
+//
+// Returns 0 with a Java exception pending on failure.
+JNIEXPORT jlong JNICALL Java_org_apache_paimon_faiss_FaissNative_indexRemoveIds
+  (JNIEnv* env, jclass, jlong handle, jlongArray ids) {
+    FAISS_TRY
+        faiss::Index* index = getIndex(handle);
+        if (index == nullptr) {
+            throw std::invalid_argument("Index handle is null");
+        }
+        if (ids == nullptr) {
+            throw std::invalid_argument("Id array must not be null");
+        }
+
+        // Allocate the native buffer before touching the Java array so that an
+        // allocation failure cannot leave the array unreleased.
+        const jsize n = env->GetArrayLength(ids);
+        std::vector<faiss::idx_t> faissIds(static_cast<size_t>(n));
+
+        jlong* idData = env->GetLongArrayElements(ids, nullptr);
+        if (idData == nullptr) {
+            return 0;  // a Java exception is already pending
+        }
+        for (jsize i = 0; i < n; i++) {
+            faissIds[static_cast<size_t>(i)] = static_cast<faiss::idx_t>(idData[i]);
+        }
+        // The ids are copied, so release the Java array before calling Faiss.
+        env->ReleaseLongArrayElements(ids, idData, JNI_ABORT);
+
+        // Create ID selector over the copied ids
+        faiss::IDSelectorArray selector(faissIds.size(), faissIds.data());
+        return static_cast<jlong>(index->remove_ids(selector));
+    FAISS_CATCH(env)
+    return 0;
+}
+
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_indexReset
+  (JNIEnv* env, jclass, jlong handle) {
+    FAISS_TRY
+        getIndex(handle)->reset();
+    FAISS_CATCH(env)
+}
+
+// ==================== Index I/O ====================
+
+JNIEXPORT void JNICALL 
Java_org_apache_paimon_faiss_FaissNative_indexWriteToFile
+  (JNIEnv* env, jclass, jlong handle, jstring path) {
+    FAISS_TRY
+        std::string filePath = jstringToString(env, path);
+        faiss::write_index(getIndex(handle), filePath.c_str());
+    FAISS_CATCH(env)
+}
+
+JNIEXPORT jlong JNICALL 
Java_org_apache_paimon_faiss_FaissNative_indexReadFromFile
+  (JNIEnv* env, jclass, jstring path) {
+    FAISS_TRY
+        std::string filePath = jstringToString(env, path);
+        faiss::Index* index = faiss::read_index(filePath.c_str());
+        return reinterpret_cast<jlong>(index);
+    FAISS_CATCH(env)
+    return 0;
+}
+
+// Serializes the index into a new Java byte array.
+//
+// Returns nullptr with a Java exception pending if Faiss fails, if the
+// serialized form does not fit in a Java array, or if the array cannot be
+// allocated.
+JNIEXPORT jbyteArray JNICALL Java_org_apache_paimon_faiss_FaissNative_indexSerialize
+  (JNIEnv* env, jclass, jlong handle) {
+    FAISS_TRY
+        faiss::VectorIOWriter writer;
+        faiss::write_index(getIndex(handle), &writer);
+
+        // A Java array length is a signed 32-bit value; refuse to truncate.
+        if (writer.data.size() > static_cast<size_t>(0x7FFFFFFF)) {
+            throw std::runtime_error("Serialized index exceeds the maximum Java array size");
+        }
+        const jsize length = static_cast<jsize>(writer.data.size());
+
+        jbyteArray result = env->NewByteArray(length);
+        if (result == nullptr) {
+            return nullptr;  // a Java exception is already pending
+        }
+        env->SetByteArrayRegion(result, 0, length,
+                                reinterpret_cast<const jbyte*>(writer.data.data()));
+        return result;
+    FAISS_CATCH(env)
+    return nullptr;
+}
+
+// Reconstructs an index from bytes produced by indexSerialize and returns a
+// handle to it.
+//
+// The caller owns the returned handle and must release it through
+// indexDestroy. Returns 0 with a Java exception pending on failure.
+JNIEXPORT jlong JNICALL Java_org_apache_paimon_faiss_FaissNative_indexDeserialize
+  (JNIEnv* env, jclass, jbyteArray data) {
+    FAISS_TRY
+        if (data == nullptr) {
+            throw std::invalid_argument("Serialized index data must not be null");
+        }
+        const jsize length = env->GetArrayLength(data);
+
+        // Copy straight into the reader's buffer. Unlike Get/Release
+        // ByteArrayElements there is nothing to release if read_index throws.
+        faiss::VectorIOReader reader;
+        reader.data.resize(static_cast<size_t>(length));
+        if (length > 0) {
+            env->GetByteArrayRegion(data, 0, length,
+                                    reinterpret_cast<jbyte*>(reader.data.data()));
+        }
+
+        faiss::Index* index = faiss::read_index(&reader);
+        return reinterpret_cast<jlong>(index);
+    FAISS_CATCH(env)
+    return 0;
+}
+
+// ==================== Range Search Result ====================
+
+JNIEXPORT void JNICALL 
Java_org_apache_paimon_faiss_FaissNative_rangeSearchResultDestroy
+  (JNIEnv* env, jclass, jlong handle) {
+    FAISS_TRY
+        delete reinterpret_cast<RangeSearchResultWrapper*>(handle);
+    FAISS_CATCH(env)
+}
+
+JNIEXPORT jlongArray JNICALL 
Java_org_apache_paimon_faiss_FaissNative_rangeSearchResultGetLimits
+  (JNIEnv* env, jclass, jlong handle) {
+    FAISS_TRY
+        RangeSearchResultWrapper* wrapper = 
reinterpret_cast<RangeSearchResultWrapper*>(handle);
+        jsize n = wrapper->nq + 1;
+        jlongArray result = env->NewLongArray(n);
+        
+        std::vector<jlong> limits(n);
+        for (jsize i = 0; i < n; i++) {
+            limits[i] = static_cast<jlong>(wrapper->result.lims[i]);
+        }
+        env->SetLongArrayRegion(result, 0, n, limits.data());
+        return result;
+    FAISS_CATCH(env)
+    return nullptr;
+}
+
+JNIEXPORT jlongArray JNICALL 
Java_org_apache_paimon_faiss_FaissNative_rangeSearchResultGetLabels
+  (JNIEnv* env, jclass, jlong handle) {
+    FAISS_TRY
+        RangeSearchResultWrapper* wrapper = 
reinterpret_cast<RangeSearchResultWrapper*>(handle);
+        jsize n = static_cast<jsize>(wrapper->result.lims[wrapper->nq]);
+        jlongArray result = env->NewLongArray(n);
+        
+        std::vector<jlong> labels(n);
+        for (jsize i = 0; i < n; i++) {
+            labels[i] = static_cast<jlong>(wrapper->result.labels[i]);
+        }
+        env->SetLongArrayRegion(result, 0, n, labels.data());
+        return result;
+    FAISS_CATCH(env)
+    return nullptr;
+}
+
+JNIEXPORT jfloatArray JNICALL 
Java_org_apache_paimon_faiss_FaissNative_rangeSearchResultGetDistances
+  (JNIEnv* env, jclass, jlong handle) {
+    FAISS_TRY
+        RangeSearchResultWrapper* wrapper = 
reinterpret_cast<RangeSearchResultWrapper*>(handle);
+        jsize n = static_cast<jsize>(wrapper->result.lims[wrapper->nq]);
+        jfloatArray result = env->NewFloatArray(n);
+        env->SetFloatArrayRegion(result, 0, n, wrapper->result.distances);
+        return result;
+    FAISS_CATCH(env)
+    return nullptr;
+}
+
+// ==================== IVF Index Specific ====================
+
+JNIEXPORT jint JNICALL Java_org_apache_paimon_faiss_FaissNative_ivfGetNprobe
+  (JNIEnv* env, jclass, jlong handle) {
+    FAISS_TRY
+        return static_cast<jint>(getIndexIVF(handle)->nprobe);
+    FAISS_CATCH(env)
+    return 0;
+}
+
+// Sets the nprobe field of the IVF index behind `handle`.
+//
+// Raises FaissException (via FAISS_CATCH) if nprobe is negative or if the
+// handle does not refer to an IVF index.
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_ivfSetNprobe
+  (JNIEnv* env, jclass, jlong handle, jint nprobe) {
+    FAISS_TRY
+        // A negative jint would wrap to a huge value when cast to size_t.
+        if (nprobe < 0) {
+            throw std::invalid_argument("nprobe must not be negative");
+        }
+        getIndexIVF(handle)->nprobe = static_cast<size_t>(nprobe);
+    FAISS_CATCH(env)
+}
+
+JNIEXPORT jint JNICALL Java_org_apache_paimon_faiss_FaissNative_ivfGetNlist
+  (JNIEnv* env, jclass, jlong handle) {
+    FAISS_TRY
+        return static_cast<jint>(getIndexIVF(handle)->nlist);
+    FAISS_CATCH(env)
+    return 0;
+}
+
+// ==================== HNSW Index Specific ====================
+
+JNIEXPORT jint JNICALL Java_org_apache_paimon_faiss_FaissNative_hnswGetEfSearch
+  (JNIEnv* env, jclass, jlong handle) {
+    FAISS_TRY
+        return static_cast<jint>(getIndexHNSW(handle)->hnsw.efSearch);
+    FAISS_CATCH(env)
+    return 0;
+}
+
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_hnswSetEfSearch
+  (JNIEnv* env, jclass, jlong handle, jint efSearch) {
+    FAISS_TRY
+        getIndexHNSW(handle)->hnsw.efSearch = static_cast<int>(efSearch);
+    FAISS_CATCH(env)
+}
+
+JNIEXPORT jint JNICALL 
Java_org_apache_paimon_faiss_FaissNative_hnswGetEfConstruction
+  (JNIEnv* env, jclass, jlong handle) {
+    FAISS_TRY
+        return static_cast<jint>(getIndexHNSW(handle)->hnsw.efConstruction);
+    FAISS_CATCH(env)
+    return 0;
+}
+
+// ==================== Utility ====================
+
+JNIEXPORT jstring JNICALL Java_org_apache_paimon_faiss_FaissNative_getVersion
+  (JNIEnv* env, jclass) {
+    // Faiss doesn't have a built-in version function, so we return our 
binding version
+    return env->NewStringUTF("1.7.4");

Review Comment:
   The FAISS version is hardcoded as a string literal, so it must be updated 
manually whenever FAISS is upgraded and can silently drift from the library 
that is actually linked. Consider deriving it at build time instead, e.g. from 
a CMake variable or from the `FAISS_VERSION_MAJOR` / `FAISS_VERSION_MINOR` / 
`FAISS_VERSION_PATCH` macros defined in `faiss/Index.h`.



##########
paimon-faiss-jni/README.md:
##########
@@ -0,0 +1,264 @@
+# Paimon Faiss JNI
+
+Java bindings for [Faiss](https://github.com/facebookresearch/faiss) - A 
library for efficient similarity search and clustering of dense vectors.
+
+## Overview
+
+Paimon Faiss JNI provides a high-performance Java API for Faiss, following a 
similar architecture to [RocksDB 
Java](https://github.com/facebook/rocksdb/wiki/RocksJava-Basics). The library 
consists of:
+
+1. **Java API Layer** (`org.apache.paimon.faiss` package) - High-level Java 
classes for creating and managing Faiss indexes
+2. **JNI Bridge** - C++ code that connects Java to the native Faiss library
+3. **Native Libraries** - Pre-compiled Faiss libraries for different 
platforms, bundled in the JAR
+
+## Features
+
+- **Multiple Index Types**: Flat, IVF, HNSW, PQ, and more
+- **Cross-Platform**: Supports Linux (x86_64, aarch64), macOS (x86_64, 
aarch64), and Windows (x86_64)

Review Comment:
   The README claims Windows support, but there are no Windows build scripts, 
GitHub Actions workflows, or native resources for Windows. Either add Windows 
support or update the documentation to remove this claim.
   ```suggestion
   - **Cross-Platform**: Supports Linux (x86_64, aarch64) and macOS (x86_64, 
aarch64)
   ```



##########
paimon-faiss-jni/NOTICE:
##########
@@ -0,0 +1,9 @@
+Apache Paimon Faiss JNI
+Copyright 2024 The Apache Software Foundation

Review Comment:
   The copyright year is 2024, but this code is being added in 2026. Update 
the notice to a year range that includes the current year, e.g. '2024-2026'.
   ```suggestion
   Copyright 2024-2026 The Apache Software Foundation
   ```



##########
paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/Index.java:
##########
@@ -0,0 +1,373 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.faiss;
+
+import java.io.File;
+
+/**
+ * A Faiss index for similarity search.
+ *
+ * <p>This class wraps a native Faiss index and provides methods for adding 
vectors, searching for
+ * nearest neighbors, and managing the index.
+ *
+ * <p>Index instances must be closed when no longer needed to free native 
resources. It is
+ * recommended to use try-with-resources:
+ *
+ * <pre>{@code
+ * try (Index index = IndexFactory.create(128, "Flat", MetricType.L2)) {
+ *     index.add(vectors);
+ *     SearchResult result = index.search(queries, 10);
+ * }
+ * }</pre>
+ *
+ * <p>Thread Safety: Index instances are NOT thread-safe. External 
synchronization is required if an
+ * index is accessed from multiple threads.
+ *
+ * @see IndexFactory
+ */
+public class Index implements AutoCloseable {
+
+    /** Native handle to the Faiss index. */
+    private long nativeHandle;
+
+    /** The dimension of vectors in this index. */
+    private final int dimension;
+
+    /** Whether this index has been closed. */
+    private volatile boolean closed = false;
+
+    /**
+     * Create an Index wrapper around a native handle.
+     *
+     * @param nativeHandle the native handle
+     * @param dimension the vector dimension
+     */
+    Index(long nativeHandle, int dimension) {
+        this.nativeHandle = nativeHandle;
+        this.dimension = dimension;
+    }
+
+    /**
+     * Get the dimension of vectors in this index.
+     *
+     * @return the vector dimension
+     */
+    public int getDimension() {
+        return dimension;
+    }
+
+    /**
+     * Get the number of vectors in this index.
+     *
+     * @return the number of vectors
+     */
+    public long getCount() {
+        checkNotClosed();
+        return FaissNative.indexGetCount(nativeHandle);
+    }
+
+    /**
+     * Check if this index is trained.
+     *
+     * <p>Some index types (like IVF) require training before vectors can be 
added. Flat indexes are
+     * always considered trained.
+     *
+     * @return true if the index is trained
+     */
+    public boolean isTrained() {
+        checkNotClosed();
+        return FaissNative.indexIsTrained(nativeHandle);
+    }
+
+    /**
+     * Get the metric type used by this index.
+     *
+     * @return the metric type
+     */
+    public MetricType getMetricType() {
+        checkNotClosed();
+        return 
MetricType.fromValue(FaissNative.indexGetMetricType(nativeHandle));
+    }
+
+    /**
+     * Train the index on a set of training vectors.
+     *
+     * <p>This is required for some index types (like IVF) before adding 
vectors. For flat indexes,
+     * this is a no-op.
+     *
+     * @param vectors the training vectors (n * dimension floats)
+     */
+    public void train(float[] vectors) {
+        checkNotClosed();
+        if (vectors.length % dimension != 0) {
+            throw new IllegalArgumentException(
+                    "Vector array length must be a multiple of dimension " + 
dimension);
+        }
+        long n = vectors.length / dimension;
+        FaissNative.indexTrain(nativeHandle, n, vectors);
+    }
+
+    /**
+     * Add vectors to the index.
+     *
+     * <p>The vectors are assigned sequential IDs starting from the current 
count.
+     *
+     * @param vectors the vectors to add (n * dimension floats)
+     */
+    public void add(float[] vectors) {
+        checkNotClosed();
+        if (vectors.length % dimension != 0) {
+            throw new IllegalArgumentException(
+                    "Vector array length must be a multiple of dimension " + 
dimension);
+        }
+        long n = vectors.length / dimension;
+        FaissNative.indexAdd(nativeHandle, n, vectors);
+    }
+
+    /**
+     * Add a single vector to the index.
+     *
+     * @param vector the vector to add (dimension floats)
+     */
+    public void addSingle(float[] vector) {
+        checkNotClosed();
+        if (vector.length != dimension) {
+            throw new IllegalArgumentException(
+                    "Vector length must equal dimension " + dimension + ", got 
" + vector.length);
+        }
+        FaissNative.indexAdd(nativeHandle, 1, vector);
+    }
+
+    /**
+     * Add vectors with explicit IDs to the index.
+     *
+     * <p>Note: Not all index types support this operation. Flat indexes and 
IndexIDMap wrapped
+     * indexes support it.
+     *
+     * @param vectors the vectors to add (n * dimension floats)
+     * @param ids the IDs for the vectors (n longs)
+     */
+    public void addWithIds(float[] vectors, long[] ids) {
+        checkNotClosed();
+        if (vectors.length % dimension != 0) {
+            throw new IllegalArgumentException(
+                    "Vector array length must be a multiple of dimension " + 
dimension);
+        }
+        long n = vectors.length / dimension;
+        if (ids.length != n) {
+            throw new IllegalArgumentException(
+                    "Number of IDs (" + ids.length + ") must match number of 
vectors (" + n + ")");
+        }
+        FaissNative.indexAddWithIds(nativeHandle, n, vectors, ids);
+    }
+
+    /**
+     * Search for the k nearest neighbors of query vectors.
+     *
+     * @param queries the query vectors (n * dimension floats)
+     * @param k the number of nearest neighbors to find; must be positive
+     * @return the search result containing labels and distances
+     * @throws IllegalArgumentException if the query array length is not a multiple of the
+     *     dimension, if k is not positive, or if n * k exceeds the maximum array size
+     * @throws IllegalStateException if the index has been closed
+     */
+    public SearchResult search(float[] queries, int k) {
+        checkNotClosed();
+        if (queries.length % dimension != 0) {
+            throw new IllegalArgumentException(
+                    "Query array length must be a multiple of dimension " + dimension);
+        }
+        if (k <= 0) {
+            throw new IllegalArgumentException("k must be positive, got " + k);
+        }
+        int n = queries.length / dimension;
+        // Compute the result size in 64-bit arithmetic: n * k can overflow an int and
+        // would otherwise allocate arrays of the wrong (or a negative) size.
+        long resultSize = (long) n * k;
+        if (resultSize > Integer.MAX_VALUE) {
+            throw new IllegalArgumentException(
+                    "Result size n * k (" + resultSize + ") exceeds the maximum array size");
+        }
+        long[] labels = new long[(int) resultSize];
+        float[] distances = new float[(int) resultSize];
+        FaissNative.indexSearch(nativeHandle, n, queries, k, distances, labels);
+        return new SearchResult(n, k, labels, distances);
+    }
+
+    /**
+     * Search for a single query vector.
+     *
+     * @param query the query vector (dimension floats)
+     * @param k the number of nearest neighbors to find
+     * @return the search result
+     */
+    public SearchResult searchSingle(float[] query, int k) {
+        checkNotClosed();
+        if (query.length != dimension) {
+            throw new IllegalArgumentException(
+                    "Query length must equal dimension " + dimension + ", got 
" + query.length);
+        }
+        long[] labels = new long[k];
+        float[] distances = new float[k];
+        FaissNative.indexSearch(nativeHandle, 1, query, k, distances, labels);
+        return new SearchResult(1, k, labels, distances);
+    }
+
+    /**
+     * Search for all neighbors within a given radius.
+     *
+     * @param queries the query vectors (n * dimension floats)
+     * @param radius the search radius
+     * @return the range search result
+     */
+    public RangeSearchResult rangeSearch(float[] queries, float radius) {
+        checkNotClosed();
+        if (queries.length % dimension != 0) {
+            throw new IllegalArgumentException(
+                    "Query array length must be a multiple of dimension " + 
dimension);
+        }
+        int n = queries.length / dimension;
+        long resultHandle = FaissNative.indexRangeSearch(nativeHandle, n, 
queries, radius);
+        return new RangeSearchResult(resultHandle, n);
+    }
+
+    /**
+     * Remove vectors by their IDs.
+     *
+     * <p>Note: Not all index types support removal. Check Faiss documentation 
for details on which
+     * index types support this operation.
+     *
+     * @param ids the IDs of vectors to remove
+     * @return the number of vectors actually removed
+     */
+    public long removeIds(long[] ids) {
+        checkNotClosed();
+        return FaissNative.indexRemoveIds(nativeHandle, ids);
+    }
+
+    /** Reset the index (remove all vectors). */
+    public void reset() {
+        checkNotClosed();
+        FaissNative.indexReset(nativeHandle);
+    }
+
+    /**
+     * Write the index to a file.
+     *
+     * @param path the file path
+     */
+    public void writeToFile(String path) {
+        checkNotClosed();
+        FaissNative.indexWriteToFile(nativeHandle, path);
+    }
+
+    /**
+     * Write the index to a file.
+     *
+     * @param file the file
+     */
+    public void writeToFile(File file) {
+        writeToFile(file.getAbsolutePath());
+    }
+
+    /**
+     * Read an index from a file.
+     *
+     * @param path the file path
+     * @return the loaded index
+     */
+    public static Index readFromFile(String path) {
+        long handle = FaissNative.indexReadFromFile(path);
+        int dimension = FaissNative.indexGetDimension(handle);
+        return new Index(handle, dimension);
+    }
+
+    /**
+     * Read an index from a file.
+     *
+     * @param file the file
+     * @return the loaded index
+     */
+    public static Index readFromFile(File file) {
+        return readFromFile(file.getAbsolutePath());
+    }
+
+    /**
+     * Serialize the index to a byte array.
+     *
+     * @return the serialized bytes
+     */
+    public byte[] serialize() {
+        checkNotClosed();
+        return FaissNative.indexSerialize(nativeHandle);
+    }
+
+    /**
+     * Deserialize an index from a byte array.
+     *
+     * @param data the serialized bytes
+     * @return the deserialized index
+     */
+    public static Index deserialize(byte[] data) {
+        long handle = FaissNative.indexDeserialize(data);
+        int dimension = FaissNative.indexGetDimension(handle);
+        return new Index(handle, dimension);
+    }
+
+    /**
+     * Get the native handle.
+     *
+     * <p>This is for internal use only.
+     *
+     * @return the native handle
+     */
+    long getNativeHandle() {
+        return nativeHandle;
+    }
+
+    private void checkNotClosed() {
+        if (closed) {
+            throw new IllegalStateException("Index has been closed");
+        }
+    }
+
+    @Override
+    public void close() {
+        if (!closed) {
+            closed = true;
+            if (nativeHandle != 0) {
+                FaissNative.indexDestroy(nativeHandle);
+                nativeHandle = 0;
+            }
+        }
+    }
+
+    @Override
+    protected void finalize() throws Throwable {
+        try {
+            close();
+        } finally {
+            super.finalize();
+        }
+    }
+
+    @Override

Review Comment:
   The `finalize()` method has been deprecated since Java 9 and should not be 
relied on for releasing native resources. Consider using 
`java.lang.ref.Cleaner` (available from Java 9) or a `PhantomReference` 
instead, or remove this method entirely, since the class already implements 
`AutoCloseable`.
   ```suggestion
   
   ```



##########
paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/RangeSearchResult.java:
##########
@@ -0,0 +1,163 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.faiss;
+
+import java.util.Arrays;
+
+/**
+ * Result of a range search operation.
+ *
+ * <p>Unlike k-NN search which returns a fixed number of neighbors per query, 
range search returns
+ * all neighbors within a given radius, which can vary per query.
+ */
+public class RangeSearchResult implements AutoCloseable {
+
+    private long nativeHandle;
+    private final int numQueries;
+    private long[] limits;
+    private long[] labels;
+    private float[] distances;
+
+    /**
+     * Create a new RangeSearchResult from a native handle.
+     *
+     * @param nativeHandle the native handle
+     * @param numQueries the number of query vectors
+     */
+    RangeSearchResult(long nativeHandle, int numQueries) {
+        this.nativeHandle = nativeHandle;
+        this.numQueries = numQueries;
+    }
+
+    /**
+     * Get the number of query vectors.
+     *
+     * @return the number of queries
+     */
+    public int getNumQueries() {
+        return numQueries;
+    }
+
+    /**
+     * Get the number of results for a specific query.
+     *
+     * @param queryIndex the query index
+     * @return the number of results
+     */
+    public long getResultCount(int queryIndex) {
+        ensureLimitsLoaded();
+        if (queryIndex < 0 || queryIndex >= numQueries) {
+            throw new IndexOutOfBoundsException("Query index out of bounds: " 
+ queryIndex);
+        }
+        return limits[queryIndex + 1] - limits[queryIndex];
+    }
+
+    /**
+     * Get the total number of results across all queries.
+     *
+     * @return the total number of results
+     */
+    public long getTotalResultCount() {
+        ensureLimitsLoaded();
+        return limits[numQueries];
+    }
+
+    /**
+     * Get the labels for a specific query.
+     *
+     * @param queryIndex the query index
+     * @return the labels for this query
+     */
+    public long[] getLabelsForQuery(int queryIndex) {
+        ensureFullyLoaded();
+        if (queryIndex < 0 || queryIndex >= numQueries) {
+            throw new IndexOutOfBoundsException("Query index out of bounds: " 
+ queryIndex);
+        }
+        int start = (int) limits[queryIndex];
+        int end = (int) limits[queryIndex + 1];
+        return Arrays.copyOfRange(labels, start, end);
+    }
+
+    /**
+     * Get the distances for a specific query.
+     *
+     * @param queryIndex the query index
+     * @return the distances for this query
+     */
+    public float[] getDistancesForQuery(int queryIndex) {
+        ensureFullyLoaded();
+        if (queryIndex < 0 || queryIndex >= numQueries) {
+            throw new IndexOutOfBoundsException("Query index out of bounds: " 
+ queryIndex);
+        }
+        int start = (int) limits[queryIndex];
+        int end = (int) limits[queryIndex + 1];
+        return Arrays.copyOfRange(distances, start, end);
+    }
+
+    /**
+     * Get all labels as a flat array.
+     *
+     * @return all labels
+     */
+    public long[] getAllLabels() {
+        ensureFullyLoaded();
+        return labels;
+    }
+
+    /**
+     * Get all distances as a flat array.
+     *
+     * @return all distances
+     */
+    public float[] getAllDistances() {
+        ensureFullyLoaded();
+        return distances;
+    }
+
+    /**
+     * Load the per-query limits from the native result if they are not cached yet.
+     *
+     * @throws IllegalStateException if the limits were never loaded and the native result has
+     *     already been released
+     */
+    private void ensureLimitsLoaded() {
+        if (limits != null) {
+            return;
+        }
+        // Fail with a clear message instead of letting callers hit a
+        // NullPointerException on the missing array.
+        if (nativeHandle == 0) {
+            throw new IllegalStateException(
+                    "RangeSearchResult was closed before its limits were loaded");
+        }
+        limits = FaissNative.rangeSearchResultGetLimits(nativeHandle);
+    }
+
+    /**
+     * Load limits, labels and distances from the native result if they are not cached yet.
+     *
+     * @throws IllegalStateException if some of the data was never loaded and the native result
+     *     has already been released
+     */
+    private void ensureFullyLoaded() {
+        ensureLimitsLoaded();
+        if (limits != null && labels != null && distances != null) {
+            return;
+        }
+        // Fail with a clear message instead of letting callers hit a
+        // NullPointerException on a missing array.
+        if (nativeHandle == 0) {
+            throw new IllegalStateException(
+                    "RangeSearchResult was closed before its results were loaded");
+        }
+        // Load each array independently so that a failure while fetching distances
+        // is retried on the next call rather than leaving distances null forever.
+        if (labels == null) {
+            labels = FaissNative.rangeSearchResultGetLabels(nativeHandle);
+        }
+        if (distances == null) {
+            distances = FaissNative.rangeSearchResultGetDistances(nativeHandle);
+        }
+    }
+
+    @Override
+    public void close() {
+        if (nativeHandle != 0) {
+            FaissNative.rangeSearchResultDestroy(nativeHandle);
+            nativeHandle = 0;
+        }
+    }
+
+    @Override
+    protected void finalize() throws Throwable {
+        try {
+            close();
+        } finally {
+            super.finalize();
+        }
+    }

Review Comment:
   The `finalize()` method has been deprecated since Java 9 and should not be 
relied on for releasing native resources. Consider using 
`java.lang.ref.Cleaner` (available from Java 9) or a `PhantomReference` 
instead, or remove this method entirely, since the class already implements 
`AutoCloseable`.
   ```suggestion
   
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to