iting0321 commented on code in PR #3820:
URL: https://github.com/apache/polaris/pull/3820#discussion_r2876737618
##########
plugins/spark/v3.5/spark/src/main/java/org/apache/polaris/spark/utils/PaimonHelper.java:
##########
@@ -19,75 +19,125 @@
package org.apache.polaris.spark.utils;
+import java.util.HashMap;
+import java.util.Map;
import org.apache.iceberg.common.DynConstructors;
-import org.apache.polaris.spark.PolarisSparkCatalog;
-import org.apache.spark.sql.connector.catalog.DelegatingCatalogExtension;
+import org.apache.spark.sql.connector.catalog.CatalogPlugin;
+import org.apache.spark.sql.connector.catalog.SupportsNamespaces;
import org.apache.spark.sql.connector.catalog.TableCatalog;
import org.apache.spark.sql.util.CaseInsensitiveStringMap;
/**
* Helper class for integrating Apache Paimon table functionality with Polaris
Spark Catalog.
*
- * <p>This class is responsible for dynamically loading and configuring a
Paimon Catalog
- * implementation to work with Polaris. It sets up the Paimon Catalog as a
delegating catalog
- * extension with Polaris Spark Catalog as the delegate, enabling Paimon table
operations through
- * Polaris.
+ * <p>This class is responsible for dynamically loading and configuring a
Paimon SparkCatalog
+ * implementation to work with Polaris. Paimon SparkCatalog manages its own
table metadata at the
+ * warehouse location following the pattern: warehouse/database.db/table_name.
+ *
+ * <p>Unlike Delta and Hudi which use DelegatingCatalogExtension pattern,
Paimon's SparkCatalog is a
+ * standalone catalog that requires proper initialization with a warehouse
path. This helper ensures
+ * Paimon is correctly configured using the Polaris warehouse location.
*
* <p>Apache Paimon is a streaming data lake platform with high-speed data
ingestion, changelog
* tracking and efficient real-time analytics. This helper enables Polaris to
manage Paimon tables
* alongside Iceberg, Delta, and Hudi tables in a unified catalog.
+ *
+ * <p>Configuration options:
+ *
+ * <ul>
+ * <li>{@code paimon-warehouse}: The warehouse path for Paimon tables. This
is required for Paimon
+ * to manage table metadata at the location:
warehouse/database.db/table_name
+ * <li>{@code paimon-catalog-impl}: Optional custom Paimon SparkCatalog
implementation class.
+ * Defaults to org.apache.paimon.spark.SparkCatalog
+ * </ul>
*/
public class PaimonHelper {
public static final String PAIMON_CATALOG_IMPL_KEY = "paimon-catalog-impl";
+ public static final String PAIMON_WAREHOUSE_KEY = "paimon-warehouse";
private static final String DEFAULT_PAIMON_CATALOG_CLASS =
"org.apache.paimon.spark.SparkCatalog";
- private TableCatalog paimonCatalog = null;
- private String paimonCatalogImpl = DEFAULT_PAIMON_CATALOG_CLASS;
+ private CatalogPlugin paimonCatalog = null;
+ private final String paimonCatalogImpl;
+ private final String paimonWarehouse;
public PaimonHelper(CaseInsensitiveStringMap options) {
if (options.get(PAIMON_CATALOG_IMPL_KEY) != null) {
this.paimonCatalogImpl = options.get(PAIMON_CATALOG_IMPL_KEY);
+ } else {
+ this.paimonCatalogImpl = DEFAULT_PAIMON_CATALOG_CLASS;
}
+ // Get the Paimon-specific warehouse path from options
+ this.paimonWarehouse = options.get(PAIMON_WAREHOUSE_KEY);
}
/**
- * Load and configure the Paimon catalog with Polaris Spark Catalog as the
delegate.
+ * Load and configure the Paimon SparkCatalog with the configured warehouse
path.
+ *
+ * <p>Paimon SparkCatalog requires a warehouse path to manage table
locations. This method
+ * initializes Paimon with the warehouse path configured via the
paimon-warehouse option.
*
- * @param polarisSparkCatalog the Polaris Spark Catalog to set as delegate
+ * @param catalogName the name of the catalog
* @return the configured Paimon TableCatalog
+ * @throws IllegalArgumentException if paimon-warehouse is not configured
*/
- public TableCatalog loadPaimonCatalog(PolarisSparkCatalog
polarisSparkCatalog) {
+ public TableCatalog loadPaimonCatalog(String catalogName) {
if (this.paimonCatalog != null) {
Review Comment:
We didn’t have this pattern before. `HudiHelper` and `DeltaHelper` also have
this issue; I can help with the follow-up.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]