FANNG1 commented on code in PR #5722: URL: https://github.com/apache/gravitino/pull/5722#discussion_r1879602605
########## catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonPropertiesUtils.java: ########## @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.lakehouse.paimon; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; +import java.util.Optional; +import org.apache.gravitino.storage.OSSProperties; +import org.apache.gravitino.storage.S3Properties; + +public class PaimonPropertiesUtils { + + // Map that maintains the mapping of keys in Gravitino to that in Paimon, for example, users + // will only need to set the configuration 'catalog-backend' in Gravitino and Gravitino will + // change it to `catalogType` automatically and pass it to Paimon. + public static final Map<String, String> GRAVITINO_CONFIG_TO_PAIMON; + + static { + Map<String, String> map = new HashMap(); + map.put(PaimonConstants.CATALOG_BACKEND, PaimonConstants.CATALOG_BACKEND); + map.put(PaimonConstants.GRAVITINO_JDBC_DRIVER, PaimonConstants.GRAVITINO_JDBC_DRIVER); + map.put(PaimonConstants.GRAVITINO_JDBC_USER, PaimonConstants.PAIMON_JDBC_USER); + map.put(PaimonConstants.GRAVITINO_JDBC_PASSWORD, PaimonConstants.PAIMON_JDBC_PASSWORD); + map.put(PaimonConstants.URI, PaimonConstants.URI); + map.put(PaimonConstants.WAREHOUSE, PaimonConstants.WAREHOUSE); + map.put(PaimonConstants.CATALOG_BACKEND_NAME, PaimonConstants.CATALOG_BACKEND_NAME); + // S3 + map.put(S3Properties.GRAVITINO_S3_ENDPOINT, PaimonConstants.S3_ENDPOINT); + map.put(S3Properties.GRAVITINO_S3_ACCESS_KEY_ID, PaimonConstants.S3_ACCESS_KEY); + map.put(S3Properties.GRAVITINO_S3_SECRET_ACCESS_KEY, PaimonConstants.S3_SECRET_KEY); + // OSS + map.put(OSSProperties.GRAVITINO_OSS_ENDPOINT, PaimonConstants.OSS_ENDPOINT); + map.put(OSSProperties.GRAVITINO_OSS_ACCESS_KEY_ID, PaimonConstants.OSS_ACCESS_KEY); + map.put(OSSProperties.GRAVITINO_OSS_ACCESS_KEY_SECRET, PaimonConstants.OSS_SECRET_KEY); + GRAVITINO_CONFIG_TO_PAIMON = Collections.unmodifiableMap(map); + } + + /** + * Converts Gravitino properties to Paimon catalog properties, the common transform logic shared + * by Spark connector, Gravitino Paimon catalog. + * + * @param gravitinoProperties a map of Gravitino configuration properties. + * @return a map containing Paimon catalog properties. + */ + public static Map<String, String> toPaimonCatalogProperties( + Map<String, String> gravitinoProperties) { + Map<String, String> paimonProperties = new HashMap<>(); + gravitinoProperties.forEach( + (key, value) -> { + if (GRAVITINO_CONFIG_TO_PAIMON.containsKey(key)) { + paimonProperties.put(GRAVITINO_CONFIG_TO_PAIMON.get(key), value); + } + }); + return paimonProperties; + } + + /** + * Get catalog backend name from Gravitino catalog properties. + * + * @param catalogProperties a map of Gravitino catalog properties. + * @return catalog backend name. + */ + public static String getCatalogBackendName(Map<String, String> catalogProperties) { + String backendName = catalogProperties.get(PaimonConstants.CATALOG_BACKEND_NAME); Review Comment: Is `catalog-backend-name` is a new added property for Paimon catalog? could you add related document? ########## spark-connector/spark-common/build.gradle.kts: ########## @@ -123,6 +130,9 @@ dependencies { exclude("org.glassfish.jersey.inject") } testImplementation("org.scala-lang.modules:scala-collection-compat_$scalaVersion:$scalaCollectionCompatVersion") + testImplementation("org.apache.spark:spark-catalyst_$scalaVersion:$sparkVersion") Review Comment: why add these dependencies? ########## spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkCommonIT.java: ########## @@ -484,7 +487,7 @@ void testAlterTableRenameColumn() { } @Test - void testUpdateColumnPosition() { + protected void testUpdateColumnPosition() { Review Comment: why add `protected` to the test? ########## spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/SparkPaimonTable.java: ########## @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.spark.connector.paimon; + +import java.util.Map; +import org.apache.gravitino.rel.Table; +import org.apache.gravitino.spark.connector.PropertiesConverter; +import org.apache.gravitino.spark.connector.SparkTransformConverter; +import org.apache.gravitino.spark.connector.SparkTypeConverter; +import org.apache.gravitino.spark.connector.utils.GravitinoTableInfoHelper; +import org.apache.paimon.spark.SparkTable; +import org.apache.spark.sql.connector.catalog.Identifier; +import org.apache.spark.sql.connector.expressions.Transform; +import org.apache.spark.sql.connector.read.ScanBuilder; +import org.apache.spark.sql.types.StructType; +import org.apache.spark.sql.util.CaseInsensitiveStringMap; + +/** + * For spark-connector in Paimon, it explicitly uses SparkTable to identify whether it is an Apache Review Comment: Does Paimon have a similar behavior like Iceberg? ########## catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonPropertiesUtils.java: ########## @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.lakehouse.paimon; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; +import java.util.Optional; +import org.apache.gravitino.storage.OSSProperties; +import org.apache.gravitino.storage.S3Properties; + +public class PaimonPropertiesUtils { + + // Map that maintains the mapping of keys in Gravitino to that in Paimon, for example, users + // will only need to set the configuration 'catalog-backend' in Gravitino and Gravitino will + // change it to `catalogType` automatically and pass it to Paimon. + public static final Map<String, String> GRAVITINO_CONFIG_TO_PAIMON; + + static { + Map<String, String> map = new HashMap(); + map.put(PaimonConstants.CATALOG_BACKEND, PaimonConstants.CATALOG_BACKEND); + map.put(PaimonConstants.GRAVITINO_JDBC_DRIVER, PaimonConstants.GRAVITINO_JDBC_DRIVER); + map.put(PaimonConstants.GRAVITINO_JDBC_USER, PaimonConstants.PAIMON_JDBC_USER); + map.put(PaimonConstants.GRAVITINO_JDBC_PASSWORD, PaimonConstants.PAIMON_JDBC_PASSWORD); + map.put(PaimonConstants.URI, PaimonConstants.URI); + map.put(PaimonConstants.WAREHOUSE, PaimonConstants.WAREHOUSE); + map.put(PaimonConstants.CATALOG_BACKEND_NAME, PaimonConstants.CATALOG_BACKEND_NAME); + // S3 + map.put(S3Properties.GRAVITINO_S3_ENDPOINT, PaimonConstants.S3_ENDPOINT); Review Comment: Do the `S3` and `OSS` properties work for the Paimon spark connector? ########## spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java: ########## @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.spark.connector.integration.test.paimon; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import org.apache.gravitino.spark.connector.integration.test.SparkCommonIT; +import org.apache.gravitino.spark.connector.integration.test.util.SparkTableInfo; +import org.apache.gravitino.spark.connector.integration.test.util.SparkTableInfoChecker; +import org.apache.gravitino.spark.connector.paimon.PaimonPropertiesConstants; +import org.apache.hadoop.fs.Path; +import org.apache.spark.sql.types.DataTypes; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public abstract class SparkPaimonCatalogIT extends SparkCommonIT { + + @Override + protected String getCatalogName() { + return "paimon"; + } + + @Override + protected String getProvider() { + return "lakehouse-paimon"; + } + + @Override + protected boolean supportsSparkSQLClusteredBy() { + return false; + } + + @Override + protected boolean supportsPartition() { + return true; + } + + @Override + protected boolean supportsDelete() { + return false; + } + + @Override + protected boolean supportsSchemaEvolution() { + return true; + } + + @Override + protected boolean supportsReplaceColumns() { + // Paimon doesn't support replace columns, because it doesn't support drop all fields in table. + // And `ALTER TABLE REPLACE COLUMNS` statement will removes all existing columns at first and + // then adds the new set of columns. + return false; + } + + @Override + protected String getTableLocation(SparkTableInfo table) { + Map<String, String> tableProperties = table.getTableProperties(); + return tableProperties.get(PaimonPropertiesConstants.PAIMON_TABLE_LOCATION); + } + + @Test + void testPaimonPartitions() { Review Comment: is there something specific to Paimon? why not move to common? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
