This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 0866ff84e2 [iceberg] Support skipping AWS Glue archive (#4962)
0866ff84e2 is described below
commit 0866ff84e2ddc9a5deee6a7a6f594715dd57cc49
Author: Sina Siadat <[email protected]>
AuthorDate: Tue Jan 21 02:21:32 2025 +0000
[iceberg] Support skipping AWS Glue archive (#4962)
---
docs/content/migration/iceberg-compatibility.md | 6 ++++++
.../shortcodes/generated/iceberg_configuration.html | 6 ++++++
.../java/org/apache/paimon/iceberg/IcebergOptions.java | 6 ++++++
.../paimon/iceberg/IcebergHiveMetadataCommitter.java | 15 ++++++++++++++-
.../iceberg/IcebergHiveMetadataCommitterITCaseBase.java | 3 ++-
5 files changed, 34 insertions(+), 2 deletions(-)
diff --git a/docs/content/migration/iceberg-compatibility.md b/docs/content/migration/iceberg-compatibility.md
index 73f64fb600..66b6f17b2e 100644
--- a/docs/content/migration/iceberg-compatibility.md
+++ b/docs/content/migration/iceberg-compatibility.md
@@ -389,6 +389,12 @@ you also need to set some (or all) of the following table options when creating
<td>String</td>
<td>Hive client class name for Iceberg Hive Catalog.</td>
</tr>
+ <tr>
+ <td><h5>metadata.iceberg.glue.skip-archive</h5></td>
+ <td style="word-wrap: break-word;">false</td>
+ <td>Boolean</td>
+ <td>Skip archive for AWS Glue catalog.</td>
+ </tr>
</tbody>
</table>
diff --git a/docs/layouts/shortcodes/generated/iceberg_configuration.html b/docs/layouts/shortcodes/generated/iceberg_configuration.html
index b51be14a93..a71cabb679 100644
--- a/docs/layouts/shortcodes/generated/iceberg_configuration.html
+++ b/docs/layouts/shortcodes/generated/iceberg_configuration.html
@@ -44,6 +44,12 @@ under the License.
<td>String</td>
<td>Metastore database name for Iceberg Catalog. Set this as an
iceberg database alias if using a centralized Catalog.</td>
</tr>
+ <tr>
+ <td><h5>metadata.iceberg.glue.skip-archive</h5></td>
+ <td style="word-wrap: break-word;">false</td>
+ <td>Boolean</td>
+ <td>Skip archive for AWS Glue catalog.</td>
+ </tr>
<tr>
<td><h5>metadata.iceberg.hadoop-conf-dir</h5></td>
<td style="word-wrap: break-word;">(none)</td>
diff --git a/paimon-core/src/main/java/org/apache/paimon/iceberg/IcebergOptions.java b/paimon-core/src/main/java/org/apache/paimon/iceberg/IcebergOptions.java
index b7237dcd5b..b538a7a606 100644
--- a/paimon-core/src/main/java/org/apache/paimon/iceberg/IcebergOptions.java
+++ b/paimon-core/src/main/java/org/apache/paimon/iceberg/IcebergOptions.java
@@ -106,6 +106,12 @@ public class IcebergOptions {
"Metastore table name for Iceberg Catalog."
+ "Set this as an iceberg table alias if
using a centralized Catalog.");
+ public static final ConfigOption<Boolean> GLUE_SKIP_ARCHIVE =
+ key("metadata.iceberg.glue.skip-archive")
+ .booleanType()
+ .defaultValue(false)
+ .withDescription("Skip archive for AWS Glue catalog.");
+
/** Where to store Iceberg metadata. */
public enum StorageType implements DescribedEnum {
DISABLED("disabled", "Disable Iceberg compatibility support."),
diff --git a/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/iceberg/IcebergHiveMetadataCommitter.java b/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/iceberg/IcebergHiveMetadataCommitter.java
index 44b2a8a7e9..5eb2bfd27a 100644
--- a/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/iceberg/IcebergHiveMetadataCommitter.java
+++ b/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/iceberg/IcebergHiveMetadataCommitter.java
@@ -30,9 +30,11 @@ import org.apache.paimon.types.DataField;
import org.apache.paimon.utils.Preconditions;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
@@ -145,9 +147,20 @@ public class IcebergHiveMetadataCommitter implements IcebergMetadataCommitter {
.put("previous_metadata_location",
baseMetadataPath.toString());
}
+ Options options = new Options(table.options());
+ boolean skipAWSGlueArchive =
options.get(IcebergOptions.GLUE_SKIP_ARCHIVE);
+ EnvironmentContext environmentContext = new EnvironmentContext();
+ environmentContext.putToProperties(StatsSetupConst.CASCADE,
StatsSetupConst.TRUE);
+ environmentContext.putToProperties(
+ "skipAWSGlueArchive", Boolean.toString(skipAWSGlueArchive));
+
clients.execute(
client ->
- client.alter_table(icebergHiveDatabase,
icebergHiveTable, hiveTable, true));
+ client.alter_table_with_environmentContext(
+ icebergHiveDatabase,
+ icebergHiveTable,
+ hiveTable,
+ environmentContext));
}
private boolean databaseExists(String databaseName) throws Exception {
diff --git a/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/iceberg/IcebergHiveMetadataCommitterITCaseBase.java b/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/iceberg/IcebergHiveMetadataCommitterITCaseBase.java
index c97f6f2090..c0c68888b8 100644
--- a/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/iceberg/IcebergHiveMetadataCommitterITCaseBase.java
+++ b/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/iceberg/IcebergHiveMetadataCommitterITCaseBase.java
@@ -164,7 +164,8 @@ public abstract class IcebergHiveMetadataCommitterITCaseBase {
tEnv.executeSql("CREATE DATABASE my_paimon.test_db");
tEnv.executeSql(
"CREATE TABLE my_paimon.test_db.t ( pt INT, id INT, data
STRING ) PARTITIONED BY (pt) WITH "
- + "( 'metadata.iceberg.storage' = 'hive-catalog',
'metadata.iceberg.uri' = '', 'file.format' = 'avro' )");
+ + "( 'metadata.iceberg.storage' = 'hive-catalog',
'metadata.iceberg.uri' = '', 'file.format' = 'avro',"
+ + " 'metadata.iceberg.glue.skip-archive' = 'true' )");
tEnv.executeSql(
"INSERT INTO my_paimon.test_db.t VALUES "
+ "(1, 1, 'apple'), (1, 2, 'pear'), (2, 1,
'cat'), (2, 2, 'dog')")