difin commented on code in PR #5995: URL: https://github.com/apache/hive/pull/5995#discussion_r2283086296
########## iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/client/HiveRESTCatalogClient.java: ########## @@ -0,0 +1,318 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.hive.client; + +import java.io.IOException; +import java.util.Collections; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Properties; +import java.util.stream.Collectors; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.metastore.api.CompactionMetricsDataStruct; +import org.apache.hadoop.hive.metastore.api.CreateTableRequest; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.DropDatabaseRequest; +import org.apache.hadoop.hive.metastore.api.EnvironmentContext; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.GetTableRequest; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.PrincipalType; +import org.apache.hadoop.hive.metastore.api.SQLCheckConstraint; +import org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint; +import org.apache.hadoop.hive.metastore.api.SQLForeignKey; +import org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint; +import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey; +import org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.api.WMFullResourcePlan; +import org.apache.hadoop.hive.metastore.client.BaseMetaStoreClient; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.iceberg.BaseTable; +import org.apache.iceberg.CatalogUtil; +import org.apache.iceberg.Schema; +import org.apache.iceberg.SortOrder; +import org.apache.iceberg.TableMetadata; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.exceptions.NoSuchTableException; +import org.apache.iceberg.hive.HMSTablePropertyHelper; +import org.apache.iceberg.hive.HiveOperationsBase; +import org.apache.iceberg.hive.HiveSchemaUtil; +import org.apache.iceberg.hive.RuntimeMetaException; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.rest.RESTCatalog; +import org.apache.thrift.TException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class HiveRESTCatalogClient extends BaseMetaStoreClient { + + public static final String NAMESPACE_SEPARATOR = "."; + public static final String NAME = "name"; + public static final String LOCATION = "location"; + public static final String ICEBERG_CATALOG_TYPE = "iceberg.catalog.default_iceberg.type"; + public static final String DB_OWNER = "owner"; + public static final String DB_OWNER_TYPE = "ownerType"; + public static final String DEFAULT_INPUT_FORMAT_CLASS = "org.apache.iceberg.mr.hive.HiveIcebergInputFormat"; + public static final String DEFAULT_OUTPUT_FORMAT_CLASS = "org.apache.iceberg.mr.hive.HiveIcebergOutputFormat"; + public static final String DEFAULT_SERDE_CLASS = "org.apache.iceberg.mr.hive.HiveIcebergSerDe"; + public static final String WAREHOUSE = "warehouse"; + + private static final Logger LOG = LoggerFactory.getLogger(HiveRESTCatalogClient.class); + public static final String CATALOG_CONFIG_PREFIX = "iceberg.rest-catalog."; + + private final Configuration conf; + private RESTCatalog restCatalog; + private final long maxHiveTablePropertySize; + + public HiveRESTCatalogClient(Configuration conf, boolean allowEmbedded) { + this(conf); + } + + public HiveRESTCatalogClient(Configuration conf) { + super(conf); + this.conf = conf; + this.maxHiveTablePropertySize = conf.getLong(HiveOperationsBase.HIVE_TABLE_PROPERTY_MAX_SIZE, + HiveOperationsBase.HIVE_TABLE_PROPERTY_MAX_SIZE_DEFAULT); + reconnect(); + } + + @Override + public void reconnect() { + Map<String, String> properties = getCatalogPropertiesFromConf(conf); + String catalogName = properties.get(WAREHOUSE); + if (restCatalog != null) { + try { + restCatalog.close(); + } catch (IOException e) { + throw new RuntimeMetaException(e.getCause(), "Failed to close existing REST catalog"); + } + } + restCatalog = (RESTCatalog) CatalogUtil.buildIcebergCatalog(catalogName, properties, null); + } + + @Override + public void close() { + try { + if (restCatalog != null) { + restCatalog.close(); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private static Map<String, String> getCatalogPropertiesFromConf(Configuration conf) { + Map<String, String> catalogProperties = Maps.newHashMap(); + conf.forEach(config -> { + if (config.getKey().startsWith(CATALOG_CONFIG_PREFIX)) { + catalogProperties.put( + config.getKey().substring(CATALOG_CONFIG_PREFIX.length()), + config.getValue()); + } + }); + catalogProperties.put(CatalogUtil.ICEBERG_CATALOG_TYPE, CatalogUtil.ICEBERG_CATALOG_TYPE_REST); + return catalogProperties; + } + + + @Override + public List<String> getDatabases(String catName, String databasePattern) { + return restCatalog.listNamespaces(Namespace.empty()).stream().map(Namespace::toString).collect(Collectors.toList()); + } + + @Override + public List<String> getAllDatabases(String catName) throws TException { + return getAllDatabases(); + } + + @Override + public List<String> getTables(String catName, String dbName, String tablePattern) { + return getTables(catName, dbName, tablePattern, null); + } + + @Override + public List<String> getTables(String catName, String dbName, String tablePattern, TableType tableType) { + List<TableIdentifier> tableIdentifiers = restCatalog.listTables(Namespace.of(dbName)); + return tableIdentifiers.stream().map(TableIdentifier::name).collect(Collectors.toList()); + } + + @Override + public List<String> getAllTables(String catName, String dbName) { + return getTables(catName, dbName, "", null); + } + + @Override + public void dropTable(Table table, boolean deleteData, boolean ignoreUnknownTab, boolean ifPurge) throws TException { + restCatalog.dropTable(TableIdentifier.of(table.getDbName(), table.getTableName())); + dropTable(table.getDbName(), table.getTableName()); + } + + @Override + public boolean tableExists(String catName, String dbName, String tableName) throws TException { + return tableExists(dbName, tableName); + } + + @Override + public Database getDatabase(String catalogName, String databaseName) { + return restCatalog.listNamespaces(Namespace.empty()).stream() + .filter(namespace -> namespace.levels()[0].equals(databaseName)).map(namespace -> { + Database database = new Database(); + database.setName(String.join(NAMESPACE_SEPARATOR, namespace.levels())); + Map<String, String> namespaceMetadata = restCatalog.loadNamespaceMetadata(Namespace.of(databaseName)); + database.setLocationUri(namespaceMetadata.get(LOCATION)); + database.setCatalogName("REST"); + database.setOwnerName(namespaceMetadata.get(DB_OWNER)); + try { + database.setOwnerType(PrincipalType.valueOf(namespaceMetadata.get(DB_OWNER_TYPE))); + } catch (Exception e) { + LOG.warn("Can not set ownerType: {}", namespaceMetadata.get(DB_OWNER_TYPE), e); + } + return database; + }).findFirst().get(); + } + + private Table convertIcebergTableToHiveTable(org.apache.iceberg.Table icebergTable) { + Table hiveTable = new Table(); + TableMetadata metadata = ((BaseTable) icebergTable).operations().current(); + HMSTablePropertyHelper.updateHmsTableForIcebergTable(metadata.metadataFileLocation(), hiveTable, metadata, + null, true, maxHiveTablePropertySize, null); + hiveTable.getParameters().put(ICEBERG_CATALOG_TYPE, CatalogUtil.ICEBERG_CATALOG_TYPE_REST); + hiveTable.setTableName(getTableName(icebergTable)); + hiveTable.setDbName(getDbName(icebergTable)); + StorageDescriptor storageDescriptor = new StorageDescriptor(); + hiveTable.setSd(storageDescriptor); + hiveTable.setTableType("EXTERNAL_TABLE"); + hiveTable.setPartitionKeys(new LinkedList<>()); + List<FieldSchema> cols = new LinkedList<>(); + storageDescriptor.setCols(cols); + storageDescriptor.setLocation(icebergTable.location()); + storageDescriptor.setInputFormat(DEFAULT_INPUT_FORMAT_CLASS); + storageDescriptor.setOutputFormat(DEFAULT_OUTPUT_FORMAT_CLASS); + storageDescriptor.setBucketCols(new LinkedList<>()); + storageDescriptor.setSortCols(new LinkedList<>()); + storageDescriptor.setParameters(Maps.newHashMap()); + SerDeInfo serDeInfo = new SerDeInfo("icebergSerde", DEFAULT_SERDE_CLASS, Maps.newHashMap()); + serDeInfo.getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1"); // Default serialization format. + storageDescriptor.setSerdeInfo(serDeInfo); + icebergTable.schema().columns().forEach(icebergColumn -> { + FieldSchema fieldSchema = new FieldSchema(); + fieldSchema.setName(icebergColumn.name()); + fieldSchema.setType(icebergColumn.type().toString()); + cols.add(fieldSchema); + }); + return hiveTable; + } + + private String getTableName(org.apache.iceberg.Table icebergTable) { + String[] nameParts = icebergTable.name().split("\\."); + if (nameParts.length == 3) { + return nameParts[2]; + } + if (nameParts.length == 2) { + return nameParts[1]; + } + return icebergTable.name(); + } + + private String getDbName(org.apache.iceberg.Table icebergTable) { + String[] nameParts = icebergTable.name().split("\\."); + return nameParts.length == 3 ? nameParts[1] : nameParts[0]; + } + + @Override + public Table getTable(GetTableRequest getTableRequest) throws TException { + org.apache.iceberg.Table icebergTable; + try { + icebergTable = restCatalog.loadTable(TableIdentifier.of(getTableRequest.getDbName(), + getTableRequest.getTblName())); + } catch (NoSuchTableException exception) { + throw new NoSuchObjectException(); + } + return convertIcebergTableToHiveTable(icebergTable); + } + + @Override + public void createTable(CreateTableRequest request) throws TException { + Table table = request.getTable(); + List<FieldSchema> cols = Lists.newArrayList(table.getSd().getCols()); + if (table.isSetPartitionKeys() && !table.getPartitionKeys().isEmpty()) { + cols.addAll(table.getPartitionKeys()); + } + Properties catalogProperties = HMSTablePropertyHelper.getCatalogProperties(table); + Schema schema = HiveSchemaUtil.convert(cols, true); + Map<String, String> envCtxProps = Optional.ofNullable(request.getEnvContext()) + .map(EnvironmentContext::getProperties) + .orElse(Collections.emptyMap()); + org.apache.iceberg.PartitionSpec partitionSpec = + HMSTablePropertyHelper.getPartitionSpec(envCtxProps, schema); + SortOrder sortOrder = HMSTablePropertyHelper.getSortOrder(catalogProperties, schema); + + restCatalog.buildTable(TableIdentifier.of(table.getDbName(), table.getTableName()), schema) + .withPartitionSpec(partitionSpec).withLocation(catalogProperties.getProperty(LOCATION)).withSortOrder(sortOrder) + .withProperties(catalogProperties.entrySet().stream() + .collect(Collectors.toMap(entry -> ((Map.Entry<?, ?>) entry).getKey().toString(), + entry -> ((Map.Entry<?, ?>) entry).getValue().toString()) + )).create(); + } + + @Override + public void createDatabase(Database db) { + Map<String, String> props = Maps.newHashMap(); + props.put(LOCATION, db.getLocationUri()); + props.put(DB_OWNER, db.getOwnerName()); + props.put(DB_OWNER_TYPE, db.getOwnerType().toString()); + restCatalog.createNamespace(Namespace.of(db.getName()), props); + } + + + @Override + public void dropDatabase(DropDatabaseRequest req) { + restCatalog.dropNamespace(Namespace.of(req.getName())); + } + + @Override + public void createTableWithConstraints(Table tTbl, List<SQLPrimaryKey> primaryKeys, List<SQLForeignKey> foreignKeys, + List<SQLUniqueConstraint> uniqueConstraints, List<SQLNotNullConstraint> notNullConstraints, + List<SQLDefaultConstraint> defaultConstraints, List<SQLCheckConstraint> checkConstraints) throws TException { + createTable(tTbl); + } + + @Override + public WMFullResourcePlan getResourcePlan(String resourcePlanName, String ns) { + return null; Review Comment: no, removed. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For additional commands, e-mail: gitbox-h...@hive.apache.org