morningman commented on code in PR #16602: URL: https://github.com/apache/doris/pull/16602#discussion_r1108127786
########## fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/dlf/DLFCatalog.java: ########## @@ -0,0 +1,224 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.datasource.iceberg.dlf; + +import org.apache.doris.datasource.iceberg.dlf.client.DLFCachedClientPool; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.iceberg.BaseMetastoreCatalog; +import org.apache.iceberg.BaseMetastoreTableOperations; +import org.apache.iceberg.TableOperations; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.SupportsNamespaces; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.exceptions.NamespaceNotEmptyException; +import org.apache.iceberg.exceptions.NoSuchIcebergTableException; +import org.apache.iceberg.exceptions.NoSuchNamespaceException; +import org.apache.iceberg.exceptions.NoSuchTableException; +import org.apache.iceberg.hadoop.HadoopFileIO; +import org.apache.iceberg.hive.MetastoreUtil; +import org.apache.iceberg.io.FileIO; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import 
java.util.Set; +import java.util.stream.Collectors; + +public class DLFCatalog extends BaseMetastoreCatalog implements SupportsNamespaces, Configurable { + + private Configuration conf; + private DLFCachedClientPool clients; + private FileIO fileIO; + private String uid; + + @Override + protected TableOperations newTableOps(TableIdentifier tableIdentifier) { + String dbName = tableIdentifier.namespace().level(0); + String tableName = tableIdentifier.name(); + return new DLFTableOperations(this.conf, this.clients, this.fileIO, this.uid, dbName, tableName); + } + + @Override + protected String defaultWarehouseLocation(TableIdentifier tableIdentifier) { + return null; + } + + @Override + public void initialize(String name, Map<String, String> properties) { + this.uid = name; + this.fileIO = new HadoopFileIO(conf); + this.clients = new DLFCachedClientPool(this.conf, properties); + } + + @Override + protected boolean isValidIdentifier(TableIdentifier tableIdentifier) { + return tableIdentifier.namespace().levels().length == 1; + } + + private boolean isValidNamespace(Namespace namespace) { + return namespace.levels().length == 1; + } + + @Override + public List<TableIdentifier> listTables(Namespace namespace) { + if (isValidNamespace(namespace)) { + throw new NoSuchTableException("Invalid namespace: %s", namespace); + } + String dbName = namespace.level(0); + try { + return clients.run(client -> client.getAllTables(dbName)) + .stream() + .map(tbl -> TableIdentifier.of(dbName, tbl)) + .collect(Collectors.toList()); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public boolean dropTable(TableIdentifier tableIdentifier, boolean purge) { Review Comment: Do we need to implement these drop/rename/create methods? They are currently not supported. 
########## fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/dlf/client/DLFCachedClientPool.java: ########## @@ -0,0 +1,78 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.datasource.iceberg.dlf.client; + +import com.github.benmanes.caffeine.cache.Cache; +import com.github.benmanes.caffeine.cache.Caffeine; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.iceberg.CatalogProperties; +import org.apache.iceberg.ClientPool; +import org.apache.iceberg.util.PropertyUtil; +import org.apache.thrift.TException; + +import java.util.Map; +import java.util.concurrent.TimeUnit; + +public class DLFCachedClientPool implements ClientPool<IMetaStoreClient, TException> { + + private static Cache<String, DLFClientPool> clientPoolCache; + private final Configuration conf; + private final String endpoint; + private final int clientPoolSize; + private final long evictionInterval; + + public DLFCachedClientPool(Configuration conf, Map<String, String> properties) { + this.conf = conf; + this.endpoint = conf.get("", ""); + this.clientPoolSize = + PropertyUtil.propertyAsInt( + properties, + CatalogProperties.CLIENT_POOL_SIZE, 
Review Comment: What is the default value of `CLIENT_POOL_SIZE`? ########## fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/dlf/DLFCatalog.java: ########## @@ -0,0 +1,224 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.datasource.iceberg.dlf; + +import org.apache.doris.datasource.iceberg.dlf.client.DLFCachedClientPool; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.iceberg.BaseMetastoreCatalog; +import org.apache.iceberg.BaseMetastoreTableOperations; +import org.apache.iceberg.TableOperations; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.SupportsNamespaces; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.exceptions.NamespaceNotEmptyException; +import org.apache.iceberg.exceptions.NoSuchIcebergTableException; +import org.apache.iceberg.exceptions.NoSuchNamespaceException; +import org.apache.iceberg.exceptions.NoSuchTableException; +import org.apache.iceberg.hadoop.HadoopFileIO; +import org.apache.iceberg.hive.MetastoreUtil; +import org.apache.iceberg.io.FileIO; + +import java.util.ArrayList; 
+import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +public class DLFCatalog extends BaseMetastoreCatalog implements SupportsNamespaces, Configurable { + + private Configuration conf; + private DLFCachedClientPool clients; + private FileIO fileIO; + private String uid; + + @Override + protected TableOperations newTableOps(TableIdentifier tableIdentifier) { + String dbName = tableIdentifier.namespace().level(0); + String tableName = tableIdentifier.name(); + return new DLFTableOperations(this.conf, this.clients, this.fileIO, this.uid, dbName, tableName); + } + + @Override + protected String defaultWarehouseLocation(TableIdentifier tableIdentifier) { + return null; + } + + @Override + public void initialize(String name, Map<String, String> properties) { + this.uid = name; + this.fileIO = new HadoopFileIO(conf); + this.clients = new DLFCachedClientPool(this.conf, properties); + } + + @Override + protected boolean isValidIdentifier(TableIdentifier tableIdentifier) { + return tableIdentifier.namespace().levels().length == 1; + } + + private boolean isValidNamespace(Namespace namespace) { + return namespace.levels().length == 1; + } + + @Override + public List<TableIdentifier> listTables(Namespace namespace) { + if (isValidNamespace(namespace)) { + throw new NoSuchTableException("Invalid namespace: %s", namespace); + } + String dbName = namespace.level(0); + try { + return clients.run(client -> client.getAllTables(dbName)) + .stream() + .map(tbl -> TableIdentifier.of(dbName, tbl)) + .collect(Collectors.toList()); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public boolean dropTable(TableIdentifier tableIdentifier, boolean purge) { + if (!isValidIdentifier(tableIdentifier)) { + throw new NoSuchTableException("Invalid identifier: %s", tableIdentifier); + } + try { + String dbName = tableIdentifier.namespace().level(0); + clients.run(client -> { + client.dropTable(dbName, 
tableIdentifier.name(), + false /* do not delete data */, + false /* throw NoSuchObjectException if the table doesn't exist */); + return true; + }); + } catch (Exception e) { + throw new RuntimeException(e); + } + return false; + } + + @Override + public void renameTable(TableIdentifier sourceTbl, TableIdentifier targetTbl) { + if (!isValidIdentifier(sourceTbl)) { + throw new NoSuchTableException("Invalid identifier: %s", sourceTbl); + } + try { + String sourceDbName = sourceTbl.namespace().level(0); + String targetDbName = targetTbl.namespace().level(0); + if (!sourceDbName.equals(targetDbName)) { + throw new RuntimeException("The two table not belong to a database."); + } + Table table = clients.run(client -> client.getTable(sourceDbName, sourceTbl.name())); + validateTableIsIceberg(table, fullTableName(sourceDbName, sourceTbl)); + clients.run(client -> { + MetastoreUtil.alterTable(client, sourceDbName, sourceTbl.name(), table); + return null; + }); + } catch (Exception e) { + throw new RuntimeException("Fail to renameTable.", e); + } + } + + static void validateTableIsIceberg(Table table, String fullName) { Review Comment: Why use `static`? And wouldn't it be better to add `private`? ########## fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/dlf/DLFCatalog.java: ########## @@ -0,0 +1,224 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.datasource.iceberg.dlf; + +import org.apache.doris.datasource.iceberg.dlf.client.DLFCachedClientPool; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.iceberg.BaseMetastoreCatalog; +import org.apache.iceberg.BaseMetastoreTableOperations; +import org.apache.iceberg.TableOperations; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.SupportsNamespaces; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.exceptions.NamespaceNotEmptyException; +import org.apache.iceberg.exceptions.NoSuchIcebergTableException; +import org.apache.iceberg.exceptions.NoSuchNamespaceException; +import org.apache.iceberg.exceptions.NoSuchTableException; +import org.apache.iceberg.hadoop.HadoopFileIO; +import org.apache.iceberg.hive.MetastoreUtil; +import org.apache.iceberg.io.FileIO; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +public class DLFCatalog extends BaseMetastoreCatalog implements SupportsNamespaces, Configurable { + + private Configuration conf; + private DLFCachedClientPool clients; + private FileIO fileIO; + private String uid; + + @Override + protected TableOperations newTableOps(TableIdentifier tableIdentifier) { + String dbName = tableIdentifier.namespace().level(0); + String tableName = tableIdentifier.name(); + return new DLFTableOperations(this.conf, this.clients, 
this.fileIO, this.uid, dbName, tableName); + } + + @Override + protected String defaultWarehouseLocation(TableIdentifier tableIdentifier) { + return null; + } + + @Override + public void initialize(String name, Map<String, String> properties) { + this.uid = name; + this.fileIO = new HadoopFileIO(conf); Review Comment: Why use `HadoopFileIO`? What if the data is stored on OSS? ########## fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/dlf/client/DLFCachedClientPool.java: ########## @@ -0,0 +1,78 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.datasource.iceberg.dlf.client; + +import com.github.benmanes.caffeine.cache.Cache; +import com.github.benmanes.caffeine.cache.Caffeine; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.iceberg.CatalogProperties; +import org.apache.iceberg.ClientPool; +import org.apache.iceberg.util.PropertyUtil; +import org.apache.thrift.TException; + +import java.util.Map; +import java.util.concurrent.TimeUnit; + +public class DLFCachedClientPool implements ClientPool<IMetaStoreClient, TException> { Review Comment: Why do we need a client pool? Is there any reference or example? 
-- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
