Copilot commented on code in PR #336: URL: https://github.com/apache/atlas/pull/336#discussion_r2276024615
########## addons/trino-extractor/src/main/java/org/apache/atlas/trino/connector/ConnectorFactory.java: ########## @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.trino.connector; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ConnectorFactory { + private static final Logger LOG = LoggerFactory.getLogger(ConnectorFactory.class); + + public static AtlasEntityConnector getConnector(String connectorType) { + switch (connectorType.toLowerCase()) { + case "iceberg": + return new IcebergEntityConnector(); + case "hive": + return new HiveEntityConnector(); + default: + LOG.warn("{} type does not have hook implemented on Atlas"); Review Comment: The log message is missing the connector type parameter. It should be LOG.warn("{} type does not have hook implemented on Atlas", connectorType); ```suggestion LOG.warn("{} type does not have hook implemented on Atlas", connectorType); ``` ########## addons/trino-extractor/src/main/java/org/apache/atlas/trino/connector/HiveEntityConnector.java: ########## @@ -0,0 +1,124 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.trino.connector; + +import org.apache.atlas.AtlasServiceException; +import org.apache.atlas.model.instance.AtlasEntity; +import org.apache.atlas.trino.client.AtlasClientHelper; +import org.apache.atlas.type.AtlasTypeUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; + +public class HiveEntityConnector extends AtlasEntityConnector { + private static final Logger LOG = LoggerFactory.getLogger(HiveEntityConnector.class); + + public static final String HIVE_DB = "hive_db"; + public static final String HIVE_TABLE = "hive_table"; + public static final String HIVE_COLUMN = "hive_column"; + public static final String TRINO_SCHEMA_HIVE_DB_RELATIONSHIP = "trino_schema_hive_db"; + public static final String TRINO_TABLE_HIVE_TABLE_RELATIONSHIP = "trino_table_hive_table"; + public static final String TRINO_COLUMN_HIVE_COLUMN_RELATIONSHIP = "trino_column_hive_column"; + public static final String TRINO_SCHEMA_HIVE_DB_ATTRIBUTE = "hive_db"; + public static final String TRINO_TABLE_HIVE_TABLE_ATTRIBUTE = "hive_table"; + public static final String TRINO_COLUMN_HIVE_COLUMN_ATTRIBUTE = "hive_column"; + @Override + public void connectTrinoCatalog(String instanceName, String catalogName, AtlasEntity entity, AtlasEntity.AtlasEntityWithExtInfo entityWithExtInfo) { + + } + + @Override + public void connectTrinoSchema(String instanceName, String catalogName, String schemaName, AtlasEntity dbEntity, AtlasEntity.AtlasEntityWithExtInfo entityWithExtInfo) { + if (instanceName == null) { + LOG.warn("Failed attempting to connect entity since hook namespace is empty, Please configure in properties"); + return; + } + + AtlasEntity hiveDb = null; + try { + hiveDb = toDbEntity(instanceName, schemaName); + } catch (AtlasServiceException e) { + LOG.error("Error encountered: ", e); + } + + if (hiveDb != null) { + dbEntity.setRelationshipAttribute(TRINO_SCHEMA_HIVE_DB_ATTRIBUTE, AtlasTypeUtil.getAtlasRelatedObjectId(hiveDb, TRINO_SCHEMA_HIVE_DB_RELATIONSHIP)); + } + } + + @Override + public void connectTrinoTable(String instanceName, String catalogName, String schemaName, String tableName, AtlasEntity trinoTable, List<AtlasEntity> columnEntities, AtlasEntity.AtlasEntityWithExtInfo entityWithExtInfo) { + if (instanceName == null) { + LOG.warn("Failed attempting to entity since hook namespace is empty, Please configure in properties"); Review Comment: The error message is missing a verb. It should be 'Failed attempting to connect entity' or 'Failed attempting to process entity'. ```suggestion LOG.warn("Failed attempting to connect entity since hook namespace is empty, Please configure in properties"); ``` ########## addons/trino-extractor/src/main/java/org/apache/atlas/trino/cli/ExtractorService.java: ########## @@ -0,0 +1,356 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.trino.cli; + +import org.apache.atlas.AtlasServiceException; +import org.apache.atlas.model.instance.AtlasEntity; +import org.apache.atlas.model.instance.AtlasEntityHeader; +import org.apache.atlas.trino.client.AtlasClientHelper; +import org.apache.atlas.trino.client.TrinoClientHelper; +import org.apache.atlas.trino.model.Catalog; +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.collections.MapUtils; +import org.apache.commons.configuration.Configuration; +import org.apache.commons.lang.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; + +public class ExtractorService { + private static final Logger LOG = LoggerFactory.getLogger(ExtractorService.class); + + public static final int THREAD_POOL_SIZE = 5; + public static final int CATALOG_EXECUTION_TIMEOUT = 60; + public static final String TRINO_NAME_ATTRIBUTE = "name"; + private static final String TRINO_CATALOG_REGISTERED = "atlas.trino.catalogs.registered"; + private static final String TRINO_CATALOG_HOOK_ENABLED_PREFIX = "atlas.trino.catalog.hook.enabled."; + private static final String TRINO_CATALOG_HOOK_ENABLED_SUFFIX = ".namespace"; + private static Configuration atlasProperties; + private static TrinoClientHelper trinoClientHelper; + private static String trinoNamespace; + private ExtractorContext context; + + public boolean execute(ExtractorContext context) throws Exception { + this.context = context; + atlasProperties = context.getAtlasConf(); + trinoClientHelper = context.getTrinoConnector(); + trinoNamespace = context.getNamespace(); + + Map<String, String> catalogs = trinoClientHelper.getAllTrinoCatalogs(); + LOG.info("Found {} catalogs in Trino", catalogs.toString()); Review Comment: Calling toString() on a Map for logging can be expensive. Consider using catalogs.keySet() to log only the catalog names, or use a more efficient logging approach. ```suggestion LOG.info("Found {} catalogs in Trino: {}", catalogs.size(), catalogs.keySet()); ``` ########## addons/trino-extractor/src/main/java/org/apache/atlas/trino/client/AtlasClientHelper.java: ########## @@ -0,0 +1,430 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.trino.client; + +import com.sun.jersey.api.client.ClientResponse; +import org.apache.atlas.AtlasClientV2; +import org.apache.atlas.AtlasServiceException; +import org.apache.atlas.model.discovery.AtlasSearchResult; +import org.apache.atlas.model.instance.AtlasEntity; +import org.apache.atlas.model.instance.AtlasEntityHeader; +import org.apache.atlas.model.instance.EntityMutationResponse; +import org.apache.atlas.model.instance.EntityMutations; +import org.apache.atlas.trino.model.Catalog; +import org.apache.atlas.type.AtlasTypeUtil; +import org.apache.atlas.utils.AuthenticationUtil; +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.collections.MapUtils; +import org.apache.commons.configuration.Configuration; +import org.apache.commons.lang.ArrayUtils; +import org.apache.hadoop.security.UserGroupInformation; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import static org.apache.atlas.type.AtlasTypeUtil.ATTRIBUTE_QUALIFIED_NAME; + +public class AtlasClientHelper { + private static final Logger LOG = LoggerFactory.getLogger(AtlasClientHelper.class); + + public static final String TRINO_INSTANCE = "trino_instance"; + public static final String TRINO_CATALOG = "trino_catalog"; + public static final String TRINO_SCHEMA = "trino_schema"; + public static final String TRINO_TABLE = "trino_table"; + public static final String TRINO_COLUMN = "trino_column"; + public static final String TRINO_INSTANCE_CATALOG_ATTRIBUTE = "catalogs"; + public static final String TRINO_CATALOG_SCHEMA_ATTRIBUTE = "schemas"; + public static final String TRINO_SCHEMA_TABLE_ATTRIBUTE = "tables"; + public static final String QUALIFIED_NAME_ATTRIBUTE = "qualifiedName"; + public static final String NAME_ATTRIBUTE = "name"; + public static final int pageLimit = 10000; Review Comment: Magic number 10000 should be defined with a more descriptive name like 'DEFAULT_PAGE_LIMIT' or made configurable through properties. ```suggestion public static final int DEFAULT_PAGE_LIMIT = 10000; ``` ########## addons/trino-extractor/src/main/java/org/apache/atlas/trino/connector/IcebergEntityConnector.java: ########## @@ -0,0 +1,125 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.trino.connector; + +import org.apache.atlas.AtlasServiceException; +import org.apache.atlas.model.instance.AtlasEntity; +import org.apache.atlas.trino.client.AtlasClientHelper; +import org.apache.atlas.type.AtlasTypeUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; + +public class IcebergEntityConnector extends AtlasEntityConnector { + private static final Logger LOG = LoggerFactory.getLogger(IcebergEntityConnector.class); + + public static final String HIVE_DB = "hive_db"; + public static final String ICEBERG_TABLE = "iceberg_table"; + public static final String ICEBERG_COLUMN = "iceberg_column"; + public static final String TRINO_SCHEMA_HIVE_DB_RELATIONSHIP = "trino_schema_hive_db"; + public static final String TRINO_TABLE_ICEBERG_TABLE_RELATIONSHIP = "trino_table_iceberg_table"; + public static final String TRINO_COLUMN_ICEBERG_COLUMN_RELATIONSHIP = "trino_column_iceberg_column"; + public static final String TRINO_SCHEMA_HIVE_DB_ATTRIBUTE = "hive_db"; + public static final String TRINO_TABLE_ICEBERG_TABLE_ATTRIBUTE = "iceberg_table"; + public static final String TRINO_COLUMN_ICEBERG_COLUMN_ATTRIBUTE = "iceberg_column"; + + @Override + public void connectTrinoCatalog(String instanceName, String catalogName, AtlasEntity entity, AtlasEntity.AtlasEntityWithExtInfo entityWithExtInfo) { + + } + + @Override + public void connectTrinoSchema(String instanceName, String catalogName, String schemaName, AtlasEntity dbEntity, AtlasEntity.AtlasEntityWithExtInfo entityWithExtInfo) { + if (instanceName == null) { + LOG.warn("Failed attempting to connect entity since hook namespace is empty, Please configure in properties"); + return; + } + + AtlasEntity hiveDb = null; + try { + hiveDb = toDbEntity(instanceName, schemaName); + } catch (AtlasServiceException e) { + LOG.error("Error encountered: ", e); + } + + if (hiveDb != null) { + dbEntity.setRelationshipAttribute(TRINO_SCHEMA_HIVE_DB_ATTRIBUTE, AtlasTypeUtil.getAtlasRelatedObjectId(hiveDb, TRINO_SCHEMA_HIVE_DB_RELATIONSHIP)); + } + } + + @Override + public void connectTrinoTable(String instanceName, String catalogName, String schemaName, String tableName, AtlasEntity trinoTable, List<AtlasEntity> columnEntities, AtlasEntity.AtlasEntityWithExtInfo entityWithExtInfo) { + if (instanceName == null) { + LOG.warn("Failed attempting to entity since hook namespace is empty, Please configure in properties"); Review Comment: The error message is missing a verb. It should be 'Failed attempting to connect entity' or 'Failed attempting to process entity'. ```suggestion LOG.warn("Failed attempting to connect entity since hook namespace is empty, Please configure in properties"); ``` ########## addons/trino-extractor/src/test/java/org/apache/atlas/trino/cli/TrinoExtractorIT.java: ########## @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * <p> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.trino.cli; + +public class TrinoExtractorIT { + + /* List of testcases + Invalid Arguments + Invalid cron expression + Test valid Catalog to be run + Test Instance creation + Test catalog creation + Test schema creation + Test table creation + Test of hook is enabled, hook entity if created, is connected to Trino entity + Test cron doesn't trigger new job, before earlier thread completes + Test without cron expression + Test even if catalog is not registered, it should run if passed from commandLine + Deleted table + Deleted catalog + Deleted column + Deleted schema + Rename catalog + Rename schema + Tag propogated*/ Review Comment: Misspelling: 'propogated' should be 'propagated'. ```suggestion Tag propagated*/ ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: dev-unsubscr...@atlas.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org