This is an automated email from the ASF dual-hosted git repository.

sidmishra pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/atlas.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 8972fae  ATLAS-4492: Atlas to skip external temporary table created in hive
8972fae is described below

commit 8972fae6ac017512c22dc383182f6258c87ef70c
Author: Radhika Kundam <rkun...@cloudera.com>
AuthorDate: Thu Dec 9 08:33:20 2021 -0800

    ATLAS-4492: Atlas to skip external temporary table created in hive
    
    Signed-off-by: Sidharth Mishra <sidmis...@apache.org>
    (cherry picked from commit 11ed7f75c0473f2b8265db425400b749882d8ce4)
---
 .../java/org/apache/atlas/hive/hook/HiveHook.java  | 15 ++++++++----
 .../apache/atlas/hive/hook/events/CreateTable.java |  8 +++++--
 .../org/apache/atlas/hive/hook/HiveHookIT.java     | 28 ++++++++++++++++++++++
 3 files changed, 45 insertions(+), 6 deletions(-)

diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java
index 6ea4848..4d74d0c 100644
--- a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java
+++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java
@@ -73,6 +73,7 @@ public class HiveHook extends AtlasHook implements 
ExecuteWithHookContext {
     public static final String HOOK_HIVE_IGNORE_DDL_OPERATIONS                 
          = CONF_PREFIX + "hs2.ignore.ddl.operations";
     public static final String 
HOOK_HIVE_FILTER_ENTITY_ADDITIONAL_TYPES_TO_RETAIN        = CONF_PREFIX + 
"hs2.filter.entity.additional.types.to.retain";
     public static final String HOOK_HIVE_SKIP_TEMP_TABLES                      
          = CONF_PREFIX + "skip.temp.tables";
+    public static final String HOOK_HIVE_SKIP_ALL_TEMP_TABLES                  
          = CONF_PREFIX + "skip.all.temp.tables";
     public static final String DEFAULT_HOST_NAME = "localhost";
 
     private static final Map<String, HiveOperation> OPERATION_MAP = new 
HashMap<>();
@@ -95,6 +96,7 @@ public class HiveHook extends AtlasHook implements 
ExecuteWithHookContext {
     private static final boolean                       
hiveProcessPopulateDeprecatedAttributes;
     private static HiveHookObjectNamesCache            knownObjects = null;
     private static String hostName;
+    private static boolean                             
skipAllTempTablesIncludingExternal;
     private static boolean                             skipTempTables = true;
 
     static {
@@ -154,10 +156,11 @@ public class HiveHook extends AtlasHook implements 
ExecuteWithHookContext {
         defaultDummyDatabase.add(SemanticAnalyzer.DUMMY_DATABASE);
         defaultDummyTable.add(SemanticAnalyzer.DUMMY_TABLE);
 
-        ignoreDummyDatabaseName        = 
atlasProperties.getList("atlas.hook.hive.ignore.dummy.database.name", 
defaultDummyDatabase);
-        ignoreDummyTableName           = 
atlasProperties.getList("atlas.hook.hive.ignore.dummy.table.name", 
defaultDummyTable);
-        ignoreValuesTmpTableNamePrefix = 
atlasProperties.getString("atlas.hook.hive.ignore.values.tmp.table.name.prefix",
 "Values__Tmp__Table__");
-        skipTempTables                 = 
atlasProperties.getBoolean(HOOK_HIVE_SKIP_TEMP_TABLES, true);
+        ignoreDummyDatabaseName            = 
atlasProperties.getList("atlas.hook.hive.ignore.dummy.database.name", 
defaultDummyDatabase);
+        ignoreDummyTableName               = 
atlasProperties.getList("atlas.hook.hive.ignore.dummy.table.name", 
defaultDummyTable);
+        ignoreValuesTmpTableNamePrefix     = 
atlasProperties.getString("atlas.hook.hive.ignore.values.tmp.table.name.prefix",
 "Values__Tmp__Table__");
+        skipAllTempTablesIncludingExternal = 
atlasProperties.getBoolean(HOOK_HIVE_SKIP_ALL_TEMP_TABLES, false);
+        skipTempTables                     = 
skipAllTempTablesIncludingExternal || 
atlasProperties.getBoolean(HOOK_HIVE_SKIP_TEMP_TABLES, true);
 
         try {
             hostName = InetAddress.getLocalHost().getHostName();
@@ -308,6 +311,10 @@ public class HiveHook extends AtlasHook implements 
ExecuteWithHookContext {
         return skipTempTables;
     }
 
+    public static boolean isSkipAllTempTablesIncludingExternal() {
+        return skipAllTempTablesIncludingExternal;
+    }
+
     public PreprocessAction getPreprocessActionForHiveTable(String 
qualifiedName) {
         PreprocessAction ret = PreprocessAction.NONE;
 
diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateTable.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateTable.java
index 91611de..7d39cb6 100644
--- a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateTable.java
+++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateTable.java
@@ -19,6 +19,7 @@
 package org.apache.atlas.hive.hook.events;
 
 import org.apache.atlas.hive.hook.AtlasHiveHookContext;
+import org.apache.atlas.hive.hook.HiveHook;
 import org.apache.atlas.model.instance.AtlasEntity;
 import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo;
 import org.apache.atlas.model.notification.HookNotification;
@@ -189,8 +190,11 @@ public class CreateTable extends BaseHiveEvent {
     }
 
     private boolean skipTemporaryTable(Table table) {
-        // If its an external table, even though the temp table skip flag is 
on, we create the table since we need the HDFS path to temp table lineage.
-        return table != null && skipTempTables && table.isTemporary() && 
!EXTERNAL_TABLE.equals(table.getTableType());
+        /**
+         * If its an external table, even though the temp table skip 
flag(skip.temp.tables) is on, we create the table since we need the HDFS path 
to temp table lineage.
+         * We skip external temp table only on enabling flag for skip all temp 
tables including external tables(skip.all.temp.tables)
+         **/
+        return table != null && skipTempTables && table.isTemporary() && 
(!EXTERNAL_TABLE.equals(table.getTableType()) || 
HiveHook.isSkipAllTempTablesIncludingExternal());
     }
 
     private boolean isCreateExtTableOperation(Table table) {
diff --git a/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java b/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java
index 1db73e5..c257551 100755
--- a/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java
+++ b/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java
@@ -22,6 +22,7 @@ import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.node.ObjectNode;
 import com.google.common.base.Joiner;
 import com.sun.jersey.api.client.ClientResponse;
+import org.apache.atlas.ApplicationProperties;
 import org.apache.atlas.AtlasClient;
 import org.apache.atlas.AtlasServiceException;
 import org.apache.atlas.hive.HiveITBase;
@@ -36,6 +37,7 @@ import org.apache.atlas.model.typedef.AtlasEntityDef;
 import org.apache.atlas.model.typedef.AtlasTypesDef;
 import org.apache.atlas.type.AtlasTypeUtil;
 import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.configuration.Configuration;
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -279,6 +281,32 @@ public class HiveHookIT extends HiveITBase {
         validateHDFSPaths(processsEntity, INPUTS, pFile);
     }
 
+    //Disabling for now. Need to revisit and check
+    @Test(enabled = false)
+    public void testCreateTemporaryExternalTable() throws Exception {
+        String tableName          = tableName();
+        String colName            = columnName();
+        String query              = String.format("create TEMPORARY EXTERNAL 
table %s.%s(%s, %s)", DEFAULT_DB , tableName , colName + " int", "name string");
+        String tableQualifiedName = 
HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName, 
true);
+
+        Configuration configuration = ApplicationProperties.get();
+        configuration.setProperty(HiveHook.HOOK_HIVE_SKIP_ALL_TEMP_TABLES, 
true);
+        runCommand(query);
+
+        Thread.sleep(10000);
+
+        try {
+            
atlasClientV2.getEntityByAttribute(HiveDataTypes.HIVE_TABLE.getName(), 
Collections.singletonMap(ATTRIBUTE_QUALIFIED_NAME, tableQualifiedName));
+        } catch (AtlasServiceException e) {
+            if (e.getStatus() == ClientResponse.Status.NOT_FOUND) {
+                return;
+            }
+        }
+
+        fail(String.format("Entity was not supposed to exist for typeName = 
%s, attributeName = %s, attributeValue = %s", 
HiveDataTypes.HIVE_TABLE.getName(), ATTRIBUTE_QUALIFIED_NAME, 
tableQualifiedName));
+    }
+
+
     private Set<ReadEntity> getInputs(String inputName, Entity.Type 
entityType) throws HiveException {
         final ReadEntity entity;
 

Reply via email to