This is an automated email from the ASF dual-hosted git repository.
sidmishra pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/atlas.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 8972fae ATLAS-4492: Atlas to skip external temporary table created in hive
8972fae is described below
commit 8972fae6ac017512c22dc383182f6258c87ef70c
Author: Radhika Kundam <[email protected]>
AuthorDate: Thu Dec 9 08:33:20 2021 -0800
ATLAS-4492: Atlas to skip external temporary table created in hive
Signed-off-by: Sidharth Mishra <[email protected]>
(cherry picked from commit 11ed7f75c0473f2b8265db425400b749882d8ce4)
---
.../java/org/apache/atlas/hive/hook/HiveHook.java | 15 ++++++++----
.../apache/atlas/hive/hook/events/CreateTable.java | 8 +++++--
.../org/apache/atlas/hive/hook/HiveHookIT.java | 28 ++++++++++++++++++++++
3 files changed, 45 insertions(+), 6 deletions(-)
diff --git
a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java
b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java
index 6ea4848..4d74d0c 100644
--- a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java
+++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java
@@ -73,6 +73,7 @@ public class HiveHook extends AtlasHook implements
ExecuteWithHookContext {
public static final String HOOK_HIVE_IGNORE_DDL_OPERATIONS
= CONF_PREFIX + "hs2.ignore.ddl.operations";
public static final String
HOOK_HIVE_FILTER_ENTITY_ADDITIONAL_TYPES_TO_RETAIN = CONF_PREFIX +
"hs2.filter.entity.additional.types.to.retain";
public static final String HOOK_HIVE_SKIP_TEMP_TABLES
= CONF_PREFIX + "skip.temp.tables";
+ public static final String HOOK_HIVE_SKIP_ALL_TEMP_TABLES
= CONF_PREFIX + "skip.all.temp.tables";
public static final String DEFAULT_HOST_NAME = "localhost";
private static final Map<String, HiveOperation> OPERATION_MAP = new
HashMap<>();
@@ -95,6 +96,7 @@ public class HiveHook extends AtlasHook implements
ExecuteWithHookContext {
private static final boolean
hiveProcessPopulateDeprecatedAttributes;
private static HiveHookObjectNamesCache knownObjects = null;
private static String hostName;
+ private static boolean
skipAllTempTablesIncludingExternal;
private static boolean skipTempTables = true;
static {
@@ -154,10 +156,11 @@ public class HiveHook extends AtlasHook implements
ExecuteWithHookContext {
defaultDummyDatabase.add(SemanticAnalyzer.DUMMY_DATABASE);
defaultDummyTable.add(SemanticAnalyzer.DUMMY_TABLE);
- ignoreDummyDatabaseName =
atlasProperties.getList("atlas.hook.hive.ignore.dummy.database.name",
defaultDummyDatabase);
- ignoreDummyTableName =
atlasProperties.getList("atlas.hook.hive.ignore.dummy.table.name",
defaultDummyTable);
- ignoreValuesTmpTableNamePrefix =
atlasProperties.getString("atlas.hook.hive.ignore.values.tmp.table.name.prefix",
"Values__Tmp__Table__");
- skipTempTables =
atlasProperties.getBoolean(HOOK_HIVE_SKIP_TEMP_TABLES, true);
+ ignoreDummyDatabaseName =
atlasProperties.getList("atlas.hook.hive.ignore.dummy.database.name",
defaultDummyDatabase);
+ ignoreDummyTableName =
atlasProperties.getList("atlas.hook.hive.ignore.dummy.table.name",
defaultDummyTable);
+ ignoreValuesTmpTableNamePrefix =
atlasProperties.getString("atlas.hook.hive.ignore.values.tmp.table.name.prefix",
"Values__Tmp__Table__");
+ skipAllTempTablesIncludingExternal =
atlasProperties.getBoolean(HOOK_HIVE_SKIP_ALL_TEMP_TABLES, false);
+ skipTempTables =
skipAllTempTablesIncludingExternal ||
atlasProperties.getBoolean(HOOK_HIVE_SKIP_TEMP_TABLES, true);
try {
hostName = InetAddress.getLocalHost().getHostName();
@@ -308,6 +311,10 @@ public class HiveHook extends AtlasHook implements
ExecuteWithHookContext {
return skipTempTables;
}
+ public static boolean isSkipAllTempTablesIncludingExternal() {
+ return skipAllTempTablesIncludingExternal;
+ }
+
public PreprocessAction getPreprocessActionForHiveTable(String
qualifiedName) {
PreprocessAction ret = PreprocessAction.NONE;
diff --git
a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateTable.java
b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateTable.java
index 91611de..7d39cb6 100644
---
a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateTable.java
+++
b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateTable.java
@@ -19,6 +19,7 @@
package org.apache.atlas.hive.hook.events;
import org.apache.atlas.hive.hook.AtlasHiveHookContext;
+import org.apache.atlas.hive.hook.HiveHook;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo;
import org.apache.atlas.model.notification.HookNotification;
@@ -189,8 +190,11 @@ public class CreateTable extends BaseHiveEvent {
}
private boolean skipTemporaryTable(Table table) {
- // If its an external table, even though the temp table skip flag is
on, we create the table since we need the HDFS path to temp table lineage.
- return table != null && skipTempTables && table.isTemporary() &&
!EXTERNAL_TABLE.equals(table.getTableType());
+ /**
+ * If its an external table, even though the temp table skip
flag(skip.temp.tables) is on, we create the table since we need the HDFS path
to temp table lineage.
+ * We skip external temp table only on enabling flag for skip all temp
tables including external tables(skip.all.temp.tables)
+ **/
+ return table != null && skipTempTables && table.isTemporary() &&
(!EXTERNAL_TABLE.equals(table.getTableType()) ||
HiveHook.isSkipAllTempTablesIncludingExternal());
}
private boolean isCreateExtTableOperation(Table table) {
diff --git
a/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java
b/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java
index 1db73e5..c257551 100755
---
a/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java
+++
b/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java
@@ -22,6 +22,7 @@ import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.base.Joiner;
import com.sun.jersey.api.client.ClientResponse;
+import org.apache.atlas.ApplicationProperties;
import org.apache.atlas.AtlasClient;
import org.apache.atlas.AtlasServiceException;
import org.apache.atlas.hive.HiveITBase;
@@ -36,6 +37,7 @@ import org.apache.atlas.model.typedef.AtlasEntityDef;
import org.apache.atlas.model.typedef.AtlasTypesDef;
import org.apache.atlas.type.AtlasTypeUtil;
import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.configuration.Configuration;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
@@ -279,6 +281,32 @@ public class HiveHookIT extends HiveITBase {
validateHDFSPaths(processsEntity, INPUTS, pFile);
}
+ //Disabling for now. Need to revisit and check
+ @Test(enabled = false)
+ public void testCreateTemporaryExternalTable() throws Exception {
+ String tableName = tableName();
+ String colName = columnName();
+ String query = String.format("create TEMPORARY EXTERNAL
table %s.%s(%s, %s)", DEFAULT_DB , tableName , colName + " int", "name string");
+ String tableQualifiedName =
HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName,
true);
+
+ Configuration configuration = ApplicationProperties.get();
+ configuration.setProperty(HiveHook.HOOK_HIVE_SKIP_ALL_TEMP_TABLES,
true);
+ runCommand(query);
+
+ Thread.sleep(10000);
+
+ try {
+
atlasClientV2.getEntityByAttribute(HiveDataTypes.HIVE_TABLE.getName(),
Collections.singletonMap(ATTRIBUTE_QUALIFIED_NAME, tableQualifiedName));
+ } catch (AtlasServiceException e) {
+ if (e.getStatus() == ClientResponse.Status.NOT_FOUND) {
+ return;
+ }
+ }
+
+ fail(String.format("Entity was not supposed to exist for typeName =
%s, attributeName = %s, attributeValue = %s",
HiveDataTypes.HIVE_TABLE.getName(), ATTRIBUTE_QUALIFIED_NAME,
tableQualifiedName));
+ }
+
+
private Set<ReadEntity> getInputs(String inputName, Entity.Type
entityType) throws HiveException {
final ReadEntity entity;