Repository: atlas
Updated Branches:
  refs/heads/master 9bf0d12eb -> 92f3d4498
ATLAS-2276: update Hive hook to add an option to retain case-sensitivity of path in hdfs_path entities

Signed-off-by: Madhan Neethiraj <[email protected]>

Project: http://git-wip-us.apache.org/repos/asf/atlas/repo
Commit: http://git-wip-us.apache.org/repos/asf/atlas/commit/92f3d449
Tree: http://git-wip-us.apache.org/repos/asf/atlas/tree/92f3d449
Diff: http://git-wip-us.apache.org/repos/asf/atlas/diff/92f3d449

Branch: refs/heads/master
Commit: 92f3d4498c9f8d2825ab358f48b6cee9b7cdde8b
Parents: 9bf0d12
Author: nixonrodrigues <[email protected]>
Authored: Fri Nov 24 15:42:02 2017 +0530
Committer: Madhan Neethiraj <[email protected]>
Committed: Tue Nov 28 12:02:43 2017 -0800

----------------------------------------------------------------------
 .../atlas/hive/bridge/HiveMetaStoreBridge.java | 25 ++++++++++++++------
 .../org/apache/atlas/hive/hook/HiveHook.java | 4 ++--
 2 files changed, 20 insertions(+), 9 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/atlas/blob/92f3d449/addons/hive-bridge/src/main/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridge.java
----------------------------------------------------------------------
diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridge.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridge.java
index ab0094b..2d2a9bf 100755
--- a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridge.java
+++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridge.java
@@ -57,6 +57,7 @@ import org.slf4j.LoggerFactory;
 import java.util.ArrayList;
 import java.util.Date;
 import java.util.List;
+import static org.apache.atlas.hive.hook.HiveHook.CONF_PREFIX;

 /**
  * A Bridge Utility that imports metadata from the Hive Meta Store
@@ -96,13 +97,19 @@ public class HiveMetaStoreBridge {

     private static final Logger LOG = LoggerFactory.getLogger(HiveMetaStoreBridge.class);

-    public final Hive hiveClient;
-    private AtlasClient atlasClient = null;
+    public final Hive hiveClient;
+    private final AtlasClient atlasClient;
+    private final boolean convertHdfsPathToLowerCase;

     HiveMetaStoreBridge(String clusterName, Hive hiveClient, AtlasClient atlasClient) {
-        this.clusterName = clusterName;
-        this.hiveClient = hiveClient;
-        this.atlasClient = atlasClient;
+        this(clusterName, hiveClient, atlasClient, true);
+    }
+
+    HiveMetaStoreBridge(String clusterName, Hive hiveClient, AtlasClient atlasClient, boolean convertHdfsPathToLowerCase) {
+        this.clusterName = clusterName;
+        this.hiveClient = hiveClient;
+        this.atlasClient = atlasClient;
+        this.convertHdfsPathToLowerCase = convertHdfsPathToLowerCase;
     }

     public String getClusterName() {
@@ -122,13 +129,17 @@ public class HiveMetaStoreBridge {
      * @param hiveConf {@link HiveConf} for Hive component in the cluster
      */
     public HiveMetaStoreBridge(Configuration atlasProperties, HiveConf hiveConf, AtlasClient atlasClient) throws Exception {
-        this(atlasProperties.getString(HIVE_CLUSTER_NAME, DEFAULT_CLUSTER_NAME), Hive.get(hiveConf), atlasClient);
+        this(atlasProperties.getString(HIVE_CLUSTER_NAME, DEFAULT_CLUSTER_NAME), Hive.get(hiveConf), atlasClient, atlasProperties.getBoolean(CONF_PREFIX + "hdfs_path.convert_to_lowercase", true));
     }

     AtlasClient getAtlasClient() {
         return atlasClient;
     }

+    public boolean isConvertHdfsPathToLowerCase() {
+        return convertHdfsPathToLowerCase;
+    }
+
     void importHiveMetadata(boolean failOnError) throws Exception {
         LOG.info("Importing hive metadata");
         importDatabases(failOnError);
@@ -295,7 +306,7 @@ public class HiveMetaStoreBridge {
         Referenceable lineageProcess = new Referenceable(HiveDataTypes.HIVE_PROCESS.getName());
         ArrayList<Referenceable> sourceList = new ArrayList<>();
         ArrayList<Referenceable> targetList = new ArrayList<>();
-        String tableLocation = table.getDataLocation().toString();
+        String tableLocation = isConvertHdfsPathToLowerCase() ? HiveHook.lower(table.getDataLocation().toString()) : table.getDataLocation().toString();
         Referenceable path = fillHDFSDataSet(tableLocation);
         String query = getCreateTableString(table, tableLocation);
         sourceList.add(path);

http://git-wip-us.apache.org/repos/asf/atlas/blob/92f3d449/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java
----------------------------------------------------------------------
diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java
index 57f5efb..69d294c 100755
--- a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java
+++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java
@@ -744,7 +744,7 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
                 } else if (entity.getType() == Type.DFS_DIR) {
                     URI location = entity.getLocation();
                     if (location != null) {
-                        final String pathUri = lower(new Path(location).toString());
+                        final String pathUri = dgiBridge.isConvertHdfsPathToLowerCase() ? lower(new Path(location).toString()) : new Path(location).toString();
                         LOG.debug("Registering DFS Path {} ", pathUri);
                         if (!dataSetsProcessed.contains(pathUri)) {
                             Referenceable hdfsPath = dgiBridge.fillHDFSDataSet(pathUri);
@@ -793,7 +793,7 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {

             if (hiveTable != null && TableType.EXTERNAL_TABLE.equals(hiveTable.getTableType())) {
                 LOG.info("Registering external table process {} ", event.getQueryStr());
-                final String location = lower(hiveTable.getDataLocation().toString());
+                final String location = dgiBridge.isConvertHdfsPathToLowerCase() ? lower(hiveTable.getDataLocation().toString()) : hiveTable.getDataLocation().toString();
                 final ReadEntity dfsEntity = new ReadEntity();
                 dfsEntity.setTyp(Type.DFS_DIR);
                 dfsEntity.setD(new Path(location));
