ATLAS-242 The qualified name for hive entities should be backward compatible (shwethags)
Project: http://git-wip-us.apache.org/repos/asf/incubator-atlas/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-atlas/commit/4aef164c Tree: http://git-wip-us.apache.org/repos/asf/incubator-atlas/tree/4aef164c Diff: http://git-wip-us.apache.org/repos/asf/incubator-atlas/diff/4aef164c Branch: refs/heads/branch-0.6-incubating Commit: 4aef164cf81e37a9857007a570e03f15779aa76e Parents: fd468f4 Author: Shwetha GS <[email protected]> Authored: Mon Dec 7 14:15:10 2015 +0530 Committer: Shwetha GS <[email protected]> Committed: Mon Dec 7 14:16:15 2015 +0530 ---------------------------------------------------------------------- .../atlas/hive/bridge/HiveMetaStoreBridge.java | 12 ++++--- .../org/apache/atlas/hive/hook/HiveHookIT.java | 38 ++++++++++++-------- docs/src/site/twiki/Bridge-Hive.twiki | 12 +++---- release-log.txt | 1 + 4 files changed, 37 insertions(+), 26 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/4aef164c/addons/hive-bridge/src/main/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridge.java ---------------------------------------------------------------------- diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridge.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridge.java index a4d5330..90f3d96 100755 --- a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridge.java +++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridge.java @@ -194,7 +194,7 @@ public class HiveMetaStoreBridge { } public static String getDBQualifiedName(String clusterName, String dbName) { - return String.format("%s.%s", clusterName, dbName.toLowerCase()); + return String.format("%s@%s", dbName.toLowerCase(), clusterName); } /** @@ -234,7 +234,7 @@ public class HiveMetaStoreBridge { } public static String getTableQualifiedName(String clusterName, String dbName, String tableName) { - return String.format("%s.%s.%s", clusterName, dbName.toLowerCase(), tableName.toLowerCase()); + return String.format("%s.%s@%s", dbName.toLowerCase(), tableName.toLowerCase(), clusterName); } public Referenceable createTableInstance(Referenceable dbReference, Table hiveTable) @@ -392,8 +392,8 @@ public class HiveMetaStoreBridge { } private String getPartitionQualifiedName(Partition partition) { - return String.format("%s.%s.%s.%s", clusterName, partition.getTable().getDbName(), - partition.getTable().getTableName(), StringUtils.join(partition.getValues(), "/")); + return String.format("%s.%s.%s@%s", partition.getTable().getDbName(), + partition.getTable().getTableName(), StringUtils.join(partition.getValues(), "-"), clusterName); } private Referenceable fillStorageDescStruct(StorageDescriptor storageDesc, String tableQualifiedName, @@ -454,7 +454,9 @@ public class HiveMetaStoreBridge { } private String getColumnQualifiedName(String tableQualifiedName, String colName) { - return String.format("%s.%s", tableQualifiedName, colName); + String[] parts = tableQualifiedName.split("@"); + String tableName = parts[0]; + return String.format("%s.%s@%s", tableName, colName, clusterName); } private List<Referenceable> getColumns(List<FieldSchema> schemaList, String tableQualifiedName) throws Exception { http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/4aef164c/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java ---------------------------------------------------------------------- diff --git a/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java b/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java index 16efe46..643a29a 100755 --- a/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java +++ b/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java @@ -86,7 +86,12 @@ public class HiveHookIT { //There should be just one entity per dbname runCommand("drop database " + dbName); runCommand("create database " + dbName); - assertDatabaseIsRegistered(dbName); + String dbid = assertDatabaseIsRegistered(dbName); + + //assert on qualified name + Referenceable dbEntity = dgiCLient.getEntity(dbid); + Assert.assertEquals(dbEntity.get("qualifiedName"), dbName.toLowerCase() + "@" + CLUSTER_NAME); + } private String dbName() { @@ -121,8 +126,12 @@ public class HiveHookIT { String colName = "col" + random(); runCommand("create table " + dbName + "." + tableName + "(" + colName + " int, name string)"); assertTableIsRegistered(dbName, tableName); + //there is only one instance of column registered - assertColumnIsRegistered(colName); + String colId = assertColumnIsRegistered(colName); + Referenceable colEntity = dgiCLient.getEntity(colId); + Assert.assertEquals(colEntity.get("qualifiedName"), String.format("%s.%s.%s@%s", dbName.toLowerCase(), + tableName.toLowerCase(), colName.toLowerCase(), CLUSTER_NAME)); tableName = createTable(); String tableId = assertTableIsRegistered(DEFAULT_DB, tableName); @@ -131,6 +140,7 @@ public class HiveHookIT { Assert.assertEquals(tableRef.get(HiveDataModelGenerator.COMMENT), "table comment"); String entityName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName); Assert.assertEquals(tableRef.get(HiveDataModelGenerator.NAME), entityName); + Assert.assertEquals(tableRef.get("name"), "default." + tableName.toLowerCase() + "@" + CLUSTER_NAME); final Referenceable sdRef = (Referenceable) tableRef.get("sd"); Assert.assertEquals(sdRef.get(HiveDataModelGenerator.STORAGE_IS_STORED_AS_SUB_DIRS), false); @@ -189,7 +199,10 @@ public class HiveHookIT { runCommand(query); assertProcessIsRegistered(query); - assertPartitionIsRegistered(DEFAULT_DB, insertTableName, "2015-01-01"); + String partId = assertPartitionIsRegistered(DEFAULT_DB, insertTableName, "2015-01-01"); + Referenceable partitionEntity = dgiCLient.getEntity(partId); + Assert.assertEquals(partitionEntity.get("qualifiedName"), + String.format("%s.%s.%s@%s", "default", insertTableName.toLowerCase(), "2015-01-01", CLUSTER_NAME)); } private String random() { @@ -231,7 +244,9 @@ public class HiveHookIT { String tableName = createTable(); String query = "select * from " + tableName; runCommand(query); - assertProcessIsRegistered(query); + String pid = assertProcessIsRegistered(query); + Referenceable processEntity = dgiCLient.getEntity(pid); + Assert.assertEquals(processEntity.get("name"), query.toLowerCase()); //single entity per query query = "SELECT * from " + tableName.toUpperCase(); @@ -265,7 +280,7 @@ public class HiveHookIT { assertTableIsNotRegistered(DEFAULT_DB, viewName); } - private void assertProcessIsRegistered(String queryStr) throws Exception { + private String assertProcessIsRegistered(String queryStr) throws Exception { // String dslQuery = String.format("%s where queryText = \"%s\"", HiveDataTypes.HIVE_PROCESS.getName(), // normalize(queryStr)); // assertEntityIsRegistered(dslQuery, true); @@ -274,7 +289,7 @@ public class HiveHookIT { String gremlinQuery = String.format("g.V.has('__typeName', '%s').has('%s.queryText', \"%s\").toList()", typeName, typeName, normalize(queryStr)); - assertEntityIsRegistered(gremlinQuery); + return assertEntityIsRegistered(gremlinQuery); } private String normalize(String str) { @@ -307,22 +322,15 @@ public class HiveHookIT { return assertEntityIsRegistered(query); } - private void assertPartitionIsRegistered(String dbName, String tableName, String value) throws Exception { + private String assertPartitionIsRegistered(String dbName, String tableName, String value) throws Exception { String typeName = HiveDataTypes.HIVE_PARTITION.getName(); - String dbType = HiveDataTypes.HIVE_DB.getName(); - String tableType = HiveDataTypes.HIVE_TABLE.getName(); LOG.debug("Searching for partition of {}.{} with values {}", dbName, tableName, value); - /* gremlinQuery = String.format("g.V.has('__typeName', '%s').has('%s.values', ['%s']).as('p')." - + "out('__%s.table').has('%s.tableName', '%s').out('__%s.db').has('%s.name', '%s')" - + ".has('%s.clusterName', '%s').back('p').toList()", typeName, typeName, value, typeName, - tableType, tableName.toLowerCase(), tableType, dbType, dbName.toLowerCase(), dbType, CLUSTER_NAME); - */ String dslQuery = String.format("%s as p where values = ['%s'], table where tableName = '%s', " + "db where name = '%s' and clusterName = '%s' select p", typeName, value, tableName.toLowerCase(), dbName.toLowerCase(), CLUSTER_NAME); - assertEntityIsRegistered(dslQuery, "p"); + return assertEntityIsRegistered(dslQuery, "p"); } private String assertEntityIsRegistered(final String query, String... arg) throws Exception { http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/4aef164c/docs/src/site/twiki/Bridge-Hive.twiki ---------------------------------------------------------------------- diff --git a/docs/src/site/twiki/Bridge-Hive.twiki b/docs/src/site/twiki/Bridge-Hive.twiki index 51656b1..34f82a5 100644 --- a/docs/src/site/twiki/Bridge-Hive.twiki +++ b/docs/src/site/twiki/Bridge-Hive.twiki @@ -19,12 +19,12 @@ hive_partition(ClassType) - super types [Referenceable] - attributes [values, ta hive_process(ClassType) - super types [Process] - attributes [startTime, endTime, userName, operationType, queryText, queryPlan, queryId, queryGraph] </verbatim> -The entities are created and de-duped using unique qualified name. They provide namespace and can be used for querying as well: -hive_db - attribute qualifiedName - clustername.dbname -hive_table - attribute name - clustername.dbname.tablename -hive_column - attribute qualifiedName - clustername.dbname.tablename.columnname -hive_partition - attribute qualifiedName - clustername.dbname.tablename.partitionvalues -hive_process - attribute qualifiedName - queryText +The entities are created and de-duped using unique qualified name. They provide namespace and can be used for querying/lineage as well. Note that dbName and tableName should be in lower case. clusterName is explained below: +hive_db - attribute qualifiedName - <dbName>@<clusterName> +hive_table - attribute name - <dbName>.<tableName>@<clusterName> +hive_column - attribute qualifiedName - <dbName>.<tableName>.<columnName>@<clusterName> +hive_partition - attribute qualifiedName - <dbName>.<tableName>.<partitionValues('-' separated)>@<clusterName> +hive_process - attribute name - <queryString> - trimmed query string in lower case ---++ Importing Hive Metadata http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/4aef164c/release-log.txt ---------------------------------------------------------------------- diff --git a/release-log.txt b/release-log.txt index 0329c7f..b773bfb 100644 --- a/release-log.txt +++ b/release-log.txt @@ -9,6 +9,7 @@ ATLAS-54 Rename configs in hive hook (shwethags) ATLAS-3 Mixed Index creation fails with Date types (sumasai via shwethags) ALL CHANGES: +ATLAS-242 The qualified name for hive entities should be backward compatible (shwethags) ATLAS-361 Add validation when index backends are switched in ATLAS configuration (sumasai via shwethags) ATLAS-171 Ability to update type definition(shwethags via sumasai) ATLAS-352 Improve write performance on type and entity creation with Hbase (sumasai)
