Repository: incubator-atlas Updated Branches: refs/heads/master 70f715705 -> 2e02ae628
ATLAS-752 Column renames should retain traits/tags (svimal2106 via shwethags) Project: http://git-wip-us.apache.org/repos/asf/incubator-atlas/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-atlas/commit/2e02ae62 Tree: http://git-wip-us.apache.org/repos/asf/incubator-atlas/tree/2e02ae62 Diff: http://git-wip-us.apache.org/repos/asf/incubator-atlas/diff/2e02ae62 Branch: refs/heads/master Commit: 2e02ae62850eef6b9fd65fb0c796748d0d348044 Parents: 70f7157 Author: Shwetha GS <[email protected]> Authored: Fri May 27 15:04:39 2016 +0530 Committer: Shwetha GS <[email protected]> Committed: Fri May 27 15:04:39 2016 +0530 ---------------------------------------------------------------------- .../org/apache/atlas/hive/hook/HiveHook.java | 77 +++++++++++++++++++- .../org/apache/atlas/hive/hook/HiveHookIT.java | 57 ++++++++++++--- release-log.txt | 1 + 3 files changed, 121 insertions(+), 14 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/2e02ae62/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java ---------------------------------------------------------------------- diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java index 418e755..4234664 100755 --- a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java +++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java @@ -36,6 +36,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.QueryPlan; import org.apache.hadoop.hive.ql.exec.ExplainTask; import org.apache.hadoop.hive.ql.exec.Task; @@ -53,6 +54,7 @@ import org.apache.hadoop.security.UserGroupInformation; import org.json.JSONObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import scala.tools.cmd.gen.AnyVals; import java.net.MalformedURLException; import java.util.ArrayList; @@ -229,10 +231,12 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext { case ALTERTABLE_SERIALIZER: case ALTERTABLE_ADDCOLS: case ALTERTABLE_REPLACECOLS: - case ALTERTABLE_RENAMECOL: case ALTERTABLE_PARTCOLTYPE: handleEventOutputs(dgiBridge, event, Type.TABLE); break; + case ALTERTABLE_RENAMECOL: + renameColumn(dgiBridge, event); + break; case ALTERTABLE_LOCATION: List<Pair<? extends Entity, Referenceable>> tablesUpdated = handleEventOutputs(dgiBridge, event, Type.TABLE); if (tablesUpdated != null && tablesUpdated.size() > 0) { @@ -297,6 +301,64 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext { } } + private Pair<String, String> findChangedColNames(List<FieldSchema> oldColList, List<FieldSchema> newColList){ + HashMap<FieldSchema, Integer> oldColHashMap = new HashMap<>(); + HashMap<FieldSchema, Integer> newColHashMap = new HashMap<>(); + for (int i = 0; i < oldColList.size(); i++){ + oldColHashMap.put(oldColList.get(i), i); + newColHashMap.put(newColList.get(i), i); + } + + String changedColStringOldName = oldColList.get(0).getName(); + String changedColStringNewName = changedColStringOldName; + + for(int i = 0; i < oldColList.size(); i++){ + if (!newColHashMap.containsKey(oldColList.get(i))){ + changedColStringOldName = oldColList.get(i).getName(); + break; + } + } + + for(int i = 0; i < newColList.size(); i++){ + if (!oldColHashMap.containsKey(newColList.get(i))){ + changedColStringNewName = newColList.get(i).getName(); + break; + } + } + + return Pair.of(changedColStringOldName, changedColStringNewName); + } + + private void renameColumn(HiveMetaStoreBridge dgiBridge, HiveEventContext event) throws Exception{ + assert event.getInputs() != null && event.getInputs().size() == 1; + assert event.getOutputs() != null && event.getOutputs().size() > 0; + Table oldTable = event.getInputs().iterator().next().getTable(); + List<FieldSchema> oldColList = oldTable.getAllCols(); + List<FieldSchema> newColList = dgiBridge.hiveClient.getTable(event.getOutputs().iterator().next().getTable().getTableName()).getAllCols(); + assert oldColList.size() == newColList.size(); + + Pair<String, String> changedColNamePair = findChangedColNames(oldColList, newColList); + String oldColName = changedColNamePair.getLeft(); + String newColName = changedColNamePair.getRight(); + for(WriteEntity writeEntity : event.getOutputs()){ + if (writeEntity.getType() == Type.TABLE){ + Table newTable = writeEntity.getTable(); + createOrUpdateEntities(dgiBridge, event.getUser(), writeEntity, true, oldTable); + final String newQualifiedTableName = dgiBridge.getTableQualifiedName(dgiBridge.getClusterName(), + newTable); + String oldColumnQFName = HiveMetaStoreBridge.getColumnQualifiedName(newQualifiedTableName, oldColName); + String newColumnQFName = HiveMetaStoreBridge.getColumnQualifiedName(newQualifiedTableName, newColName); + Referenceable newColEntity = new Referenceable(HiveDataTypes.HIVE_COLUMN.getName()); + newColEntity.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, newColumnQFName); + + messages.add(new HookNotification.EntityPartialUpdateRequest(event.getUser(), + HiveDataTypes.HIVE_COLUMN.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, + oldColumnQFName, newColEntity)); + } + } + handleEventOutputs(dgiBridge, event, Type.TABLE); + } + private void renameTable(HiveMetaStoreBridge dgiBridge, HiveEventContext event) throws Exception { //crappy, no easy of getting new name assert event.getInputs() != null && event.getInputs().size() == 1; @@ -389,7 +451,7 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext { return newSDEntity; } - private Referenceable createOrUpdateEntities(HiveMetaStoreBridge dgiBridge, String user, Entity entity, boolean skipTempTables) throws Exception { + private Referenceable createOrUpdateEntities(HiveMetaStoreBridge dgiBridge, String user, Entity entity, boolean skipTempTables, Table existTable) throws Exception { Database db = null; Table table = null; Partition partition = null; @@ -419,13 +481,16 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext { Referenceable tableEntity = null; if (table != null) { - table = dgiBridge.hiveClient.getTable(table.getDbName(), table.getTableName()); + if (existTable != null) { + table = existTable; + } else { + table = dgiBridge.hiveClient.getTable(table.getDbName(), table.getTableName()); + } //If its an external table, even though the temp table skip flag is on, // we create the table since we need the HDFS path to temp table lineage. if (skipTempTables && table.isTemporary() && !TableType.EXTERNAL_TABLE.equals(table.getTableType())) { - LOG.debug("Skipping temporary table registration {} since it is not an external table {} ", table.getTableName(), table.getTableType().name()); } else { @@ -438,6 +503,10 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext { return tableEntity; } + private Referenceable createOrUpdateEntities(HiveMetaStoreBridge dgiBridge, String user, Entity entity, boolean skipTempTables) throws Exception{ + return createOrUpdateEntities(dgiBridge, user, entity, skipTempTables, null); + } + private List<Pair<? extends Entity, Referenceable>> handleEventOutputs(HiveMetaStoreBridge dgiBridge, HiveEventContext event, Type entityType) throws Exception { List<Pair<? extends Entity, Referenceable>> entitiesCreatedOrUpdated = new ArrayList<>(); for (Entity entity : event.getOutputs()) { http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/2e02ae62/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java ---------------------------------------------------------------------- diff --git a/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java b/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java index 84d9a52..4223d90 100755 --- a/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java +++ b/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java @@ -68,6 +68,7 @@ import static org.apache.atlas.hive.hook.HiveHook.normalize; import static org.apache.atlas.hive.model.HiveDataModelGenerator.NAME; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertTrue; import static org.testng.Assert.fail; public class HiveHookIT { @@ -327,8 +328,8 @@ public class HiveHookIT { String datasetName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, viewName); JSONObject response = atlasClient.getInputGraph(datasetName); JSONObject vertices = response.getJSONObject("values").getJSONObject("vertices"); - Assert.assertTrue(vertices.has(viewId)); - Assert.assertTrue(vertices.has(table1Id)); + assertTrue(vertices.has(viewId)); + assertTrue(vertices.has(table1Id)); //Alter the view from table2 String table2Name = createTable(); @@ -343,13 +344,13 @@ public class HiveHookIT { datasetName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, viewName); response = atlasClient.getInputGraph(datasetName); vertices = response.getJSONObject("values").getJSONObject("vertices"); - Assert.assertTrue(vertices.has(viewId)); + assertTrue(vertices.has(viewId)); //This is through the alter view process - Assert.assertTrue(vertices.has(table2Id)); + assertTrue(vertices.has(table2Id)); //This is through the Create view process - Assert.assertTrue(vertices.has(table1Id)); + assertTrue(vertices.has(table1Id)); //Outputs dont exist response = atlasClient.getOutputGraph(datasetName); @@ -668,7 +669,7 @@ public class HiveHookIT { public void assertOnEntity(final Referenceable entity) throws Exception { Referenceable sd = ((Referenceable) entity.get(HiveDataModelGenerator.STORAGE_DESC)); String location = (String) sd.get(HiveDataModelGenerator.LOCATION); - Assert.assertTrue(location.contains(newTableName)); + assertTrue(location.contains(newTableName)); } }); } @@ -912,6 +913,42 @@ public class HiveHookIT { } @Test + public void testAlterTableWithoutHookConf() throws Exception { + HiveConf conf = new HiveConf(); + conf.set("hive.exec.post.hooks", ""); + SessionState ss = new SessionState(conf); + ss = SessionState.start(ss); + SessionState.setCurrentSessionState(ss); + Driver driver = new Driver(conf); + String tableName = tableName(); + String createCommand = "create table " + tableName + " (id int, name string)"; + driver.run(createCommand); + assertTableIsNotRegistered(DEFAULT_DB, tableName); + String command = "alter table " + tableName + " change id id_new string"; + runCommand(command); + assertTableIsRegistered(DEFAULT_DB, tableName); + String tbqn = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName); + assertColumnIsRegistered(HiveMetaStoreBridge.getColumnQualifiedName(tbqn, "id_new")); + } + + @Test + public void testTraitsPreservedOnColumnRename() throws Exception { + String tableName = createTable(); + String tbqn = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName); + String guid = assertColumnIsRegistered(HiveMetaStoreBridge.getColumnQualifiedName(tbqn, "id")); + String trait = createTrait(guid); + String oldColName = "id"; + String newColName = "id_new"; + String query = String.format("alter table %s change %s %s string", tableName, oldColName, newColName); + runCommand(query); + + String guid2 = assertColumnIsRegistered(HiveMetaStoreBridge.getColumnQualifiedName(tbqn, "id_new")); + assertEquals(guid2, guid); + + assertTrue(atlasClient.getEntity(guid2).getTraits().contains(trait)); + } + + @Test public void testAlterViewRename() throws Exception { String tableName = createTable(); String viewName = tableName(); @@ -1490,14 +1527,14 @@ public class HiveHookIT { String datasetName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, db2, table2); JSONObject response = atlasClient.getInputGraph(datasetName); JSONObject vertices = response.getJSONObject("values").getJSONObject("vertices"); - Assert.assertTrue(vertices.has(table1Id)); - Assert.assertTrue(vertices.has(table2Id)); + assertTrue(vertices.has(table1Id)); + assertTrue(vertices.has(table2Id)); datasetName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, table1); response = atlasClient.getOutputGraph(datasetName); vertices = response.getJSONObject("values").getJSONObject("vertices"); - Assert.assertTrue(vertices.has(table1Id)); - Assert.assertTrue(vertices.has(table2Id)); + assertTrue(vertices.has(table1Id)); + assertTrue(vertices.has(table2Id)); } //For ATLAS-448 http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/2e02ae62/release-log.txt ---------------------------------------------------------------------- diff --git a/release-log.txt b/release-log.txt index bceb7e9..b6d1f9d 100644 --- a/release-log.txt +++ b/release-log.txt @@ -22,6 +22,7 @@ ATLAS-409 Atlas will not import avro tables with schema read from a file (dosset ATLAS-379 Create sqoop and falcon metadata addons (venkatnrangan,bvellanki,sowmyaramesh via shwethags) ALL CHANGES: +ATLAS-752 Column renames should retain traits/tags (svimal2106 via shwethags) ATLAS-821 Atlas UI - Add arrow to navigate to child term (kevalbhatt18 via yhemanth) ATLAS-812 Atlas UI - Associate Terms with Assets (kevalbhatt18 via yhemanth) ATLAS-809 JAAS configuration needed for Kafka interaction via Atlas config file (abhayk via shwethags)
