Madhan Neethiraj created ATLAS-4878:
---------------------------------------

             Summary: utility to analyze hook notifications
                 Key: ATLAS-4878
                 URL: https://issues.apache.org/jira/browse/ATLAS-4878
             Project: Atlas
          Issue Type: Improvement
          Components:  atlas-core
            Reporter: Madhan Neethiraj
            Assignee: Madhan Neethiraj


A utility to analyze notifications received from hooks to gather the following
details will be useful in troubleshooting:
 # number of notifications per notification type (CREATE, UPDATE, 
PARTIAL_UPDATE, DELETE, ..)
 # number of entities referenced in notifications per entity type
 # number of entity operations performed while processing the notifications 
(create/update/delete)

 

For example, the following details, obtained by analyzing 114k notifications
from the Hive hook, show that 94% of entities processed are of type hive_column
and hive_column_lineage:
{noformat}
{
  "notifications":         114755,
  "entities":              598435,
  "notificationEntities": 2575347,
  "notificationByType": {
    "ENTITY_CREATE_V2":         49428,
    "ENTITY_FULL_UPDATE_V2":     1597,
    "ENTITY_PARTIAL_UPDATE_V2": 36561,
    "ENTITY_DELETE_V2":         27169
  },
  "notificationEntityByType": {
    "hdfs_path":             16417,
    "hive_db":               20471,
    "hive_table":            57143,
    "hive_storagedesc":      30018,
    "hive_column":          685384,
    "hive_process":          41512,
    "hive_column_lineage": 1724402
  },
  "entityOperations": {
    "CREATE":         598435,
    "UPDATE":        1913182,
    "PARTIAL_UPDATE":  36561,
    "DELETE":          27169
  },
  "entityOperationsByType": {
    "CREATE": {
      "hdfs_path":            10940,
      "hive_db":                224,
      "hive_table":           22154,
      "hive_storagedesc":     15280,
      "hive_column":         332332,
      "hive_process":         23462,
      "hive_column_lineage": 194043
    },
    "UPDATE": {
      "hdfs_path":              5477,
      "hive_column":          319559,
      "hive_column_lineage": 1530359,
      "hive_db":               20203,
      "hive_process":          18050,
      "hive_storagedesc":      13204,
      "hive_table":             6330
    },
    "PARTIAL_UPDATE": {
      "hive_column":      33493,
      "hive_storagedesc":  1534,
      "hive_table":        1534
    },
    "DELETE": {
      "hive_db":       44,
      "hive_table": 27125
    }
  }
} {noformat}
 



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to