NoQ created this revision.
NoQ added reviewers: dcoughlin, xazax.hun, a_sidorin, rnkovacs, Szelethus, 
baloghadamsoftware, Charusso.
Herald added subscribers: cfe-commits, dkrupp, donat.nagy, mikhail.ramalho, 
a.sidorin, szepet.
Herald added a project: clang.
NoQ added a comment.
NoQ added a parent revision: D62638: [analyzer] A Python script to prettify the 
ExplodedGraph dumps..

I'll add some tests as soon as i'm sure tests for this thing actually work (by 
landing D62638 <https://reviews.llvm.org/D62638>).


Implement a mode in which the rewriter prints differences between adjacent 
program states rather than the whole states. The whole state is still printed 
for nodes with multiple predecessors (because merge diffs were annoying to 
implement and it's still nice to have the full state occasionally) and for leaf 
nodes (because they're the important ones).

F8999355: Screen Shot 2019-05-31 at 4.57.55 PM.png 
<https://reviews.llvm.org/F8999355>

The diffs are computed "semantically" as opposed to plain text diffs. I.e., the 
diff algorithm is hand-crafted separately for every state trait, taking the 
underlying data structures into account. This is especially nice for 
`Environment` because textual diffs would have been terrible. This, of course, 
produces quite some boilerplate, but i think it's isolated enough to not bother 
me that much.


Repository:
  rC Clang

https://reviews.llvm.org/D62761

Files:
  clang/utils/analyzer/exploded-graph-rewriter.py

Index: clang/utils/analyzer/exploded-graph-rewriter.py
===================================================================
--- clang/utils/analyzer/exploded-graph-rewriter.py
+++ clang/utils/analyzer/exploded-graph-rewriter.py
@@ -9,6 +9,13 @@
 import re
 
 
+# A helper function for finding the difference between two dictionaries.
+def diff_dicts(curr, prev):
+    removed = [k for k in prev if k not in curr or curr[k] != prev[k]]
+    added = [k for k in curr if k not in prev or curr[k] != prev[k]]
+    return (removed, added)
+
+
 # A deserialized source location.
 class SourceLocation(object):
     def __init__(self, json_loc):
@@ -38,13 +45,21 @@
             self.block_id = json_pp['block_id']
 
 
-# A value of a single expression in a deserialized Environment.
-class EnvironmentBinding(object):
-    def __init__(self, json_eb):
-        super(EnvironmentBinding, self).__init__()
-        self.stmt_id = json_eb['stmt_id']
-        self.pretty = json_eb['pretty']
-        self.value = json_eb['value']
+# A single expression acting as a key in a deserialized Environment.
+class EnvironmentBindingKey(object):
+    def __init__(self, json_ek):
+        super(EnvironmentBindingKey, self).__init__()
+        self.stmt_id = json_ek['stmt_id']
+        self.pretty = json_ek['pretty']
+
+    def _key(self):
+        return self.stmt_id
+
+    def __eq__(self, other):
+        return self._key() == other._key()
+
+    def __hash__(self):
+        return hash(self._key())
 
 
 # Deserialized description of a location context.
@@ -56,6 +71,15 @@
         self.decl = json_frame['calling']
         self.line = json_frame['call_line']
 
+    def _key(self):
+        return self.lctx_id
+
+    def __eq__(self, other):
+        return self._key() == other._key()
+
+    def __hash__(self):
+        return hash(self._key())
+
 
 # A group of deserialized Environment bindings that correspond to a specific
 # location context.
@@ -63,8 +87,17 @@
     def __init__(self, json_frame):
         super(EnvironmentFrame, self).__init__()
         self.location_context = LocationContext(json_frame)
-        self.bindings = [EnvironmentBinding(b) for b in json_frame['items']] \
-            if json_frame['items'] is not None else []
+        self.bindings = collections.OrderedDict(
+            [(EnvironmentBindingKey(b),
+              b['value']) for b in json_frame['items']]
+            if json_frame['items'] is not None else [])
+
+    def diff_bindings(self, prev):
+        return diff_dicts(self.bindings, prev.bindings)
+
+    def is_different(self, prev):
+        removed, added = self.diff_bindings(prev)
+        return len(removed) != 0 or len(added) != 0
 
 
 # A deserialized Environment.
@@ -73,29 +106,80 @@
         super(Environment, self).__init__()
         self.frames = [EnvironmentFrame(f) for f in json_e]
 
+    def diff_frames(self, prev):
+        # TODO: It's difficult to display a good diff when frame numbers shift.
+        if len(self.frames) != len(prev.frames):
+            return None
+
+        updated = []
+        for i in range(len(self.frames)):
+            f = self.frames[i]
+            prev_f = prev.frames[i]
+            if f.location_context == prev_f.location_context:
+                if f.is_different(prev_f):
+                    updated.append(i)
+            else:
+                # We have the whole frame replaced with another frame.
+                # TODO: Produce a nice diff.
+                return None
+
+        # TODO: Add support for added/removed.
+        return updated
+
+    def is_different(self, prev):
+        updated = self.diff_frames(prev)
+        return updated is None or len(updated) > 0
+
+
+# A single binding key in a deserialized RegionStore cluster.
+class StoreBindingKey(object):
+    def __init__(self, json_sk):
+        super(StoreBindingKey, self).__init__()
+        self.kind = json_sk['kind']
+        self.offset = json_sk['offset']
 
-# A single binding in a deserialized RegionStore cluster.
-class StoreBinding(object):
-    def __init__(self, json_sb):
-        super(StoreBinding, self).__init__()
-        self.kind = json_sb['kind']
-        self.offset = json_sb['offset']
-        self.value = json_sb['value']
+    def _key(self):
+        return (self.kind, self.offset)
+
+    def __eq__(self, other):
+        return self._key() == other._key()
+
+    def __hash__(self):
+        return hash(self._key())
 
 
 # A single cluster of the deserialized RegionStore.
 class StoreCluster(object):
     def __init__(self, json_sc):
         super(StoreCluster, self).__init__()
-        self.base_region = json_sc['cluster']
-        self.bindings = [StoreBinding(b) for b in json_sc['items']]
+        self.bindings = collections.OrderedDict(
+            [(StoreBindingKey(b), b['value']) for b in json_sc['items']])
+
+    def diff_bindings(self, prev):
+        return diff_dicts(self.bindings, prev.bindings)
+
+    def is_different(self, prev):
+        removed, added = self.diff_bindings(prev)
+        return len(removed) != 0 or len(added) != 0
 
 
 # A deserialized RegionStore.
 class Store(object):
     def __init__(self, json_s):
         super(Store, self).__init__()
-        self.clusters = [StoreCluster(c) for c in json_s]
+        self.clusters = collections.OrderedDict(
+            [(c['cluster'], StoreCluster(c)) for c in json_s])
+
+    def diff_clusters(self, prev):
+        removed = [k for k in prev.clusters if k not in self.clusters]
+        added = [k for k in self.clusters if k not in prev.clusters]
+        updated = [k for k in prev.clusters if k in self.clusters
+                   and prev.clusters[k].is_different(self.clusters[k])]
+        return (removed, added, updated)
+
+    def is_different(self, prev):
+        removed, added, updated = self.diff_clusters(prev)
+        return len(removed) != 0 or len(added) != 0 or len(updated) != 0
 
 
 # A deserialized program state.
@@ -203,8 +287,9 @@
 # A visitor that dumps the ExplodedGraph into a DOT file with fancy HTML-based
 # syntax highlighing.
 class DotDumpVisitor(object):
-    def __init__(self):
+    def __init__(self, do_diffs):
         super(DotDumpVisitor, self).__init__()
+        self._do_diffs = do_diffs
 
     @staticmethod
     def _dump_raw(s):
@@ -220,6 +305,14 @@
                .replace('\\l', '<br />')
                .replace('|', ''), end='')
 
+    @staticmethod
+    def _diff_plus_minus(is_added):
+        if is_added is None:
+            return ''
+        if is_added:
+            return '<font color="forestgreen">+</font>'
+        return '<font color="red">-</font>'
+
     def visit_begin_graph(self, graph):
         self._graph = graph
         self._dump_raw('digraph "ExplodedGraph" {\n')
@@ -265,60 +358,116 @@
                        '<font color="%s">%s</font></td></tr>'
                        % (color, p.kind))
 
-    def visit_environment(self, e):
+    def visit_environment(self, e, prev_e=None):
         self._dump('<table border="0">')
 
-        for f in e.frames:
-            self._dump('<tr><td align="left"><b>%s</b></td>'
+        def dump_location_context(lc, is_added=None):
+            self._dump('<tr><td align="center">%s</td>'
+                       '<td align="left"><b>%s</b></td>'
                        '<td align="left"><font color="grey60">%s </font>'
                        '%s</td></tr>'
-                       % (f.location_context.caption,
-                          f.location_context.decl,
-                          ('(line %s)' % f.location_context.line)
-                          if f.location_context.line is not None else ''))
-            for b in f.bindings:
-                self._dump('<tr><td align="left"><i>S%s</i></td>'
-                           '<td align="left">%s</td>'
-                           '<td align="left">%s</td></tr>'
-                           % (b.stmt_id, b.pretty, b.value))
+                       % (self._diff_plus_minus(is_added),
+                          lc.caption, lc.decl,
+                          ('(line %s)' % lc.line) if lc.line is not None
+                          else ''))
+
+        def dump_binding(f, b, is_added=None):
+            self._dump('<tr><td align="center">%s</td>'
+                       '<td align="left"><i>S%s</i></td>'
+                       '<td align="left">%s</td>'
+                       '<td align="left">%s</td></tr>'
+                       % (self._diff_plus_minus(is_added),
+                          b.stmt_id, b.pretty, f.bindings[b]))
+
+        frames_updated = e.diff_frames(prev_e) if prev_e is not None else None
+        if frames_updated:
+            for i in frames_updated:
+                f = e.frames[i]
+                prev_f = prev_e.frames[i]
+                dump_location_context(f.location_context)
+                bindings_removed, bindings_added = f.diff_bindings(prev_f)
+                for b in bindings_removed:
+                    dump_binding(prev_f, b, False)
+                for b in bindings_added:
+                    dump_binding(f, b, True)
+        else:
+            for f in e.frames:
+                dump_location_context(f.location_context)
+                for b in f.bindings:
+                    dump_binding(f, b)
 
         self._dump('</table>')
 
-    def visit_store(self, s):
+    def visit_store(self, s, prev_s=None):
         self._dump('<table border="0">')
 
-        for c in s.clusters:
-            for b in c.bindings:
-                self._dump('<tr><td align="left">%s</td>'
-                           '<td align="left">%s</td>'
-                           '<td align="left">%s</td>'
-                           '<td align="left">%s</td></tr>'
-                           % (c.base_region, b.offset,
-                              '(<i>Default</i>)' if b.kind == 'Default'
-                              else '',
-                              b.value))
+        def dump_binding(s, c, b, is_added=None):
+            self._dump('<tr><td align="center">%s</td>'
+                       '<td align="left">%s</td>'
+                       '<td align="left">%s</td>'
+                       '<td align="left">%s</td>'
+                       '<td align="left">%s</td></tr>'
+                       % (self._diff_plus_minus(is_added),
+                          c, b.offset,
+                          '(<i>Default</i>)' if b.kind == 'Default'
+                          else '',
+                          s.clusters[c].bindings[b]))
+
+        if prev_s is not None:
+            clusters_removed, clusters_added, clusters_updated = \
+                s.diff_clusters(prev_s)
+            for c in clusters_removed:
+                for b in prev_s.clusters[c].bindings:
+                    dump_binding(prev_s, c, b, False)
+            for c in clusters_updated:
+                bindings_removed, bindings_added = \
+                    s.clusters[c].diff_bindings(prev_s.clusters[c])
+                for b in bindings_removed:
+                    dump_binding(prev_s, c, b, False)
+                for b in bindings_added:
+                    dump_binding(s, c, b, True)
+            for c in clusters_added:
+                for b in s.clusters[c].bindings:
+                    dump_binding(s, c, b, True)
+        else:
+            for c in s.clusters:
+                for b in s.clusters[c].bindings:
+                    dump_binding(s, c, b)
 
         self._dump('</table>')
 
-    def visit_state(self, s):
-        self._dump('<tr><td align="left">'
-                   '<b>Store: </b>')
+    def visit_state(self, s, prev_s):
+        # == Store ==
+        self._dump('<tr><td align="left"><b>Store: </b>')
         if s.store is None:
             self._dump('<i> Nothing!</i>')
         else:
-            self._dump('</td></tr>'
-                       '<tr><td align="left">')
-            self.visit_store(s.store)
+            if prev_s is not None and prev_s.store is not None:
+                if s.store.is_different(prev_s.store):
+                    self._dump('</td></tr><tr><td align="left">')
+                    self.visit_store(s.store, prev_s.store)
+                else:
+                    self._dump('<i> No changes!</i>')
+            else:
+                self._dump('</td></tr><tr><td align="left">')
+                self.visit_store(s.store)
+        self._dump('</td></tr><hr />')
 
-        self._dump('</td></tr><hr />'
-                   '<tr><td align="left">'
+        # == Environment ==
+        self._dump('<tr><td align="left">'
                    '<b>Environment: </b>')
         if s.environment is None:
             self._dump('<i> Nothing!</i>')
         else:
-            self._dump('</td></tr>'
-                       '<tr><td align="left">')
-            self.visit_environment(s.environment)
+            if prev_s is not None and prev_s.environment is not None:
+                if s.environment.is_different(prev_s.environment):
+                    self._dump('</td></tr><tr><td align="left">')
+                    self.visit_environment(s.environment, prev_s.environment)
+                else:
+                    self._dump('<i> No changes!</i>')
+            else:
+                self._dump('</td></tr><tr><td align="left">')
+                self.visit_environment(s.environment)
 
         self._dump('</td></tr>')
 
@@ -343,7 +492,14 @@
 
         if node.state is not None:
             self._dump('<hr />')
-            self.visit_state(node.state)
+            prev_s = None
+            # Do diffs only when we have a unique predecessor.
+            # Don't do diffs on the leaf nodes because they're
+            # the important ones.
+            if self._do_diffs and len(node.predecessors) == 1 \
+               and len(node.successors) > 0:
+                prev_s = self._graph.nodes[node.predecessors[0]].state
+            self.visit_state(node.state, prev_s)
         self._dump_raw('</table>>];\n')
 
     def visit_edge(self, pred, succ):
@@ -373,13 +529,13 @@
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument('filename', type=str)
-    parser.add_argument('-d', '--debug', action='store_const', dest='loglevel',
-                        const=logging.DEBUG, default=logging.WARNING,
-                        help='enable debug prints')
     parser.add_argument('-v', '--verbose', action='store_const',
-                        dest='loglevel', const=logging.INFO,
+                        dest='loglevel', const=logging.DEBUG,
                         default=logging.WARNING,
                         help='enable info prints')
+    parser.add_argument('-d', '--diff', action='store_const', dest='diff',
+                        const=True, default=False,
+                        help='display differences between states')
     args = parser.parse_args()
     logging.basicConfig(level=args.loglevel)
 
@@ -390,7 +546,7 @@
             graph.add_raw_line(raw_line)
 
     explorer = Explorer()
-    visitor = DotDumpVisitor()
+    visitor = DotDumpVisitor(args.diff)
     explorer.explore(graph, visitor)
 
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to