Source: sphinx
Version: 1.2.3+dfsg-1
Severity: wishlist
Tags: patch
User: reproducible-builds@lists.alioth.debian.org
Usertags: toolchain randomness
X-Debbugs-Cc: reproducible-builds@lists.alioth.debian.org

Hi,

While working on the "reproducible builds" effort [1], we have noticed
that sphinx is generating documentation that is not reproducible.

For example, its output includes non-deterministic memory references
such as:

   <__main__.A at 0x7f68cb685710>

In addition, various generated files (objects.inv, searchindex.js,
translations) do not output their keys in a deterministic order,
resulting in further randomness.

The attached patch attempts to remedy these issues. Once applied, many
packages that use sphinx--but alas not sphinx itself yet!--can be built
reproducibly in our current experimental framework.

 [1]: https://wiki.debian.org/ReproducibleBuilds


Regards,

-- 
      ,''`.
     : :'  :     Chris Lamb
     `. `'`      la...@debian.org / chris-lamb.co.uk
       `-
diff --git a/sphinx/builders/html.py b/sphinx/builders/html.py
index 9c039e3..f489a35 100644
--- a/sphinx/builders/html.py
+++ b/sphinx/builders/html.py
@@ -269,7 +269,7 @@ class StandaloneHTMLBuilder(Builder):
         # html_domain_indices can be False/True or a list of index names
         indices_config = self.config.html_domain_indices
         if indices_config:
-            for domain in self.env.domains.itervalues():
+            for domain in sorted(self.env.domains.itervalues()):
                 for indexcls in domain.indices:
                     indexname = '%s-%s' % (domain.name, indexcls.name)
                     if isinstance(indices_config, list):
@@ -808,7 +808,7 @@ class StandaloneHTMLBuilder(Builder):
             compressor = zlib.compressobj(9)
             for domainname, domain in self.env.domains.iteritems():
                 for name, dispname, type, docname, anchor, prio in \
-                        domain.get_objects():
+                        sorted(domain.get_objects()):
                     if anchor.endswith(name):
                         # this can shorten the inventory by as much as 25%
                         anchor = anchor[:-len(name)] + '$'
diff --git a/sphinx/ext/autodoc.py b/sphinx/ext/autodoc.py
index 423f921..721fbb4 100644
--- a/sphinx/ext/autodoc.py
+++ b/sphinx/ext/autodoc.py
@@ -60,7 +60,6 @@ class DefDict(dict):
 
 identity = lambda x: x
 
-
 class Options(dict):
     """A dict/attribute hybrid that returns None on nonexisting keys."""
     def __getattr__(self, name):
@@ -975,7 +974,8 @@ class FunctionDocumenter(DocstringSignatureMixin, ModuleLevelDocumenter):
                 argspec = getargspec(self.object.__init__)
                 if argspec[0]:
                     del argspec[0][0]
-        args = inspect.formatargspec(*argspec)
+        args = inspect.formatargspec(*argspec,
+                                     formatvalue=lambda x: '=' + safe_repr(x))
         # escape backslashes for reST
         args = args.replace('\\', '\\\\')
         return args
@@ -1030,7 +1030,8 @@ class ClassDocumenter(ModuleLevelDocumenter):
             return None
         if argspec[0] and argspec[0][0] in ('cls', 'self'):
             del argspec[0][0]
-        return inspect.formatargspec(*argspec)
+        return inspect.formatargspec(*argspec,
+                                     formatvalue=lambda x: '=' + safe_repr(x))
 
     def format_signature(self):
         if self.doc_as_attr:
@@ -1229,7 +1230,8 @@ class MethodDocumenter(DocstringSignatureMixin, ClassLevelDocumenter):
         argspec = getargspec(self.object)
         if argspec[0] and argspec[0][0] in ('cls', 'self'):
             del argspec[0][0]
-        args = inspect.formatargspec(*argspec)
+        args = inspect.formatargspec(*argspec,
+                                     formatvalue=lambda x: '=' + safe_repr(x))
         # escape backslashes for reST
         args = args.replace('\\', '\\\\')
         return args
diff --git a/sphinx/search/__init__.py b/sphinx/search/__init__.py
index bd95ecc..760b137 100644
--- a/sphinx/search/__init__.py
+++ b/sphinx/search/__init__.py
@@ -268,13 +268,13 @@ class IndexBuilder(object):
                     if fn in fn2index:
                         rv[k] = fn2index[fn]
                 else:
-                    rv[k] = [fn2index[fn] for fn in v if fn in fn2index]
+                    rv[k] = sorted([fn2index[fn] for fn in v if fn in fn2index])
         return rvs
 
     def freeze(self):
         """Create a usable data structure for serializing."""
-        filenames = self._titles.keys()
-        titles = self._titles.values()
+        filenames = sorted(self._titles.keys())
+        titles = sorted(self._titles.values())
         fn2index = dict((f, i) for (i, f) in enumerate(filenames))
         terms, title_terms = self.get_terms(fn2index)
 
diff --git a/sphinx/util/inspect.py b/sphinx/util/inspect.py
index cdbfea7..e04f1fa 100644
--- a/sphinx/util/inspect.py
+++ b/sphinx/util/inspect.py
@@ -9,6 +9,7 @@
     :license: BSD, see LICENSE for details.
 """
 
+import re
 import sys
 
 # this imports the standard library inspect module without resorting to
@@ -135,7 +136,10 @@ def safe_repr(object):
     except Exception:
         raise ValueError
     if isinstance(s, bytes):
-        return force_decode(s, None).replace('\n', ' ')
+        s = force_decode(s, None)
+    # Strip non-deterministic memory addresses such as
+    # ``<__main__.A at 0x7f68cb685710>``
+    s = re.sub(r' at 0x[0-9a-f]{8,12}(?=>$)', '', s)
     return s.replace('\n', ' ')
 
 
diff --git a/sphinx/util/jsdump.py b/sphinx/util/jsdump.py
index 85845a7..cc62eab 100644
--- a/sphinx/util/jsdump.py
+++ b/sphinx/util/jsdump.py
@@ -87,11 +87,13 @@ def dumps(obj, key=False):
     elif isinstance(obj, (int, long, float)):
         return str(obj)
     elif isinstance(obj, dict):
-        return '{%s}' % ','.join('%s:%s' % (
+        return '{%s}' % ','.join(sorted('%s:%s' % (
             dumps(key, True),
             dumps(value)
-        ) for key, value in obj.iteritems())
-    elif isinstance(obj, (tuple, list, set)):
+        ) for key, value in obj.iteritems()))
+    elif isinstance(obj, set):
+        return '[%s]' % ','.join(sorted(dumps(x) for x in obj))
+    elif isinstance(obj, (tuple, list)):
         return '[%s]' % ','.join(dumps(x) for x in obj)
     elif isinstance(obj, basestring):
         return encode_string(obj)
_______________________________________________
Reproducible-builds mailing list
Reproducible-builds@lists.alioth.debian.org
http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/reproducible-builds

Reply via email to