Source: diffoscope
Version: ada1a1dcdc19217fb611e0a1e57bc3744399aefa
Severity: wishlist
Tags: patch

It would be useful for diffoscope to show differences in omni.ja files the
same way it does for other Zip files, instead of falling back to a diff of a
hexdump.

The attached patch implements minimal support for this. However, it doesn't
look at differences in the `preload` value.

-- System Information:
Debian Release: stretch/sid
  APT prefers unstable
  APT policy: (500, 'unstable'), (1, 'experimental')
Architecture: amd64 (x86_64)
Foreign Architectures: i386

Kernel: Linux 4.2.0-1-amd64 (SMP w/4 CPU cores)
Locale: LANG=ja_JP.UTF-8, LC_CTYPE=ja_JP.UTF-8 (charmap=UTF-8)
Shell: /bin/sh linked to /bin/dash
Init: systemd (via /run/systemd/system)
diff --git a/diffoscope/comparators/__init__.py b/diffoscope/comparators/__init__.py
index b4615c9..b5dd320 100644
--- a/diffoscope/comparators/__init__.py
+++ b/diffoscope/comparators/__init__.py
@@ -72,7 +72,7 @@ from diffoscope.comparators.symlink import Symlink
 from diffoscope.comparators.text import TextFile
 from diffoscope.comparators.tar import TarFile
 from diffoscope.comparators.xz import XzFile
-from diffoscope.comparators.zip import ZipFile
+from diffoscope.comparators.zip import ZipFile, MozillaZipFile
 
 
 def bail_if_non_existing(*paths):
@@ -154,6 +154,7 @@ FILE_CLASSES = (
     TarFile,
     XzFile,
     ZipFile,
+    MozillaZipFile,
     ImageFile,
     )
 
diff --git a/diffoscope/comparators/zip.py b/diffoscope/comparators/zip.py
index ecdc77b..42c9a9f 100644
--- a/diffoscope/comparators/zip.py
+++ b/diffoscope/comparators/zip.py
@@ -111,3 +111,83 @@ class ZipFile(File):
         zipinfo_difference = Difference.from_command(Zipinfo, self.path, other.path) or \
                              Difference.from_command(ZipinfoVerbose, self.path, other.path)
         return [zipinfo_difference]
+
+
+class MozillaZipCommandMixin(object):
+    """Mixin making a command's wait() always return 0.
+
+    List it before the command class (as the subclasses below do) so
+    that this wait() takes precedence in the method resolution order.
+    """
+
+    def wait(self):
+        # zipinfo emits an error when reading Mozilla-optimized ZIPs,
+        # which is fine to ignore.
+        # Cooperative super(): run the next wait() in the MRO rather than
+        # hard-coding Zipinfo as the starting point of the lookup.
+        super(MozillaZipCommandMixin, self).wait()
+        return 0
+
+
+class MozillaZipinfo(MozillaZipCommandMixin, Zipinfo):
+    """Zipinfo whose wait() always returns 0."""
+
+
+class MozillaZipinfoVerbose(MozillaZipCommandMixin, ZipinfoVerbose):
+    """ZipinfoVerbose whose wait() always returns 0."""
+
+
+class MozillaZipContainer(ZipContainer):
+    """ZipContainer that opens Mozilla-optimized ZIPs."""
+
+    def open_archive(self):
+        """Open the archive with zipfile._EndRecData temporarily patched.
+
+        Returns the result of the parent class's open_archive().
+        """
+        # This is gross: Monkeypatch zipfile._EndRecData to work with
+        # Mozilla-optimized ZIPs
+        _orig_EndRecData = zipfile._EndRecData
+
+        def _EndRecData(fh):
+            endrec = _orig_EndRecData(fh)
+            if endrec:
+                # Overwrite the location field with offset + size.
+                endrec[zipfile._ECD_LOCATION] = (endrec[zipfile._ECD_OFFSET] +
+                                                 endrec[zipfile._ECD_SIZE])
+            return endrec
+
+        zipfile._EndRecData = _EndRecData
+        try:
+            return super(MozillaZipContainer, self).open_archive()
+        finally:
+            # Undo the patch even when opening raises, so the module-wide
+            # zipfile._EndRecData is never left replaced.
+            zipfile._EndRecData = _orig_EndRecData
+
+
+class MozillaZipFile(File):
+    """File class for Mozilla-optimized ZIPs (such as omni.ja)."""
+
+    CONTAINER_CLASS = MozillaZipContainer
+
+    @staticmethod
+    def recognizes(file):
+        """Return True if the file starts like a Mozilla-optimized ZIP.
+
+        Always returns a bool, including for files shorter than 8 bytes.
+        """
+        # Mozilla-optimized ZIPs start with a 32-bit little endian integer
+        # indicating the amount of data to preload, followed by the ZIP
+        # central directory (with a PK\x01\x02 signature)
+        with open(file.path, 'rb') as f:
+            header = f.read(8)
+        # Bytes 0-3 hold the preload value (not inspected); 4-7 the signature.
+        return len(header) == 8 and header[4:] == b'PK\x01\x02'
+
+    def compare_details(self, other, source=None):
+        """Return a one-element list with the zipinfo comparison result."""
+        # Use the verbose listing only when the plain one yields nothing.
+        zipinfo_difference = Difference.from_command(MozillaZipinfo, self.path, other.path) or \
+                             Difference.from_command(MozillaZipinfoVerbose, self.path, other.path)
+        return [zipinfo_difference]
_______________________________________________
Reproducible-builds mailing list
Reproducible-builds@lists.alioth.debian.org
http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/reproducible-builds

Reply via email to