Bug#155676: md5sums files

2010-03-07 Thread Anthony Towns
On Sun, Mar 7, 2010 at 10:28, Goswin von Brederlow goswin-...@web.de wrote:
 Anthony Towns a...@erisian.com.au writes:
 Advantages of doing it when uploading:
  - provides some sort of double check of what's being uploaded
  - saves CPU time on users' machines
   - avoids having bad checksums due to the user having bad hardware
     (which is one big use case of the files)

Big? It only makes a difference if:
  a) the corruption happens as soon as it's written, not after some time
  b) the file is too big/the system is too loaded to keep the file in
the page cache
  c) the system memory is corrupted just enough to screw the file but
not everything else

Compared to random make install invocations changing files in the
system and similar, that doesn't strike me as a big use case.

In any event, it's fairly easy to generate the checksum in the same
pass as generating the file, see the attached patch. (It's not as easy
to generalise to other hashes as the previous one, unfortunately)

If you're still worried, perhaps about having read() return bogus data
from the .deb that happens to still be valid when passed through
ungzip and untar and after you've already verified the entire file by
md5/sha1/sha256 when downloading, you're getting to the point of
trying to safely install on an actively malicious system, and
nothing's going to make that work.

Cheers,
aj

-- 
Anthony Towns a...@erisian.com.au
diff -x configure -x '*.m4' -x Makefile.in -urbN dpkg-1.15.5.6/configure.ac dpkg-1.15.5.6-aj/configure.ac
--- dpkg-1.15.5.6/configure.ac	2010-01-08 18:00:34.0 +1000
+++ dpkg-1.15.5.6-aj/configure.ac	2010-03-07 04:38:32.547372468 +1000
@@ -51,6 +51,16 @@
 esac])
 AC_SUBST(admindir)
 
+# Allow alternative default hash function
+hashtype=md5
+AC_ARG_WITH(hashtype,
+	AS_HELP_STRING([--with-hashtype=HASH],
+	   [hash function to use for .hashes files]),
+[case $with_hashtype in
+  md5|none) hashtype=$with_hashtype ;;
+  *) AC_MSG_ERROR([invalid hashtype specified]) ;;
+esac])
+AC_SUBST(hashtype)
 
 # Checks for programs.
 AC_PROG_CC
diff -x configure -x '*.m4' -x Makefile.in -urbN dpkg-1.15.5.6/debian/changelog dpkg-1.15.5.6-aj/debian/changelog
--- dpkg-1.15.5.6/debian/changelog	2010-01-09 04:02:03.0 +1000
+++ dpkg-1.15.5.6-aj/debian/changelog	2010-03-07 04:13:03.171356041 +1000
@@ -1,3 +1,11 @@
+dpkg (1.15.5.6+aj) unstable; urgency=low
+
+  * Non-maintainer upload.
+  * Automatically create /var/lib/dpkg/info/pkg.hashes containing MD5 hashes
+for unpacked files.
+
+ -- Anthony Towns a...@lazuline  Sun, 07 Mar 2010 04:12:32 +1000
+
 dpkg (1.15.5.6) unstable; urgency=low
 
   * dpkg-source: with format 3.0 (quilt) ensure quilt's .pc directory is
diff -x configure -x '*.m4' -x Makefile.in -urbN dpkg-1.15.5.6/lib/dpkg/buffer.c dpkg-1.15.5.6-aj/lib/dpkg/buffer.c
--- dpkg-1.15.5.6/lib/dpkg/buffer.c	2010-01-09 03:23:06.0 +1000
+++ dpkg-1.15.5.6-aj/lib/dpkg/buffer.c	2010-03-07 15:50:33.379710844 +1000
@@ -60,6 +60,13 @@
 	case BUFFER_WRITE_MD5:
 		buffer_md5_init(write_data);
 		break;
+	case BUFFER_WRITE_DUP:
+		{
+		  struct buffer_data *bddup = write_data-arg.ptr;
+		  buffer_init(NULL, bddup[0]);
+		  buffer_init(NULL, bddup[1]);
+		}
+		break;
 	}
 	return 0;
 }
@@ -90,6 +97,13 @@
 	case BUFFER_WRITE_MD5:
 		buffer_md5_done(write_data);
 		break;
+	case BUFFER_WRITE_DUP:
+		{
+		  struct buffer_data *bddup = write_data-arg.ptr;
+		  buffer_done(NULL, bddup[0]);
+		  buffer_done(NULL, bddup[1]);
+		}
+		break;
 	}
 	return 0;
 }
@@ -126,6 +140,14 @@
 	case BUFFER_WRITE_MD5:
 		MD5Updatestruct buffer_write_md5ctx *)data-arg.ptr)-ctx), buf, length);
 		break;
+	case BUFFER_WRITE_DUP:
+		{
+		  struct buffer_data *bddup = data-arg.ptr;
+  ret = buffer_write(bddup[0], buf, length, desc);
+		  if (ret != length) return ret;
+  ret = buffer_write(bddup[1], buf, length, desc);
+		}
+		break;
 	default:
 		internerr(unknown data type '%i' in buffer_write,
 		  data-type);
diff -x configure -x '*.m4' -x Makefile.in -urbN dpkg-1.15.5.6/lib/dpkg/buffer.h dpkg-1.15.5.6-aj/lib/dpkg/buffer.h
--- dpkg-1.15.5.6/lib/dpkg/buffer.h	2010-01-08 18:00:34.0 +1000
+++ dpkg-1.15.5.6-aj/lib/dpkg/buffer.h	2010-03-07 15:53:23.319707965 +1000
@@ -36,6 +36,7 @@
 #define BUFFER_WRITE_NULL		3
 #define BUFFER_WRITE_STREAM		4
 #define BUFFER_WRITE_MD5		5
+#define BUFFER_WRITE_DUP		6
 
 #define BUFFER_READ_FD			0
 #define BUFFER_READ_STREAM		1
@@ -52,6 +53,14 @@
 	buffer_hash(buf, hash, BUFFER_WRITE_MD5, limit)
 
 #if HAVE_C99
+# define fd_fdmd5(fd1, fd2, hash, limit, ...) \
+	do { \
+	struct buffer_data fdhash[2]; \
+fdhash[0].arg.i = fd2; fdhash[0].type = BUFFER_WRITE_FD; \
+fdhash[1].arg.ptr = hash; fdhash[1].type = BUFFER_WRITE_MD5; \
+buffer_copy_IntPtr(fd1, BUFFER_READ_FD, fdhash, BUFFER_WRITE_DUP, \
+	   limit, __VA_ARGS__); \
+} while(0)
 # define fd_md5(fd, hash, limit, ...) \
 	

Bug#155676: md5sums files

2010-03-07 Thread Anthony Towns
(I'm not subscribed to this list, so go ahead and Cc me)

On Thu, Mar 4, 2010 at 02:05, Peter Samuelson pe...@p12n.org wrote:
 [Wouter Verhelst]
  I must say I was somewhat surprised by these numbers. Out of 2483
  packages installed on my laptop, 2340 install md5sums.
 The surprising part, perhaps, is that dpkg itself didn't just generate
 the other 143 md5sums files at installation time.

The easy (and usually correct) reason for things like that is dpkg's
source is scary.

 I suggested this a long time ago and of course was met with so where's
 your patch?  Of course I was not willing to do the work.

See? Anyway, my patch is attached. It makes dpkg create a foo.hashes
when unpacking foo, whose contents looks like:

MD5:32b5e22f8e336b2f34e0dd87652e6dfc  usr/share/doc/mawk/changelog.gz
MD5:87a34f1f55ac3f7fec2c7fc82565e8eb  usr/share/doc/mawk/changelog.Debian.gz
...

Verification is a matter of something like:

$ cat /var/lib/dpkg/info/*.hashes | sed -n 's/^MD5://p' | (cd /;
md5sum -c) | grep -v ': OK$'

There's an option (--hash) that you can set to none to avoid
spending time calculating md5s if you so choose. Adding support for
sha1/sha256/whatever should be straightforward; afaik dpkg only has
code for md5 already built in though (though just invoking
/usr/bin/sha1sum etc would be an option of course).

Of course another option is just to pull the md5sums directly from the deb:

$ ar p /var/cache/apt/archives/ifupdown_0.6.9_i386.deb data.tar.gz |
tar --to-command='printf %s%s\n $(md5sum - | sed s/-$//)
${TAR_FILENAME#./}'  -xzf - |
diff - /var/lib/dpkg/info/ifupdown.md5sums
1,3d0
 346208729633adf45e2fa3f2bd3b19c6  etc/init.d/ifupdown
 c6fffaae03271f1641920105ce68796b  etc/init.d/ifupdown-clean
 fab851ca87c5deb9d6f665e610184648  etc/default/ifupdown
4a2
 a0f11cf1809a468c49b72e0aa0a8e26b  sbin/ifup

(md5sums doesn't normally list conffiles, but does list hardlinks; the
above command does the opposite)

 But
 fundamentally, shipping a md5sums file is really just a tradeoff in
 download size vs. installation speed, not unlike gzip vs. bzip2.

Advantages of doing in when unpacking:
 - choice of checksum is the admin's decision
 - we can quickly roll out support for sha1/sha256/crc/... checksums
by just changing one package
 - admin has hashes of exactly what was unpacked, no matter the source
 - no concerns about bugs in dh_md5sums or similar resulting in bad checksums

Advantages of doing it when uploading:
 - provides some sort of double check of what's being uploaded
 - saves CPU time on users' machines

For me, I'd rather have dpkg generate the hashes.

Cheers,
aj

--
Anthony Towns a...@erisian.com.au
diff -urb dpkg-1.15.5.6/debian/changelog dpkg-1.15.5.6-aj/debian/changelog
--- dpkg-1.15.5.6/debian/changelog	2010-01-09 04:02:03.0 +1000
+++ dpkg-1.15.5.6-aj/debian/changelog	2010-03-07 04:13:03.171356041 +1000
@@ -1,3 +1,11 @@
+dpkg (1.15.5.6+aj) unstable; urgency=low
+
+  * Non-maintainer upload.
+  * Automatically create /var/lib/dpkg/info/pkg.hashes containing MD5 hashes
+for unpacked files.
+
+ -- Anthony Towns a...@lazuline  Sun, 07 Mar 2010 04:12:32 +1000
+
 dpkg (1.15.5.6) unstable; urgency=low
 
   * dpkg-source: with format 3.0 (quilt) ensure quilt's .pc directory is
diff -urb dpkg-1.15.5.6/configure.ac dpkg-1.15.5.6-aj/configure.ac
--- dpkg-1.15.5.6/configure.ac	2010-01-08 18:00:34.0 +1000
+++ dpkg-1.15.5.6-aj/configure.ac	2010-03-07 04:38:32.547372468 +1000
@@ -51,6 +51,16 @@
 esac])
 AC_SUBST(admindir)
 
+# Allow alternative default hash function
+hashtype=md5
+AC_ARG_WITH(hashtype,
+	AS_HELP_STRING([--with-hashtype=HASH],
+	   [hash function to use for .hashes files]),
+[case $with_hashtype in
+  md5|none) hashtype=$with_hashtype ;;
+  *) AC_MSG_ERROR([invalid hashtype specified]) ;;
+esac])
+AC_SUBST(hashtype)
 
 # Checks for programs.
 AC_PROG_CC
diff -urb dpkg-1.15.5.6/src/Makefile.am dpkg-1.15.5.6-aj/src/Makefile.am
--- dpkg-1.15.5.6/src/Makefile.am	2010-01-09 03:23:06.0 +1000
+++ dpkg-1.15.5.6-aj/src/Makefile.am	2010-03-07 04:28:18.771356095 +1000
@@ -6,6 +6,7 @@
 AM_CPPFLAGS = \
 	-DLOCALEDIR=\$(localedir)\ \
 	-DADMINDIR=\$(admindir)\ \
+	-DHASHTYPE=\$(hashtype)\ \
 	-idirafter $(top_srcdir)/lib/compat \
 	-I$(top_builddir) \
 	-I$(top_srcdir)/lib
diff -urb dpkg-1.15.5.6/src/main.c dpkg-1.15.5.6-aj/src/main.c
--- dpkg-1.15.5.6/src/main.c	2010-01-09 03:23:06.0 +1000
+++ dpkg-1.15.5.6-aj/src/main.c	2010-03-07 04:29:59.271360858 +1000
@@ -187,6 +187,7 @@
 const char *admindir= ADMINDIR;
 const char *instdir= ;
 struct pkg_list *ignoredependss = NULL;
+const char *hashtype= HASHTYPE;
 
 static const struct forceinfo {
   const char *name;
@@ -516,6 +517,7 @@
   { admindir,  0,   1, NULL,  admindir, NULL,  0 },
   { instdir,   0,   1, NULL,  instdir,  NULL,  0 },
   { ignore-depends,0,   1, NULL,  NULL,  ignoredepends, 0 },
+  { hash,  0,   1, NULL,