Hi there...

There is a functionality that I want to suggest for the install program 
contained in coreutils. I believe that it should have an option to compare 
source and destination files before replacing the destination.

I've implemented this as a wrapper script and have been using it for about
half a year without any problems, and I really believe that the install
program should have this feature built in.

It is very useful for people who do a lot of compiling, since it keeps
makefile dependencies from being invalidated by header files being overwritten
over and over again. It also reduces filesystem writes a lot and leads to
smaller incremental backups.

Example: KDE is distributed in many source packages where each one depends on
others. Reinstalling kdelibs means that all other packages will have to be
rebuilt from scratch. In other words, if you have already compiled and
installed kdelibs and kdebase and you 'make install' again in kdelibs,
kdebase will require a rebuild since most header files will be modified (just
the [cm]time). If the install program checked the content before
overwriting a file, this would be avoided.

I'm attaching a patch for this. Use and modify it under GPL if you like...

Some benchmarks:

hell:/tmp/coreutils-5.0/src$ sync ; time /bin/sh -c ' cnt=0; while [ $cnt -lt 
10 ] ; do ./ginstall ~/tmp1 ~/tmp2 ; let cnt++; done'
real    0m14.860s
user    0m0.305s
sys     0m10.576s

hell:/tmp/coreutils-5.0/src$ sync ; time /bin/sh -c ' cnt=0; while [ $cnt -lt 
10 ] ; do ./ginstall ~/tmp1 ~/tmp2 ; let cnt++; done'
real    0m18.617s
user    0m0.270s
sys     0m10.627s

hell:/tmp/coreutils-5.0/src$ sync ; time /bin/sh -c ' cnt=0; while [ $cnt -lt 
10 ] ; do ./ginstall -C ~/tmp1 ~/tmp2 ; let cnt++; done'
real    0m9.559s
user    0m4.981s
sys     0m3.653s

hell:/tmp/coreutils-5.0/src$ sync ; time /bin/sh -c ' cnt=0; while [ $cnt -lt 
10 ] ; do ./ginstall -C ~/tmp1 ~/tmp2 ; let cnt++; done'
real    0m9.925s
user    0m4.995s
sys     0m3.656s

Tested on my system with a 50MB test file. Notice the delay when not using
-C... In the second run without -C, the real time is about 7 seconds more
than user+sys; this is caused by kjournald, which takes a lot of CPU time
when -C is not used.

I believe that a faster implementation is possible by using mmap() to compare 
source and destination files but I don't know anything about portability when 
using mmap(), so I've used open()/read().

Also, I've not used O_LARGEFILE because a quick grep for it in coreutils-5.0
returned no results.

<<V13>>
--- install.c.orig	2003-11-01 13:46:59.000000000 +0200
+++ install.c	2003-11-01 15:09:14.000000000 +0200
@@ -92,6 +92,8 @@
 static void strip (const char *path);
 void usage (int status);
 
+static int issame (const char *from, const char *to);
+
 /* The name this program was run with, for error messages. */
 char *program_name;
 
@@ -119,9 +121,13 @@
 /* If nonzero, install a directory instead of a regular file. */
 static int dir_arg;
 
+/* If nonzero, compare source and destination files before overwriting */
+static int compare_files;
+
 static struct option const long_options[] =
 {
   {"backup", optional_argument, NULL, 'b'},
+  {"compare", no_argument, NULL, 'C'},
   {"directory", no_argument, NULL, 'd'},
   {"group", required_argument, NULL, 'g'},
   {"mode", required_argument, NULL, 'm'},
@@ -200,13 +206,14 @@
   group_name = NULL;
   strip_files = 0;
   dir_arg = 0;
+  compare_files = 0;
   umask (0);
 
   /* FIXME: consider not calling getenv for SIMPLE_BACKUP_SUFFIX unless
      we'll actually use backup_suffix_string.  */
   backup_suffix_string = getenv ("SIMPLE_BACKUP_SUFFIX");
 
-  while ((optc = getopt_long (argc, argv, "bcsDdg:m:o:pvV:S:", long_options,
+  while ((optc = getopt_long (argc, argv, "bcCsDdg:m:o:pvV:S:", long_options,
 			      NULL)) != -1)
     {
       switch (optc)
@@ -228,6 +235,9 @@
 	  break;
 	case 'c':
 	  break;
+	case 'C':
+	  compare_files = 1;
+	  break;
 	case 's':
 	  strip_files = 1;
 	  break;
@@ -436,6 +446,9 @@
       return 1;
     }
 
+  if (compare_files && issame(from,to))
+    return 0;
+
   fail = copy (from, to, nonexistent_dst, x, &copy_into_self, NULL);
 
   return fail;
@@ -510,6 +523,64 @@
   return 0;
 }
 
+/* Check if from and to are same
+   Return 0 if not, 1 if yes */
+
+#define CMP_BLKSIZE	4096
+#ifndef EINTR
+#  define EINTR 0
+#  define V_EINTR
+#endif
+static int
+issame (const char *from, const char *to)
+{
+  struct stat buf1, buf2;
+  char p1[CMP_BLKSIZE], p2[CMP_BLKSIZE];
+  ssize_t n_read1, n_read2;
+  int ret;
+  int fd1, fd2;
+
+  if (lstat(from, &buf1) || lstat(to, &buf2))
+    return 0;
+
+  if (buf1.st_size != buf2.st_size)
+    return 0;
+
+  if (!S_ISREG(buf1.st_mode) || !S_ISREG(buf2.st_mode))
+    return 0;
+
+  fd1=open(from, O_RDONLY); if (fd1<0) return 0;
+  fd2=open(to, O_RDONLY); if (fd2<0) { close (fd1); return 0; }
+
+  for (;;)
+  {
+    do {
+      n_read1=read(fd1, p1, CMP_BLKSIZE);
+      if (n_read1<0 && errno!=EINTR) { ret=0; goto do_return; }
+    } while (n_read1<0 && errno==EINTR);
+
+    do {
+      n_read2=read(fd2, p2, CMP_BLKSIZE);
+      if (n_read2<0 && errno!=EINTR) { ret=0; goto do_return; }
+    } while (n_read2<0 && errno==EINTR);
+
+   if (n_read1 == 0 && n_read2 == 0) { ret=1; goto do_return; }
+
+    if (n_read1!=n_read2) { ret=0; goto do_return; }
+
+    if (n_read1>0 && memcmp(p1, p2, n_read1)) { ret=0; goto do_return; }
+  }
+
+do_return:
+  close(fd1);
+  close(fd2);
+  return(ret);
+}
+#ifdef V_EINTR
+#  undef EINTR
+#endif
+#undef CMP_BLKSIZE
+
 /* Strip the symbol table from the file PATH.
    We could dig the magic number out of the file first to
    determine whether to strip it, but the header files and
@@ -613,6 +684,7 @@
       --backup[=CONTROL] make a backup of each existing destination file\n\
   -b                  like --backup but does not accept an argument\n\
   -c                  (ignored)\n\
+  -C, --compare       compare source and destination files before overwriting\n\
   -d, --directory     treat all arguments as directory names; create all\n\
                         components of the specified directories\n\
 "), stdout);

Attachment: pgp00000.pgp
Description: signature

_______________________________________________
Bug-coreutils mailing list
[EMAIL PROTECTED]
http://mail.gnu.org/mailman/listinfo/bug-coreutils

Reply via email to