Author: eelco
Date: Mon Nov 29 15:26:28 2010
New Revision: 24920
URL: https://svn.nixos.org/websvn/nix/?rev=24920&sc=1

Log:
* Move the patch generator into a module.

Added:
   nix/branches/sqlite/scripts/GeneratePatches.pm.in
      - copied, changed from r24910, 
nix/branches/sqlite/scripts/generate-patches.pl.in
Deleted:
   nix/branches/sqlite/scripts/generate-patches.pl.in
Modified:
   nix/branches/sqlite/scripts/Makefile.am
   nix/branches/sqlite/scripts/readmanifest.pm.in

Copied and modified: nix/branches/sqlite/scripts/GeneratePatches.pm.in (from 
r24910, nix/branches/sqlite/scripts/generate-patches.pl.in)
==============================================================================
--- nix/branches/sqlite/scripts/generate-patches.pl.in  Mon Nov 29 09:26:05 
2010        (r24910, copy source)
+++ nix/branches/sqlite/scripts/GeneratePatches.pm.in   Mon Nov 29 15:26:28 
2010        (r24920)
@@ -9,7 +9,7 @@
 
 # Max size of NAR archives to generate patches for.
 my $maxNarSize = $ENV{"NIX_MAX_NAR_SIZE"};
-$maxNarSize = 100 * 1024 * 1024 if !defined $maxNarSize;
+$maxNarSize = 160 * 1024 * 1024 if !defined $maxNarSize;
 
 # If patch is bigger than this fraction of full archive, reject.
 my $maxPatchFraction = $ENV{"NIX_PATCH_FRACTION"};
@@ -18,36 +18,8 @@
 my $timeLimit = $ENV{"NIX_BSDIFF_TIME_LIMIT"};
 $timeLimit = 180 if !defined $timeLimit;
 
-
-die unless scalar @ARGV == 5;
-
 my $hashAlgo = "sha256";
 
-my $narDir = $ARGV[0];
-my $patchesDir = $ARGV[1];
-my $patchesURL = $ARGV[2];
-my $srcManifest = $ARGV[3];
-my $dstManifest = $ARGV[4];
-
-my $tmpDir = tempdir("nix-generate-patches.XXXXXX", CLEANUP => 1, TMPDIR => 1)
-    or die "cannot create a temporary directory";
-
-#END { rmdir $tmpDir; }
-
-my %srcNarFiles;
-my %srcLocalPaths;
-my %srcPatches;
-
-my %dstNarFiles;
-my %dstLocalPaths;
-my %dstPatches;
-
-readManifest "$srcManifest",
-    \%srcNarFiles, \%srcLocalPaths, \%srcPatches;
-
-readManifest "$dstManifest",
-    \%dstNarFiles, \%dstLocalPaths, \%dstPatches;
-
 
 sub findOutputPaths {
     my $narFiles = shift;
@@ -73,12 +45,6 @@
     return %outPaths;
 }
 
-print "finding src output paths...\n";
-my %srcOutPaths = findOutputPaths \%srcNarFiles;
-
-print "finding dst output paths...\n";
-my %dstOutPaths = findOutputPaths \%dstNarFiles;
-
 
 sub getNameVersion {
     my $p = shift;
@@ -109,6 +75,7 @@
 
 
 sub getNarBz2 {
+    my $narPath = shift;
     my $narFiles = shift;
     my $storePath = shift;
     
@@ -120,7 +87,7 @@
 
     $narFile->{url} =~ /\/([^\/]+)$/;
     die unless defined $1;
-    return "$narDir/$1";
+    return "$narPath/$1";
 }
 
 
@@ -139,291 +106,230 @@
 }
 
 
-# Compute the "weighted" number of uses of a path in the build graph.
-sub computeUses {
-    my $narFiles = shift;
-    my $path = shift;
+sub generatePatches {
+    my ($srcNarFiles, $dstNarFiles, $srcPatches, $dstPatches, $narPath, 
$patchesPath, $patchesURL, $tmpDir) = @_;
 
-    # Find the deriver of $path.
-    return 1 unless defined $$narFiles{$path};
-    my $deriver = @{$$narFiles{$path}}[0]->{deriver};
-    return 1 unless defined $deriver && $deriver ne "";
-
-#    print "  DERIVER $deriver\n";
-
-    # Optimisation: build the referrers graph from the references
-    # graph.
-    my %referrers;
-    foreach my $q (keys %{$narFiles}) {
-        my @refs = split " ", @{$$narFiles{$q}}[0]->{references};
-        foreach my $r (@refs) {
-            $referrers{$r} = [] unless defined $referrers{$r};
-            push @{$referrers{$r}}, $q;
-        }
-    }
+    my %srcOutPaths = findOutputPaths $srcNarFiles;
+    my %dstOutPaths = findOutputPaths $dstNarFiles;
 
-    # Determine the shortest path from $deriver to all other reachable
-    # paths in the `referrers' graph.
+    # For each output path in the destination, see if we need to / can
+    # create a patch.
 
-    my %dist;
-    $dist{$deriver} = 0;
+    print STDERR "creating patches...\n";
 
-    my @queue = ($deriver);
-    my $pos = 0;
-    
-    while ($pos < scalar @queue) {
-        my $p = $queue[$pos];
-        $pos++;
-
-        foreach my $q (@{$referrers{$p}}) {
-            if (!defined $dist{$q}) {
-                $dist{$q} = $dist{$p} + 1;
-#                print "    $q $dist{$q}\n";
-                push @queue, $q;
-            }
-        }
-    }
-
-    my $wuse = 1.0;
-    foreach my $user (keys %dist) {
-        next if $user eq $deriver;
-#        print "    $user $dist{$user}\n";
-        $wuse += 1.0 / 2.0**$dist{$user};
-    }
+    foreach my $p (keys %dstOutPaths) {
 
-#    print "    XXX $path $wuse\n";
+        # If exactly the same path already exists in the source, skip it.
+        next if defined $srcOutPaths{$p};
     
-    return $wuse;
-}
-
-
-# For each output path in the destination, see if we need to / can
-# create a patch.
-
-print "creating patches...\n";
-
-foreach my $p (keys %dstOutPaths) {
+        print "  $p\n";
 
-    # If exactly the same path already exists in the source, skip it.
-    next if defined $srcOutPaths{$p};
-    
-    print "  $p\n";
-
-    # If not, then we should find the paths in the source that are
-    # `most' likely to be present on a system that wants to install
-    # this path.
-
-    (my $name, my $version) = getNameVersion $p;
-    next unless defined $name && defined $version;
-
-    my @closest = ();
-    my $closestVersion;
-    my $minDist = -1; # actually, larger means closer
+        # If not, then we should find the paths in the source that are
+        # `most' likely to be present on a system that wants to
+        # install this path.
+
+        (my $name, my $version) = getNameVersion $p;
+        next unless defined $name && defined $version;
+
+        my @closest = ();
+        my $closestVersion;
+        my $minDist = -1; # actually, larger means closer
+
+        # Find all source paths with the same name.
+
+        foreach my $q (keys %srcOutPaths) {
+            (my $name2, my $version2) = getNameVersion $q;
+            next unless defined $name2 && defined $version2;
+
+            if ($name eq $name2) {
+
+                my $srcSystem = @{$$dstNarFiles{$p}}[0]->{system};
+                my $dstSystem = @{$$srcNarFiles{$q}}[0]->{system};
+                if (defined $srcSystem && defined $dstSystem && $srcSystem ne 
$dstSystem) {
+                    print "    SKIPPING $q due to different systems 
($srcSystem vs. $dstSystem)\n";
+                    next;
+                }
+
+                # If the sizes differ too much, then skip.  This
+                # disambiguates between, e.g., a real component and a
+                # wrapper component (cf. Firefox in Nixpkgs).
+                my $srcSize = @{$$srcNarFiles{$q}}[0]->{size};
+                my $dstSize = @{$$dstNarFiles{$p}}[0]->{size};
+                my $ratio = $srcSize / $dstSize;
+                $ratio = 1 / $ratio if $ratio < 1;
+                # print "  SIZE $srcSize $dstSize $ratio $q\n";
+
+                if ($ratio >= 3) {
+                    print "    SKIPPING $q due to size ratio $ratio ($srcSize 
vs. $dstSize)\n";
+                    next;
+                }
+
+                # If there are multiple matching names, include the
+                # ones with the closest version numbers.
+                my $dist = versionDiff $version, $version2;
+                if ($dist > $minDist) {
+                    $minDist = $dist;
+                    @closest = ($q);
+                    $closestVersion = $version2;
+                } elsif ($dist == $minDist) {
+                    push @closest, $q;
+                }
+            }
+        }
 
-    # Find all source paths with the same name.
+        if (scalar(@closest) == 0) {
+            print "    NO BASE: $p\n";
+            next;
+        }
 
-    foreach my $q (keys %srcOutPaths) {
-        (my $name2, my $version2) = getNameVersion $q;
-        next unless defined $name2 && defined $version2;
+        foreach my $closest (@closest) {
 
-        if ($name eq $name2) {
+            # Generate a patch between $closest and $p.
+            print STDERR "  $p <- $closest\n";
 
-            my $srcSystem = @{$dstNarFiles{$p}}[0]->{system};
-            my $dstSystem = @{$srcNarFiles{$q}}[0]->{system};
-            if (defined $srcSystem && defined $dstSystem && $srcSystem ne 
$dstSystem) {
-                print "  SKIPPING $q due to different systems ($srcSystem vs. 
$dstSystem)\n";
+            # If the patch already exists, skip it.
+            if (containsPatch($srcPatches, $p, $closest) ||
+                containsPatch($dstPatches, $p, $closest))
+            {
+                print "    skipping, already exists\n";
                 next;
             }
 
-            # If the sizes differ too much, then skip.  This
-            # disambiguates between, e.g., a real component and a
-            # wrapper component (cf. Firefox in Nixpkgs).
-            my $srcSize = @{$srcNarFiles{$q}}[0]->{size};
-            my $dstSize = @{$dstNarFiles{$p}}[0]->{size};
-            my $ratio = $srcSize / $dstSize;
-            $ratio = 1 / $ratio if $ratio < 1;
-#            print "  SIZE $srcSize $dstSize $ratio $q\n";
+            my $srcNarBz2 = getNarBz2 $narPath, $srcNarFiles, $closest;
+            my $dstNarBz2 = getNarBz2 $narPath, $dstNarFiles, $p;
 
-            if ($ratio >= 3) {
-                print "  SKIPPING $q due to size ratio $ratio ($srcSize vs. 
$dstSize)\n";
+            if (! -f $srcNarBz2) {
+                warn "patch source archive $srcNarBz2 is missing\n";
                 next;
             }
 
-            # If the numbers of weighted uses differ too much, then
-            # skip.  This disambiguates between, e.g., the bootstrap
-            # GCC and the final GCC in Nixpkgs.
-#            my $srcUses = computeUses \%srcNarFiles, $q;
-#            my $dstUses = computeUses \%dstNarFiles, $p;
-#            $ratio = $srcUses / $dstUses;
-#            $ratio = 1 / $ratio if $ratio < 1;
-#            print "  USE $srcUses $dstUses $ratio $q\n";
-            
-#            if ($ratio >= 2) {
-#                print "  SKIPPING $q due to use ratio $ratio ($srcUses 
$dstUses)\n";
-#                next;
-#            }
-
-            # If there are multiple matching names, include the ones
-            # with the closest version numbers.
-            my $dist = versionDiff $version, $version2;
-            if ($dist > $minDist) {
-                $minDist = $dist;
-                @closest = ($q);
-                $closestVersion = $version2;
-            } elsif ($dist == $minDist) {
-                push @closest, $q;
-            }
-        }
-    }
-
-    if (scalar(@closest) == 0) {
-        print "  NO BASE: $p\n";
-        next;
-    }
-
-    foreach my $closest (@closest) {
-
-        # Generate a patch between $closest and $p.
-        print "  $p <- $closest\n";
-
-        # If the patch already exists, skip it.
-        if (containsPatch(\%srcPatches, $p, $closest) ||
-            containsPatch(\%dstPatches, $p, $closest))
-        {
-            print "    skipping, already exists\n";
-            next;
-        }
-
-#        next;
-        
-        my $srcNarBz2 = getNarBz2 \%srcNarFiles, $closest;
-        my $dstNarBz2 = getNarBz2 \%dstNarFiles, $p;
-
-        if (! -f $srcNarBz2) {
-            warn "patch source archive $srcNarBz2 is missing\n";
-            next;
-        }
-
-        system("@bunzip2@ < $srcNarBz2 > $tmpDir/A") == 0
-            or die "cannot unpack $srcNarBz2";
+            system("@bunzip2@ < $srcNarBz2 > $tmpDir/A") == 0
+                or die "cannot unpack $srcNarBz2";
 
-        if ((stat "$tmpDir/A")[7] >= $maxNarSize) {
-            print "    skipping, source is too large\n";
-            next;
-        }
+            if ((stat "$tmpDir/A")[7] >= $maxNarSize) {
+                print "    skipping, source is too large\n";
+                next;
+            }
         
-        system("@bunzip2@ < $dstNarBz2 > $tmpDir/B") == 0
-            or die "cannot unpack $dstNarBz2";
+            system("@bunzip2@ < $dstNarBz2 > $tmpDir/B") == 0
+                or die "cannot unpack $dstNarBz2";
 
-        if ((stat "$tmpDir/B")[7] >= $maxNarSize) {
-            print "    skipping, destination is too large\n";
-            next;
-        }
+            if ((stat "$tmpDir/B")[7] >= $maxNarSize) {
+                print "    skipping, destination is too large\n";
+                next;
+            }
         
-        my $time1 = time();
-        my $res = system("ulimit -t $timeLimit; @libexecdir@/bsdiff $tmpDir/A 
$tmpDir/B $tmpDir/DIFF");
-        my $time2 = time();
-        if ($res) {
-            warn "binary diff computation aborted after ", $time2 - $time1, " 
seconds\n";
-            next;
-        }
+            my $time1 = time();
+            my $res = system("ulimit -t $timeLimit; @libexecdir@/bsdiff 
$tmpDir/A $tmpDir/B $tmpDir/DIFF");
+            my $time2 = time();
+            if ($res) {
+                warn "binary diff computation aborted after ", $time2 - 
$time1, " seconds\n";
+                next;
+            }
 
-        my $baseHash = `...@bindir@/nix-hash --flat --type $hashAlgo --base32 
$tmpDir/A` or die;
-        chomp $baseHash;
+            my $baseHash = `...@bindir@/nix-hash --flat --type $hashAlgo 
--base32 $tmpDir/A` or die;
+            chomp $baseHash;
 
-        my $narHash = `...@bindir@/nix-hash --flat --type $hashAlgo --base32 
$tmpDir/B` or die;
-        chomp $narHash;
+            my $narHash = `...@bindir@/nix-hash --flat --type $hashAlgo 
--base32 $tmpDir/B` or die;
+            chomp $narHash;
 
-        my $narDiffHash = `...@bindir@/nix-hash --flat --type $hashAlgo 
--base32 $tmpDir/DIFF` or die;
-        chomp $narDiffHash;
+            my $narDiffHash = `...@bindir@/nix-hash --flat --type $hashAlgo 
--base32 $tmpDir/DIFF` or die;
+            chomp $narDiffHash;
 
-        my $narDiffSize = (stat "$tmpDir/DIFF")[7];
-        my $dstNarBz2Size = (stat $dstNarBz2)[7];
+            my $narDiffSize = (stat "$tmpDir/DIFF")[7];
+            my $dstNarBz2Size = (stat $dstNarBz2)[7];
 
-        print "    size $narDiffSize; full size $dstNarBz2Size; ", $time2 - 
$time1, " seconds\n";
+            print "    size $narDiffSize; full size $dstNarBz2Size; ", $time2 
- $time1, " seconds\n";
         
-        if ($narDiffSize >= $dstNarBz2Size) {
-            print "    rejecting; patch bigger than full archive\n";
-            next;
-        }
+            if ($narDiffSize >= $dstNarBz2Size) {
+                print "    rejecting; patch bigger than full archive\n";
+                next;
+            }
     
-        if ($narDiffSize / $dstNarBz2Size >= $maxPatchFraction) {
-            print "    rejecting; patch too large relative to full archive\n";
-            next;
-        }
+            if ($narDiffSize / $dstNarBz2Size >= $maxPatchFraction) {
+                print "    rejecting; patch too large relative to full 
archive\n";
+                next;
+            }
     
-        my $finalName =
-            "$narDiffHash.nar-bsdiff";
-
-        if (-e "$patchesDir/$finalName") {
-            print "    not copying, already exists\n";
-        }
+            my $finalName = "$narDiffHash.nar-bsdiff";
 
-        else {
+            if (-e "$patchesPath/$finalName") {
+                print "    not copying, already exists\n";
+            }
 
-            system("cp '$tmpDir/DIFF' '$patchesDir/$finalName.tmp'") == 0
-                or die "cannot copy diff";
-            
-            rename("$patchesDir/$finalName.tmp", "$patchesDir/$finalName")
-                or die "cannot rename $patchesDir/$finalName.tmp";
-            
-        }
+            else {
+                system("cp '$tmpDir/DIFF' '$patchesPath/$finalName.tmp'") == 0
+                    or die "cannot copy diff";
+                rename("$patchesPath/$finalName.tmp", 
"$patchesPath/$finalName")
+                    or die "cannot rename $patchesPath/$finalName.tmp";
+            }
         
-        # Add the patch to the manifest.
-        addPatch \%dstPatches, $p,
-            { url => "$patchesURL/$finalName", hash => "$hashAlgo:$narDiffHash"
-            , size => $narDiffSize, basePath => $closest, baseHash => 
"$hashAlgo:$baseHash"
-            , narHash => "$hashAlgo:$narHash", patchType => "nar-bsdiff"
-            }, 0;
+            # Add the patch to the manifest.
+            addPatch $dstPatches, $p,
+                { url => "$patchesURL/$finalName", hash => 
"$hashAlgo:$narDiffHash"
+                , size => $narDiffSize, basePath => $closest, baseHash => 
"$hashAlgo:$baseHash"
+                , narHash => "$hashAlgo:$narHash", patchType => "nar-bsdiff"
+                };
+        }
     }
 }
 
 
-# Add in any potentially useful patches in the source (namely, those
-# patches that produce either paths in the destination or paths that
-# can be used as the base for other useful patches).
-
-print "propagating patches...\n";
-
-my $changed;
-do {
-    # !!! we repeat this to reach the transitive closure; inefficient
-    $changed = 0;
-
-    print "loop\n";
-
-    my %dstBasePaths;
-    foreach my $q (keys %dstPatches) {
-       foreach my $patch (@{$dstPatches{$q}}) {
-           $dstBasePaths{$patch->{basePath}} = 1;
-       }
-    }
+# Propagate useful patches from $srcPatches to $dstPatches.  A patch
+# is useful if it produces either paths in the $dstNarFiles or paths
+# that can be used as the base for other useful patches.
+sub propagatePatches {
+    my ($srcPatches, $dstNarFiles, $dstPatches) = @_;
+
+    print STDERR "propagating patches...\n";
+
+    my $changed;
+    do {
+        # !!! we repeat this to reach the transitive closure; inefficient
+        $changed = 0;
+
+        print STDERR "loop\n";
+
+        my %dstBasePaths;
+        foreach my $q (keys %{$dstPatches}) {
+            foreach my $patch (@{$$dstPatches{$q}}) {
+                $dstBasePaths{$patch->{basePath}} = 1;
+            }
+        }
 
-    foreach my $p (keys %srcPatches) {
-        my $patchList = $srcPatches{$p};
+        foreach my $p (keys %{$srcPatches}) {
+            my $patchList = $$srcPatches{$p};
 
-        my $include = 0;
+            my $include = 0;
 
-        # Is path $p included in the destination?  If so, include
-        # patches that produce it.
-        $include = 1 if defined $dstNarFiles{$p};
-
-        # Is path $p a path that serves as a base for paths in the
-        # destination?  If so, include patches that produce it.
-       # !!! check baseHash
-        $include = 1 if defined $dstBasePaths{$p};
-
-        if ($include) {
-            foreach my $patch (@{$patchList}) {
-                $changed = 1 if addPatch \%dstPatches, $p, $patch;
+            # Is path $p included in the destination?  If so, include
+            # patches that produce it.
+            $include = 1 if defined $$dstNarFiles{$p};
+
+            # Is path $p a path that serves as a base for paths in the
+            # destination?  If so, include patches that produce it.
+            # !!! check baseHash
+            $include = 1 if defined $dstBasePaths{$p};
+
+            if ($include) {
+                foreach my $patch (@{$patchList}) {
+                    $changed = 1 if addPatch $dstPatches, $p, $patch;
+                }
             }
-        }
         
-    }
+        }
     
-} while $changed;
+    } while $changed;
+}
+
+
+# Add all new patches in $srcPatches to $dstPatches.
+sub copyPatches {
+    my ($srcPatches, $dstPatches) = @_;
+    foreach my $p (keys %{$srcPatches}) {
+        addPatch $dstPatches, $p, $_ foreach @{$$srcPatches{$p}};
+    }
+}
 
 
-# Rewrite the manifest of the destination (with the new patches).
-writeManifest "${dstManifest}",
-    \%dstNarFiles, \%dstPatches;
+return 1;

Modified: nix/branches/sqlite/scripts/Makefile.am
==============================================================================
--- nix/branches/sqlite/scripts/Makefile.am     Mon Nov 29 15:25:07 2010        
(r24919)
+++ nix/branches/sqlite/scripts/Makefile.am     Mon Nov 29 15:26:28 2010        
(r24920)
@@ -3,7 +3,7 @@
   nix-install-package nix-channel nix-build \
   nix-copy-closure 
 
-noinst_SCRIPTS = nix-profile.sh generate-patches.pl \
+noinst_SCRIPTS = nix-profile.sh GeneratePatches.pm \
   find-runtime-roots.pl build-remote.pl nix-reduce-build \
   copy-from-other-stores.pl nix-http-export.cgi
 
@@ -17,7 +17,7 @@
        $(INSTALL_DATA) readconfig.pm $(DESTDIR)$(libexecdir)/nix 
        $(INSTALL_DATA) ssh.pm $(DESTDIR)$(libexecdir)/nix 
        $(INSTALL_PROGRAM) find-runtime-roots.pl $(DESTDIR)$(libexecdir)/nix 
-       $(INSTALL_PROGRAM) generate-patches.pl $(DESTDIR)$(libexecdir)/nix 
+       $(INSTALL_PROGRAM) GeneratePatches.pm $(DESTDIR)$(libexecdir)/nix 
        $(INSTALL_PROGRAM) build-remote.pl $(DESTDIR)$(libexecdir)/nix 
        $(INSTALL) -d $(DESTDIR)$(libexecdir)/nix/substituters
        $(INSTALL_PROGRAM) download-using-manifests.pl 
$(DESTDIR)$(libexecdir)/nix/substituters
@@ -36,7 +36,7 @@
   nix-build.in \
   download-using-manifests.pl.in \
   copy-from-other-stores.pl.in \
-  generate-patches.pl.in \
+  GeneratePatches.pm.in \
   nix-copy-closure.in \
   find-runtime-roots.pl.in \
   build-remote.pl.in \

Modified: nix/branches/sqlite/scripts/readmanifest.pm.in
==============================================================================
--- nix/branches/sqlite/scripts/readmanifest.pm.in      Mon Nov 29 15:25:07 
2010        (r24919)
+++ nix/branches/sqlite/scripts/readmanifest.pm.in      Mon Nov 29 15:26:28 
2010        (r24920)
@@ -146,7 +146,7 @@
 
 
 sub writeManifest {
-    my ($manifest, $narFiles, $patches) = @_;
+    my ($manifest, $narFiles, $patches, $noCompress) = @_;
 
     open MANIFEST, ">$manifest.tmp"; # !!! check exclusive
 
@@ -198,11 +198,13 @@
 
 
     # Create a bzipped manifest.
-    system("@bzip2@ < $manifest > $manifest.bz2.tmp") == 0
-        or die "cannot compress manifest";
+    unless (defined $noCompress) {
+       system("@bzip2@ < $manifest > $manifest.bz2.tmp") == 0
+           or die "cannot compress manifest";
 
-    rename("$manifest.bz2.tmp", "$manifest.bz2")
-        or die "cannot rename $manifest.bz2.tmp: $!";
+       rename("$manifest.bz2.tmp", "$manifest.bz2")
+           or die "cannot rename $manifest.bz2.tmp: $!";
+    }
 }
 
 
_______________________________________________
nix-commits mailing list
[email protected]
http://mail.cs.uu.nl/mailman/listinfo/nix-commits

Reply via email to