cleanpatch: a script to clean up stealth whitespace added by a patch

Linux Kernel Mailing List Sun, 06 May 2007 14:02:15 -0700

Gitweb:     
http://git.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=6e019b001d27c4289f8e48ebc458e92410446259
Commit:     6e019b001d27c4289f8e48ebc458e92410446259
Parent:     12b315603a1eb95b4e4ea3389ba44bd7ded0aa35
Author:     H. Peter Anvin <[EMAIL PROTECTED]>
AuthorDate: Fri Mar 16 14:45:06 2007 -0700
Committer:  Sam Ravnborg <[EMAIL PROTECTED]>
CommitDate: Wed May 2 20:58:08 2007 +0200


    cleanpatch: a script to clean up stealth whitespace added by a patch
    
    This script is a companion to the "cleanfile" script.  This cleans
    up a patch in unified diff format *before* it is applied.  Note that
    the empty lines at the end of file detection *requires* that the diff was
    taken with at least one line of context around each hunk, or bad things
    will happen.
    
    This script cleans up various classes of stealth whitespace.  In
    particular, it cleans up:
    
    - Whitespace (spaces or tabs)before newline;
    - DOS line endings (CR before LF);
    - Space before tab (spaces are deleted or converted to tabs);
    - Empty lines at end of file.
    
    Signed-off-by: H. Peter Anvin <[EMAIL PROTECTED]>
    Signed-off-by: Sam Ravnborg <[EMAIL PROTECTED]>
---
 scripts/cleanpatch |  206 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 206 insertions(+), 0 deletions(-)

diff --git a/scripts/cleanpatch b/scripts/cleanpatch
new file mode 100755
index 0000000..a53f987
--- /dev/null
+++ b/scripts/cleanpatch
@@ -0,0 +1,206 @@
+#!/usr/bin/perl -w
+#
+# Clean a patch file -- or directory of patch files -- of stealth whitespace.
+# WARNING: this can be a highly destructive operation.  Use with caution.
+#
+
+use bytes;
+use File::Basename;
+
+#
+# Clean up space-tab sequences, either by removing spaces or
+# replacing them with tabs.
+sub clean_space_tabs($)
+{
+    no bytes;                  # Tab alignment depends on characters
+
+    my($li) = @_;
+    my($lo) = '';
+    my $pos = 0;
+    my $nsp = 0;
+    my($i, $c);
+
+    for ($i = 0; $i < length($li); $i++) {
+       $c = substr($li, $i, 1);
+       if ($c eq "\t") {
+           my $npos = ($pos+$nsp+8) & ~7;
+           my $ntab = ($npos >> 3) - ($pos >> 3);
+           $lo .= "\t" x $ntab;
+           $pos = $npos;
+           $nsp = 0;
+       } elsif ($c eq "\n" || $c eq "\r") {
+           $lo .= " " x $nsp;
+           $pos += $nsp;
+           $nsp = 0;
+           $lo .= $c;
+           $pos = 0;
+       } elsif ($c eq " ") {
+           $nsp++;
+       } else {
+           $lo .= " " x $nsp;
+           $pos += $nsp;
+           $nsp = 0;
+           $lo .= $c;
+           $pos++;
+       }
+    }
+    $lo .= " " x $nsp;
+    return $lo;
+}
+
+$name = basename($0);
+
+foreach $f ( @ARGV ) {
+    print STDERR "$name: $f\n";
+
+    if (! -f $f) {
+       print STDERR "$f: not a file\n";
+       next;
+    }
+
+    if (!open(FILE, '+<', $f)) {
+       print STDERR "$name: Cannot open file: $f: $!\n";
+       next;
+    }
+
+    binmode FILE;
+
+    # First, verify that it is not a binary file; consider any file
+    # with a zero byte to be a binary file.  Is there any better, or
+    # additional, heuristic that should be applied?
+    $is_binary = 0;
+
+    while (read(FILE, $data, 65536) > 0) {
+       if ($data =~ /\0/) {
+           $is_binary = 1;
+           last;
+       }
+    }
+
+    if ($is_binary) {
+       print STDERR "$name: $f: binary file\n";
+       next;
+    }
+
+    seek(FILE, 0, 0);
+
+    $in_bytes = 0;
+    $out_bytes = 0;
+
+    @lines  = ();
+
+    $in_hunk = 0;
+    $err = 0;
+
+    while ( defined($line = <FILE>) ) {
+       $in_bytes += length($line);
+
+       if (!$in_hunk) {
+           if ($line =~ /[EMAIL 
PROTECTED]@\s+\-([0-9]+),([0-9]+)\s+\+([0-9]+),([0-9]+)[EMAIL PROTECTED]@/) {
+               $minus_lines = $2;
+               $plus_lines = $4;
+               if ($minus_lines || $plus_lines) {
+                   $in_hunk = 1;
+                   @hunk_lines = ($line);
+               }
+           } else {
+               push(@lines, $line);
+               $out_bytes += length($line);
+           }
+       } else {
+           # We're in a hunk
+
+           if ($line =~ /^\+/) {
+               $plus_lines--;
+
+               $text = substr($line, 1);
+               $text =~ s/[ \t\r]*$//;         # Remove trailing spaces
+               $text = clean_space_tabs($text);
+
+               push(@hunk_lines, '+'.$text);
+           } elsif ($line =~ /^\-/) {
+               $minus_lines--;
+               push(@hunk_lines, $line);
+           } elsif ($line =~ /^ /) {
+               $plus_lines--;
+               $minus_lines--;
+               push(@hunk_lines, $line);
+           } else {
+               print STDERR "$name: $f: malformed patch\n";
+               $err = 1;
+               last;
+           }
+
+           if ($plus_lines < 0 || $minus_lines < 0) {
+               print STDERR "$name: $f: malformed patch\n";
+               $err = 1;
+               last;
+           } elsif ($plus_lines == 0 && $minus_lines == 0) {
+               # End of a hunk.  Process this hunk.
+               my $i;
+               my $l;
+               my @h = ();
+               my $adj = 0;
+               my $done = 0;
+
+               for ($i = scalar(@hunk_lines)-1; $i > 0; $i--) {
+                   $l = $hunk_lines[$i];
+                   if (!$done && $l eq "+\n") {
+                       $adj++; # Skip this line
+                   } elsif ($l =~ /^[ +]/) {
+                       $done = 1;
+                       unshift(@h, $l);
+                   } else {
+                       unshift(@h, $l);
+                   }
+               }
+
+               $l = $hunk_lines[0];  # Hunk header
+               undef @hunk_lines;    # Free memory
+
+               if ($adj) {
+                   die unless
+                       ($l =~ /[EMAIL 
PROTECTED]@\s+\-([0-9]+),([0-9]+)\s+\+([0-9]+),([0-9]+)[EMAIL 
PROTECTED]@(.*)$/);
+                   my $mstart = $1;
+                   my $mlin = $2;
+                   my $pstart = $3;
+                   my $plin = $4;
+                   my $tail = $5; # doesn't include the final newline
+
+                   $l = sprintf("@@ -%d,%d +%d,%d @@%s\n",
+                                $mstart, $mlin, $pstart, $plin-$adj,
+                                $tail);
+               }
+               unshift(@h, $l);
+
+               # Transfer to the output array
+               foreach $l (@h) {
+                   $out_bytes += length($l);
+                   push(@lines, $l);
+               }
+
+               $in_hunk = 0;
+           }
+       }
+    }
+
+    if ($in_hunk) {
+       print STDERR "$name: $f: malformed patch\n";
+       $err = 1;
+    }
+
+    if (!$err) {
+       if ($in_bytes != $out_bytes) {
+           # Only write to the file if changed
+           seek(FILE, 0, 0);
+           print FILE @lines;
+
+           if ( !defined($where = tell(FILE)) ||
+                !truncate(FILE, $where) ) {
+               die "$name: Failed to truncate modified file: $f: $!\n";
+           }
+       }
+    }
+
+    close(FILE);
+}
-
To unsubscribe from this list: send the line "unsubscribe git-commits-head" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

cleanpatch: a script to clean up stealth whitespace added by a patch

Reply via email to