Instead of using a variable for space-like expressions, transform every space-like expression into a space.
This is a net win of about 5% on execution time --- checks/cruft.pm | 112 +++++++++++++++++++++++++++---------------------------- 1 file changed, 55 insertions(+), 57 deletions(-) diff --git a/checks/cruft.pm b/checks/cruft.pm index ac2a9fc..aeee3ea 100644 --- a/checks/cruft.pm +++ b/checks/cruft.pm @@ -505,101 +505,99 @@ sub find_cruft { if ( index($block, 'license') > -1 && $block =~ m/gnu (?:\s+|\s*<\/span>\s*|\s*\}\s+)? free \s+ - documentation \s+ license (?'gfdlsections'.{0,1024}?) + documentation \s+ license (?'rawgfdlsections'.{0,1024}?) a \s+ copy \s+ of \s+ the \s+ license \s+ is \s+ included/xsm ) { if (!exists $licenseproblemhash{'gfdl-invariants'}) { - my $gfdlsections = $+{gfdlsections}; - # local space - my $s = qr{(?: - \s | # regular space(s) + my $rawgfdlsections = $+{rawgfdlsections}; + my $gfdlsections = $rawgfdlsections; + + # delete some tag consider as space + # order from more specific to less specific + $gfdlsections =~ s{(?: + ^[-\+!<>] | # diff/patch lines + ^\.\\\" | # man comments \@c(?:omment)? | # Tex info comment - [%\*\"\|\\] | # String, C-style comment/javadoc indent, quotes for strings, pipe and antislash in some txt \"\s*, | # String array (e.g. "line1",\n"line2") ,\s*\" | # String array (e.g. 
"line1"\n ,"line2"), seen in findutils - \\n | # Verbatim \n in string array - \n[-\+!<>] | # diff/patch lines - \n\.\\\" | # man comments <br\s*/?> | # (X)HTML line breaks - </?link.*?> | # xml link - </?a.*?> | # a link - </?p.*?> | # html paragraph + </?link[^>]*?> | # xml link + </?a[^>]*?> | # a link + </?p[^>]*?> | # html paragraph \(\*note.*?::\) | # info file note - )}xsmo; + \\n | # Verbatim \n in string array + \s*[,\.;]\s*\Z | # final punctuation + \A\s*[,\.;]\s* | # punctuation at the beginning + [%\*\"\|\\] # String, C-style comment/javadoc indent, quotes for strings, pipe and antislash in some txt + )}{ }gxms; + + # delete double spacing now + $gfdlsections =~ s{\s+}{ }gsm; + $gfdlsections =~ s{\A\s+}{}gsm; + $gfdlsections =~ s{\s+\Z}{}gsm; + + $gfdlsections =~ s/ + \A \s* version \s+ \d+(?:\.\d+)? \s+ + (?:or \s+ any \s+ later \s+ version \s+)? + published \s+ by \s+ the \s+ Free \s+ Software \s+ Foundation \s* + (?: [,\.;] \s*)? + //xism; + # GFDL license, assume it is bad unless it # explicitly states it has no "bad sections". if ( $gfdlsections =~ m/ - no $s* Invariant $s+ Sections? $s* ,? - $s+ (?:with$s+)? (?:the$s+)? no $s+ Front(?:\\?-)?$s*Cover $s+ (?:Texts?)? $s* ,? $s+ (?:and$s+)? - (?:with$s+)? (?:the$s+)? no $s+ Back(?:\\?-)?$s*Cover/xiso + no \s* Invariant \s+ Sections? \s* ,? + \s+ (?:with\s+)? (?:the\s+)? no \s+ Front(?:\s*\\?-)?\s*Cover (?:\s+Texts?)? \s* ,? \s+ (?:and\s+)? + (?:with\s+)? (?:the\s+)? no \s+ Back(?:\s*\\?-)?\s*Cover/xiso ) { # no invariant } elsif ( $gfdlsections =~ m/ - no $s+ Invariant $s+ Sections?,? - $s+ (?:no$s+)? Front(?:[\\]?-)? $s+ or - $s+ (?:no$s+)? Back(?:[\\]?-)?$s*Cover $s+ Texts?/xiso + no \s+ Invariant \s+ Sections?,? + \s+ (?:no\s+)? Front(?:\s*[\\]?-)? \s+ or + \s+ (?:no\s+)? Back(?:\s*[\\]?-)?\s*Cover \s+ Texts?/xiso ) { # no invariant variant (dict-foldoc) } elsif ( - $gfdlsections =~ m/ - \A $s* (?: [\,\.;] $s* )? version $s+ \d+(?:\.\d+)? $s+ - (?:or $s+ any $s+ later $s+ version $s+)? 
- published $s+ by $s+ the $s+ Free $s+ Software $s+ Foundation $s* - (?: [,\.;] $s*)? - There $s+ are $s+ no $s+ invariants? $s+ sections? - (?: [,\.;] $s*)? \Z + $gfdlsections =~ m/\A + There \s+ are \s+ no \s+ invariants? \s+ sections? \Z /xismo ) { # no invariant libnss-pgsql version } elsif ( $gfdlsections =~ m/ - \A $s* (?: [\,\.;] $s* )? version $s+ \d+(?:\.\d+)? $s+ - (?:or $s+ any $s+ later $s+ version $s+)? - published $s+ by $s+ the $s+ Free $s+ Software $s+ Foundation $s* - (?: [,\.;] $s*)? - without $s+ any $s+ Invariant $s+ Sections $s* - (?: [,\.;] $s*)? \Z + without \s+ any \s+ Invariant \s+ Sections \s* + \Z /xismo ) { # no invariant parsewiki version } elsif ( $gfdlsections =~ m/ - (?: [,\.;] $s*)? version $s+ \d+(?:\.\d+)? $s+ - (?:or $s+ any $s+ later $s+ version $s+)? - published $s+ by $s+ the $s+ Free $s+ Software $s+ Foundation $s* - (?: [,\.;] $s*)? - with $s+ no $s+ invariants? $s+ sections? - (?: [,\.;] $s*)? \Z + \A with \s+ no \s+ invariants? \s+ sections? \Z /xismo ) { # no invariant lilypond version } elsif ( $gfdlsections =~ m/ - with $s+ the $s+ Invariant $s+ Sections $s+ being - $s+ (?:\@var\{|<var>)? LIST $s+ THEIR $s+TITLES (?:\}|<\/var>)? $s* ,? - $s+ with $s+ the $s+ Front(?:[\\]?-)$s*Cover $s+ Texts $s+ being - $s+ (?:\@var\{|<var>)? LIST (?:\}|<\/var>)? $s* ,? - $s+ and $s+ with $s+ the $s+ Back(?:[\\]?-)$s*Cover $s+ Texts $s+ being - $s+ (?:\@var\{|<var>)? LIST (?:\}|<\/var>)?/xiso + with \s+ the \s+ Invariant \s+ Sections \s+ being + \s+ (?:\@var\{|<var>)? LIST \s+ THEIR \s+TITLES (?:\}|<\/var>)? \s* ,? + \s+ with \s+ the \s+ Front(?:\s*[\\]?-)\s*Cover \s+ Texts \s+ being + \s+ (?:\@var\{|<var>)? LIST (?:\}|<\/var>)? \s* ,? + \s+ and \s+ with \s+ the \s+ Back(?:\s*[\\]?-)\s*Cover \s+ Texts \s+ being + \s+ (?:\@var\{|<var>)? LIST (?:\}|<\/var>)?/xiso ) { # verbatim text of license is ok } elsif ( - $gfdlsections =~ m/ - \A $s* (?: [,\.;] $s*)? version $s+ \d+(?:\.\d+)? $s+ - (?:or $s+ any $s+ later $s+ version $s+)? 
- published $s+ by $s+ the $s+ Free $s+ Software $s+ Foundation $s* - (?: [,\.;] $s*)? \Z - /xismo + $gfdlsections =~ m/\A \s* \Z/xismo ) { # empty text is ambiguous tag 'license-problem-gfdl-invariants-empty',$name; $licenseproblemhash{'gfdl-invariants'} = 1; } elsif ( $gfdlsections =~ m/ - with $s+ \&FDLInvariantSections;, $s+ with $s+ \&FDLFrontCoverText;, - $s+ and $s+ with $s+ \&FDLBackCoverText;/xiso + with \s+ \&FDLInvariantSections;, \s+ with \s+ \&FDLFrontCoverText;, + \s+ and \s+ with \s+ \&FDLBackCoverText;/xiso ) { # fix #708957 about FDL entities in template unless ($name @@ -610,10 +608,10 @@ sub find_cruft { } } elsif ( # fix a false positive in maintain.texi - $gfdlsections =~ m/\A $s* \. $s* - Following $s+ is $s+ an $s+ example $s+ of $s+ the $s+ license $s+ notice $s+ - to $s+ use $s+ after $s+ the $s+ copyright $s+ line\(s\) $s+ using $s+ all $s+ the $s+ - features $s+ of $s+ the $s+ GFDL/xismo + $gfdlsections =~ m/\A + Following \s+ is \s+ an \s+ example \s+ of \s+ the \s+ license \s+ notice \s+ + to \s+ use \s+ after \s+ the \s+ copyright \s+ line\(s\) \s+ using \s+ all \s+ the \s+ + features \s+ of \s+ the \s+ GFDL/xiso ) { # allow only one text unless ($name =~ m/maintain/) { @@ -621,7 +619,7 @@ sub find_cruft { $licenseproblemhash{'gfdl-invariants'} = 1; } } else { - tag 'license-problem-gfdl-invariants', $name; + tag 'license-problem-gfdl-invariants', $name, "\"$gfdlsections\""; $licenseproblemhash{'gfdl-invariants'} = 1; } } -- 1.7.10.4 -- To UNSUBSCRIBE, email to debian-bugs-dist-requ...@lists.debian.org with a subject of "unsubscribe". Trouble? Contact listmas...@lists.debian.org