commit perl-XML-Twig for openSUSE:Factory

h_root Mon, 07 Mar 2011 06:09:35 -0800

Hello community,

here is the log from the commit of package perl-XML-Twig for openSUSE:Factory
checked in at Mon Mar 7 15:09:29 CET 2011.




--------
--- perl-XML-Twig/perl-XML-Twig.changes 2010-12-16 13:56:09.000000000 +0100
+++ /mounts/work_src_done/STABLE/perl-XML-Twig/perl-XML-Twig.changes    
2011-02-28 00:55:32.000000000 +0100
@@ -1,0 +2,15 @@
+Sun Feb 27 23:43:46 UTC 2011 - [email protected]
+
+- update to 3.38:
+  * fixed: RT#65865: _ should be allowed at the start on an XML name
+  * removed: making att and class lvalues created problems: in certain context
+    they made regular calls to the method create empty        attributes. I
+    could find no satisfactory fix,they were either incompletes, or to complex
+    for often used methods. So att and class are back to being regular, non
+    l-value methods.  latt and lclass are the l-value versions.
+  * added: documented the -html option for xml_grep, that allows processing
+    HTML input
+  * added: the -Tidy option to xml_grep, that uses HTML::Tidy to convert HTML
+    to XML
+
+-------------------------------------------------------------------

calling whatdependson for head-i586


Old:
----
  XML-Twig-3.37.tar.bz2

New:
----
  XML-Twig-3.38.tar.bz2

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ perl-XML-Twig.spec ++++++
--- /var/tmp/diff_new_pack.cYmafR/_old  2011-03-07 15:07:45.000000000 +0100
+++ /var/tmp/diff_new_pack.cYmafR/_new  2011-03-07 15:07:45.000000000 +0100
@@ -1,7 +1,7 @@
 #
-# spec file for package perl-XML-Twig (Version 3.37 )
+# spec file for package perl-XML-Twig
 #
-# Copyright (c) 2010 SUSE LINUX Products GmbH, Nuernberg, Germany.
+# Copyright (c) 2011 SUSE LINUX Products GmbH, Nuernberg, Germany.
 #
 # All modifications and additions to the file contributed by third parties
 # remain the property of their copyright owners, unless otherwise agreed
@@ -19,7 +19,7 @@
 
 
 Name:           perl-XML-Twig
-Version:        3.37 
+Version:        3.38
 Release:        1
 Requires:       perl-XML-Parser
 BuildRequires:  perl-XML-Parser
@@ -27,8 +27,9 @@
 AutoReqProv:    on
 Group:          Development/Libraries/Perl
 License:        Artistic
-Url:            http://cpan.org/modules/by-module/XML/
-Summary:        Tree interface to XML documents allowing processing chunk by 
chunk of huge documents
+Url:            http://search.cpan.org/dist/XML::Twig
+Summary:        Tree interface to XML documents
+# http://search.cpan.org/CPAN/authors/id/M/MI/MIROD/XML-Twig-%{version}.tar.gz
 Source:         XML-Twig-%{version}.tar.bz2
 BuildRoot:      %{_tmppath}/%{name}-%{version}-build
 %{perl_requires}
@@ -61,25 +62,31 @@
 %setup -q -n XML-Twig-%{version}
 
 %build
-perl Makefile.PL
-make
+%__perl Makefile.PL
+%__make
 
 %check
-make test
+%__make test
 
 %install
 %perl_make_install
 %perl_process_packlist
 
 %clean
-rm -rf $RPM_BUILD_ROOT
+%{?buildroot:%__rm -rf "%{buildroot}"}
 
 %files
 %defattr(-,root,root)
 %doc Changes README
-%doc %{_mandir}/man?/*
-%{perl_vendorlib}/XML
-%{perl_vendorarch}/auto/XML
-%{_bindir}/xml_*
+%dir %{perl_vendorlib}/XML
+%{perl_vendorlib}/XML/Twig.pm
+%{perl_vendorlib}/XML/Twig
+%{_bindir}/xml_grep
+%{_bindir}/xml_merge
+%{_bindir}/xml_pp
+%{_bindir}/xml_spellcheck
+%{_bindir}/xml_split
+%doc %{_mandir}/man1/xml_*.1%{ext_man}
+%doc %{_mandir}/man3/XML::Twig.%{perl_man3ext}%{ext_man}
 
 %changelog

++++++ XML-Twig-3.37.tar.bz2 -> XML-Twig-3.38.tar.bz2 ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/XML-Twig-3.37/Changes new/XML-Twig-3.38/Changes
--- old/XML-Twig-3.37/Changes   2010-10-09 07:45:32.000000000 +0200
+++ new/XML-Twig-3.38/Changes   2011-02-27 07:40:45.000000000 +0100
@@ -1,11 +1,38 @@
 $Id: /xmltwig/trunk/Changes 33 2008-04-30T08:03:41.004487Z mrodrigu  $
 
 CHANGES
+version 3.38
+date: 2011-07-27
+# minor maintenance release
+fixed: RT 65865: _ should be allowed at the start on an XML name
+       https://rt.cpan.org/Ticket/Display.html?id=65865
+       reported by Steve Prokopowich
+removed: making att and class lvalues created problems: in certain
+       context they made regular calls to the method create empty 
+�      attributes. I could find no satisfactory fix,they were either
+       incompletes, or to complex for often used methods. So att and
+       class are back to being regular, non l-value methods. 
+       latt and lclass are the l-value versions.
+added: documented the -html option for xml_grep, that allows processing
+       HTML input
+added: the -Tidy option to xml_grep, that uses HTML::Tidy to convert
+       HTML to XML
+
 version 3.37
 date: 2010-10-08
 # minor maintenance release
 fixed: more tests fixed for HTML::TreeBuilder, hopefully 
        will pass now
+removed: making att and class lvalues created problems: in certain
+       context they made regular calls to the method create empty 
+�      attributes. I could find no satisfactory fix,they were either
+       incompletes, or to complex for often used methods. So att and
+       class are back to being regular, non l-value methods. 
+       latt and lclass are the l-value versions.
+added: documented the -html option for xml_grep, that allows processing
+       HTML input
+added: the -Tidy option to xml_grep, that uses HTML::Tidy to convert
+       HTML to XML
 
 version 3.36
 date: 2010-10-07
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/XML-Twig-3.37/META.yml new/XML-Twig-3.38/META.yml
--- old/XML-Twig-3.37/META.yml  2010-10-10 07:52:02.000000000 +0200
+++ new/XML-Twig-3.38/META.yml  2011-02-27 07:42:00.000000000 +0100
@@ -1,6 +1,6 @@
 --- #YAML:1.0
 name:               XML-Twig
-version:            3.37
+version:            3.38
 abstract:           XML, The Perl Way
 author:
     - Michel Rodriguez <[email protected]>
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/XML-Twig-3.37/Twig.pm new/XML-Twig-3.38/Twig.pm
--- old/XML-Twig-3.37/Twig.pm   2010-10-10 07:52:01.000000000 +0200
+++ new/XML-Twig-3.38/Twig.pm   2011-02-27 07:41:59.000000000 +0100
@@ -38,16 +38,16 @@
 
 # xml name (leading # allowed)
 # first line is for perl 5.005, second line for modern perl, that accept 
character classes
-my $REG_NAME       = q{(?:(?:[^\W\d]|[:#])(?:[\w.-]*:)?[\w.-]*)};     # does 
not work for leading non-ascii letters
-   $REG_NAME       = q{(?:(?:[[:alpha:]:#])(?:[\w.-]*:)?[\w.-]*)};    # > perl 
5.5
+my $REG_NAME       = q{(?:(?:[^\W\d]|[:#_])(?:[\w.-]*:)?[\w.-]*)};     # does 
not work for leading non-ascii letters
+   $REG_NAME       = q{(?:(?:[[:alpha:]:#_])(?:[\w.-]*:)?[\w.-]*)};    # > 
perl 5.5
 
 # name or wildcard (* or '') (leading # allowed)
-my $REG_NAME_W     = q{(?:(?:[^\W\d]|[:#])(?:[\w.-]*:)?[\w.-]*|\*)}; # does 
not work for leading non-ascii letters
-   $REG_NAME_W     = q{(?:(?:[[:alpha:]:#])(?:[\w.-]*:)?[\w.-]*|\*)}; # > perl 
5.5
+my $REG_NAME_W     = q{(?:(?:[^\W\d]|[:#_])(?:[\w.-]*:)?[\w.-]*|\*)}; # does 
not work for leading non-ascii letters
+   $REG_NAME_W     = q{(?:(?:[[:alpha:]:#_])(?:[\w.-]*:)?[\w.-]*|\*)}; # > 
perl 5.5
 
 # name or wildcard (* or '') (leading # allowed) with optional class
-my $REG_NAME_WC    = 
q{(?(?:(?:[^\W\d]|[:#])(?:[\w.-]*:)?[\w.-]*|\*)(?:\.[\w-]+)?|(?:\.[\w-]+))}; # 
does not work for leading non-ascii letters
-   $REG_NAME_WC    = 
q{(?:(?:(?:[[:alpha:]:#])(?:[\w.-]*:)?[\w.-]*|\*)(?:\.[\w-]+)?|(?:\.[\w-]+))}; 
# > perl 5.5
+my $REG_NAME_WC    = 
q{(?(?:(?:[^\W\d]|[:#_])(?:[\w.-]*:)?[\w.-]*|\*)(?:\.[\w-]+)?|(?:\.[\w-]+))}; # 
does not work for leading non-ascii letters
+   $REG_NAME_WC    = 
q{(?:(?:(?:[[:alpha:]:#_])(?:[\w.-]*:)?[\w.-]*|\*)(?:\.[\w-]+)?|(?:\.[\w-]+))}; 
# > perl 5.5
 
 
 my $REG_REGEXP     = q{(?:/(?:[^\\/]|\\.)*/[eimsox]*)};               # regexp
@@ -97,7 +97,7 @@
 
 BEGIN
 { 
-$VERSION = '3.37';
+$VERSION = '3.38';
 
 use XML::Parser;
 my $needVersion = '2.23';
@@ -783,6 +783,13 @@
     return $@ ? $t->_reset_twig_after_error : $t;
   }
 
+sub parseurl_html
+  { my $t= shift;
+    _use( 'LWP::Simple') or croak "missing LWP::Simple"; 
+    $t->parse_html( LWP::Simple::get( shift()), @_); 
+  }
+
+
 # uses eval to catch the parser's death
 sub safe_parse_html
   { my $t= shift;
@@ -847,7 +854,7 @@
     if( _is_well_formed_xml( $_[0]))
       { $t->parse( @_) }
     else
-      { my $html= _html2xml( $_[0]);
+      { my $html= $t->{use_tidy} ?  _tidy_html( $_[0]) : _html2xml( $_[0]);
         if( _is_well_formed_xml( $html))
           { $t->parse( $html); }
         else
@@ -913,7 +920,7 @@
                          wrap => 0,
                          break_before_br => 0,
                        };
-
+    $options ||= {};
     my $tidy_options= { %$TIDY_DEFAULTS, %$options};
     my $tidy = HTML::Tidy->new( $tidy_options);
     $tidy->ignore( type => 1, type => 2 ); # 1 is TIDY_WARNING, 2 is 
TIDY_ERROR, not clean
@@ -1366,7 +1373,7 @@
 sub _set_regexp_handler
   { my( $handlers, $path, $handler, $prev_handler)= @_; 
     # if the expression was a regexp it is now a string (it was stringified 
when it became a hash key)
-    if( $path=~ m{^\(\?([xism]*)(?:-[xism]*)?:(.*)\)$}) 
+    if( $path=~ m{^\(\?([\^xism]*)(?:-[\^xism]*)?:(.*)\)$}) 
       { my $regexp= qr/(?$1:$2)/; # convert it back into a regexp
         my $sub= sub { my( $stack)= @_; return( $stack->[-1]->{_tag} =~ 
$regexp ) }; 
         my $handler_data=  { tag=> '*', score => { type => $REGEXP_TRIGGER} , 
trigger => $sub, 
@@ -5522,9 +5529,12 @@
   }
 
 # methods dealing with the class attribute, convenient if you work with xhtml
-sub class     
+sub class   {   $_[0]->{att}->{class}; }
+# lvalue version of class. separate from class to avoid problem like RT#
+sub lclass     
           :lvalue    # > perl 5.5
-{ my( $elt)= @_; $elt->{'att'}->{'class'}; }
+  { $_[0]->{att}->{class}; }
+
 sub set_class { my( $elt, $class)= @_; $elt->set_att( class => $class); }
 
 # adds a class to an element
@@ -5611,9 +5621,12 @@
     return $elt;
   }
  
-sub att 
+sub att {  $_[0]->{att}->{$_[1]}; }
+# lvalue version of att. separate from class to avoid problem like RT#
+sub latt 
           :lvalue    # > perl 5.5
-{ $_[0]->{att}->{$_[1]}; }
+  { $_[0]->{att}->{$_[1]}; }
+
 sub del_att 
   { my $elt= shift;
     while( @_) { delete $elt->{'att'}->{shift()}; }
@@ -7327,22 +7340,25 @@
       my $replacement_string;
       my $is_string= _is_string( $replace);
       foreach my $text_elt ($elt->descendants_or_self( $TEXT))
-        { if( $is_string)
+        { 
+          if( $is_string)
             { my $text= $text_elt->text;
               $text=~ s{$regexp}{ _replace_var( $replace, $1, $2, $3, $4, $5, 
$6, $7, $8, $9)}egx;
               $text_elt->set_text( $text);
            }
           else
-            { 
+            {  
               no utf8; # = perl 5.6
               my $replace_sub= ( $replace_sub{$replace} ||= 
_install_replace_sub( $replace)); 
               my $text= $text_elt->text;
               my $pos=0;  # used to skip text that was previously matched
+              my $found_hit;
               while( my( $pre_match_string, $match_string, @var)= ($text=~ 
m{(.*?)($regexp)}sg))
-                { my $match_start  = length( $pre_match_string);
-                  my $match        = $text_elt->split_at( $match_start + $pos);
+                { $found_hit=1;
+                  my $match_start  = length( $pre_match_string);
+                  my $match        = $match_start ? $text_elt->split_at( 
$match_start + $pos) : $text_elt;
                   my $match_length = length( $match_string);
-                  my $post_match   = $match->split_at( $match_length);
+                  my $post_match   = $match->split_at( $match_length); 
                   $replace_sub->( $match, @var);
                   # merge previous text with current one
                   my $next_sibling;
@@ -7357,21 +7373,12 @@
                   # go to next 
                   $text_elt= $post_match;
                   $text= $post_match->text;
-                  # merge last text element with next one if needed,
-                  # the match will be against the non-matched text,
-                  # so $pos is used to skip the merged part
-                  my $prev_sibling;
-                  if(    ($prev_sibling=  $post_match->{prev_sibling})
-                      && ($XML::Twig::index2gi[$post_match->{'gi'}] eq 
$XML::Twig::index2gi[$prev_sibling->{'gi'}])
-                    )
-                    { $pos= length( $prev_sibling->text);
-                      $prev_sibling->merge_text( $post_match); 
-                    }
 
                   # if the match is at the end of the text an empty #PCDATA is 
left: remove it 
                   if( !$text_elt->text) { $text_elt->delete; } 
                   
                 }
+              if( $found_hit) { $text_elt->normalize; } # in case consecutive 
#PCDATA have been created 
               
             }
         }
@@ -7392,22 +7399,22 @@
   sub _install_replace_sub
     { my $replace_exp= shift;
       my @item= split m{(&e[ln]t\s*\([^)]*\))}, $replace_exp;
-      my $sub= q{ my( $match, @var)= @_; unshift @var, undef; my $new; };
+      my $sub= q{ my( $match, @var)= @_; my $new; my $last_inserted=$match;};
       my( $gi, $exp);
       foreach my $item (@item)
-        { if(    $item=~ m{^&elt\s*\(([^)]*)\)})
-            { $exp= $1;
-            }
+        { next if ! length $item;
+          if(    $item=~ m{^&elt\s*\(([^)]*)\)})
+            { $exp= $1; }
           elsif( $item=~ m{^&ent\s*\(\s*([^\s)]*)\s*\)})
             { $exp= " '#ENT' => $1"; }
           else
             { $exp= qq{ '#PCDATA' => "$item"}; }
-          $exp=~ s{\$(\d)}{\$var[$1]}g; # replace references to matches
+          $exp=~ s{\$(\d)}{my $i= $1-1; "\$var[$i]"}eg; # replace references 
to matches
           $sub.= qq{ \$new= \$match->new( $exp); };
-          $sub .= q{ $new->paste( before => $match); };
+          $sub .= q{ $new->paste( after => $last_inserted); 
$last_inserted=$new;};
         }
       $sub .= q{ $match->delete; };
-      #$sub=~ s/;/;\n/g;
+      #$sub=~ s/;/;\n/g; warn "subs: $sub"; 
       my $coderef= eval "sub { $NO_WARNINGS; $sub }";
       if( $@) { croak( "invalid replacement expression $replace_exp: ",$@); }
       return $coderef;
@@ -8440,6 +8447,7 @@
 
 } # end of block containing package globals ($pretty_print, $quotes, 
keep_encoding...)
 
+# merges consecutive #PCDATAs in am element
 sub normalize
   { my( $elt)= @_;
     my @descendants= $elt->descendants( $PCDATA);
@@ -10486,13 +10494,15 @@
 get get an extra CDATA section inside ( <!-- foo --> becomes
 <!-- <![CDATA[ foo ]]> -->
 
-=item parsefile_html
+=item parsefile_html ($file)
 
 parse an HTML file (by converting it to XML using HTML::TreeBuilder, which 
-needs to be available). The file is loaded completely in memory and converted
-to XML before being parsed.
+needs to be available, or HTML::Tidy if the C<use_tidy> option was used).
+The file is loaded completely in memory and converted to XML before being 
parsed.
+
+=item parseurl_html ($url $optional_user_agent)
 
-B<Alpha>: implementation, and thus generated XML could change. 
+parse an URL as html the same way C<L<parse_html>> does
 
 =item safe_parseurl_html ($url $optional_user_agent)
 
@@ -11546,7 +11556,11 @@
 
 Return the value of attribute C<$att> or C<undef>
 
-this method is an lvalue, so you can do C<< $elt->{'att'}->{'foo'}= 'bar' >>
+=item latt          ($att)
+
+Return the value of attribute C<$att> or C<undef>
+
+this method is an lvalue, so you can do C<< $elt->latt( 'foo')= 'bar' >> or 
C<< $elt->latt( 'foo')++; >>
 
 =item set_att      ($att, $att_value)
 
@@ -12665,7 +12679,10 @@
 attribute are quite convenient when dealing with XHTML, or plain XML that
 will eventually be displayed using CSS)
 
-this method is an lvalue, so you can do C<< $elt->class= "foo" >>
+=item lclass
+
+same as class, except that
+this method is an lvalue, so you can do C<< $elt->lclass= "foo" >>
 
 =item set_class ($class)
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/XML-Twig-3.37/Twig_pm.slow 
new/XML-Twig-3.38/Twig_pm.slow
--- old/XML-Twig-3.37/Twig_pm.slow      2010-10-09 18:17:09.000000000 +0200
+++ new/XML-Twig-3.38/Twig_pm.slow      2011-02-26 09:00:28.000000000 +0100
@@ -38,16 +38,16 @@
 
 # xml name (leading # allowed)
 # first line is for perl 5.005, second line for modern perl, that accept 
character classes
-my $REG_NAME       = q{(?:(?:[^\W\d]|[:#])(?:[\w.-]*:)?[\w.-]*)};     # does 
not work for leading non-ascii letters
-   $REG_NAME       = q{(?:(?:[[:alpha:]:#])(?:[\w.-]*:)?[\w.-]*)};    # > perl 
5.5
+my $REG_NAME       = q{(?:(?:[^\W\d]|[:#_])(?:[\w.-]*:)?[\w.-]*)};     # does 
not work for leading non-ascii letters
+   $REG_NAME       = q{(?:(?:[[:alpha:]:#_])(?:[\w.-]*:)?[\w.-]*)};    # > 
perl 5.5
 
 # name or wildcard (* or '') (leading # allowed)
-my $REG_NAME_W     = q{(?:(?:[^\W\d]|[:#])(?:[\w.-]*:)?[\w.-]*|\*)}; # does 
not work for leading non-ascii letters
-   $REG_NAME_W     = q{(?:(?:[[:alpha:]:#])(?:[\w.-]*:)?[\w.-]*|\*)}; # > perl 
5.5
+my $REG_NAME_W     = q{(?:(?:[^\W\d]|[:#_])(?:[\w.-]*:)?[\w.-]*|\*)}; # does 
not work for leading non-ascii letters
+   $REG_NAME_W     = q{(?:(?:[[:alpha:]:#_])(?:[\w.-]*:)?[\w.-]*|\*)}; # > 
perl 5.5
 
 # name or wildcard (* or '') (leading # allowed) with optional class
-my $REG_NAME_WC    = 
q{(?(?:(?:[^\W\d]|[:#])(?:[\w.-]*:)?[\w.-]*|\*)(?:\.[\w-]+)?|(?:\.[\w-]+))}; # 
does not work for leading non-ascii letters
-   $REG_NAME_WC    = 
q{(?:(?:(?:[[:alpha:]:#])(?:[\w.-]*:)?[\w.-]*|\*)(?:\.[\w-]+)?|(?:\.[\w-]+))}; 
# > perl 5.5
+my $REG_NAME_WC    = 
q{(?(?:(?:[^\W\d]|[:#_])(?:[\w.-]*:)?[\w.-]*|\*)(?:\.[\w-]+)?|(?:\.[\w-]+))}; # 
does not work for leading non-ascii letters
+   $REG_NAME_WC    = 
q{(?:(?:(?:[[:alpha:]:#_])(?:[\w.-]*:)?[\w.-]*|\*)(?:\.[\w-]+)?|(?:\.[\w-]+))}; 
# > perl 5.5
 
 
 my $REG_REGEXP     = q{(?:/(?:[^\\/]|\\.)*/[eimsox]*)};               # regexp
@@ -97,7 +97,7 @@
 
 BEGIN
 { 
-$VERSION = '3.37';
+$VERSION = '3.38';
 
 use XML::Parser;
 my $needVersion = '2.23';
@@ -783,6 +783,13 @@
     return $@ ? $t->_reset_twig_after_error : $t;
   }
 
+sub parseurl_html
+  { my $t= shift;
+    _use( 'LWP::Simple') or croak "missing LWP::Simple"; 
+    $t->parse_html( LWP::Simple::get( shift()), @_); 
+  }
+
+
 # uses eval to catch the parser's death
 sub safe_parse_html
   { my $t= shift;
@@ -847,7 +854,7 @@
     if( _is_well_formed_xml( $_[0]))
       { $t->parse( @_) }
     else
-      { my $html= _html2xml( $_[0]);
+      { my $html= $t->{use_tidy} ?  _tidy_html( $_[0]) : _html2xml( $_[0]);
         if( _is_well_formed_xml( $html))
           { $t->parse( $html); }
         else
@@ -913,7 +920,7 @@
                          wrap => 0,
                          break_before_br => 0,
                        };
-
+    $options ||= {};
     my $tidy_options= { %$TIDY_DEFAULTS, %$options};
     my $tidy = HTML::Tidy->new( $tidy_options);
     $tidy->ignore( type => 1, type => 2 ); # 1 is TIDY_WARNING, 2 is 
TIDY_ERROR, not clean
@@ -1366,7 +1373,7 @@
 sub _set_regexp_handler
   { my( $handlers, $path, $handler, $prev_handler)= @_; 
     # if the expression was a regexp it is now a string (it was stringified 
when it became a hash key)
-    if( $path=~ m{^\(\?([xism]*)(?:-[xism]*)?:(.*)\)$}) 
+    if( $path=~ m{^\(\?([\^xism]*)(?:-[\^xism]*)?:(.*)\)$}) 
       { my $regexp= qr/(?$1:$2)/; # convert it back into a regexp
         my $sub= sub { my( $stack)= @_; return( $stack->[-1]->{_tag} =~ 
$regexp ) }; 
         my $handler_data=  { tag=> '*', score => { type => $REGEXP_TRIGGER} , 
trigger => $sub, 
@@ -5523,9 +5530,12 @@
   }
 
 # methods dealing with the class attribute, convenient if you work with xhtml
-sub class     
+sub class   {   $_[0]->{att}->{class}; }
+# lvalue version of class. separate from class to avoid problem like RT#
+sub lclass     
           :lvalue    # > perl 5.5
-{ my( $elt)= @_; $elt->att( 'class'); }
+  { $_[0]->{att}->{class}; }
+
 sub set_class { my( $elt, $class)= @_; $elt->set_att( class => $class); }
 
 # adds a class to an element
@@ -5612,9 +5622,12 @@
     return $elt;
   }
  
-sub att 
+sub att {  $_[0]->{att}->{$_[1]}; }
+# lvalue version of att. separate from class to avoid problem like RT#
+sub latt 
           :lvalue    # > perl 5.5
-{ $_[0]->{att}->{$_[1]}; }
+  { $_[0]->{att}->{$_[1]}; }
+
 sub del_att 
   { my $elt= shift;
     while( @_) { delete $elt->{'att'}->{shift()}; }
@@ -7328,22 +7341,25 @@
       my $replacement_string;
       my $is_string= _is_string( $replace);
       foreach my $text_elt ($elt->descendants_or_self( $TEXT))
-        { if( $is_string)
+        { 
+          if( $is_string)
             { my $text= $text_elt->text;
               $text=~ s{$regexp}{ _replace_var( $replace, $1, $2, $3, $4, $5, 
$6, $7, $8, $9)}egx;
               $text_elt->set_text( $text);
            }
           else
-            { 
+            {  
               no utf8; # = perl 5.6
               my $replace_sub= ( $replace_sub{$replace} ||= 
_install_replace_sub( $replace)); 
               my $text= $text_elt->text;
               my $pos=0;  # used to skip text that was previously matched
+              my $found_hit;
               while( my( $pre_match_string, $match_string, @var)= ($text=~ 
m{(.*?)($regexp)}sg))
-                { my $match_start  = length( $pre_match_string);
-                  my $match        = $text_elt->split_at( $match_start + $pos);
+                { $found_hit=1;
+                  my $match_start  = length( $pre_match_string);
+                  my $match        = $match_start ? $text_elt->split_at( 
$match_start + $pos) : $text_elt;
                   my $match_length = length( $match_string);
-                  my $post_match   = $match->split_at( $match_length);
+                  my $post_match   = $match->split_at( $match_length); 
                   $replace_sub->( $match, @var);
                   # merge previous text with current one
                   my $next_sibling;
@@ -7358,21 +7374,12 @@
                   # go to next 
                   $text_elt= $post_match;
                   $text= $post_match->text;
-                  # merge last text element with next one if needed,
-                  # the match will be against the non-matched text,
-                  # so $pos is used to skip the merged part
-                  my $prev_sibling;
-                  if(    ($prev_sibling=  $post_match->_prev_sibling)
-                      && ($post_match->gi eq $prev_sibling->gi)
-                    )
-                    { $pos= length( $prev_sibling->text);
-                      $prev_sibling->merge_text( $post_match); 
-                    }
 
                   # if the match is at the end of the text an empty #PCDATA is 
left: remove it 
                   if( !$text_elt->text) { $text_elt->delete; } 
                   
                 }
+              if( $found_hit) { $text_elt->normalize; } # in case consecutive 
#PCDATA have been created 
               
             }
         }
@@ -7393,22 +7400,22 @@
   sub _install_replace_sub
     { my $replace_exp= shift;
       my @item= split m{(&e[ln]t\s*\([^)]*\))}, $replace_exp;
-      my $sub= q{ my( $match, @var)= @_; unshift @var, undef; my $new; };
+      my $sub= q{ my( $match, @var)= @_; my $new; my $last_inserted=$match;};
       my( $gi, $exp);
       foreach my $item (@item)
-        { if(    $item=~ m{^&elt\s*\(([^)]*)\)})
-            { $exp= $1;
-            }
+        { next if ! length $item;
+          if(    $item=~ m{^&elt\s*\(([^)]*)\)})
+            { $exp= $1; }
           elsif( $item=~ m{^&ent\s*\(\s*([^\s)]*)\s*\)})
             { $exp= " '#ENT' => $1"; }
           else
             { $exp= qq{ '#PCDATA' => "$item"}; }
-          $exp=~ s{\$(\d)}{\$var[$1]}g; # replace references to matches
+          $exp=~ s{\$(\d)}{my $i= $1-1; "\$var[$i]"}eg; # replace references 
to matches
           $sub.= qq{ \$new= \$match->new( $exp); };
-          $sub .= q{ $new->paste( before => $match); };
+          $sub .= q{ $new->paste( after => $last_inserted); 
$last_inserted=$new;};
         }
       $sub .= q{ $match->delete; };
-      #$sub=~ s/;/;\n/g;
+      #$sub=~ s/;/;\n/g; warn "subs: $sub"; 
       my $coderef= eval "sub { $NO_WARNINGS; $sub }";
       if( $@) { croak( "invalid replacement expression $replace_exp: ",$@); }
       return $coderef;
@@ -8442,6 +8449,7 @@
 
 } # end of block containing package globals ($pretty_print, $quotes, 
keep_encoding...)
 
+# merges consecutive #PCDATAs in am element
 sub normalize
   { my( $elt)= @_;
     my @descendants= $elt->descendants( $PCDATA);
@@ -10488,13 +10496,15 @@
 get get an extra CDATA section inside ( <!-- foo --> becomes
 <!-- <![CDATA[ foo ]]> -->
 
-=item parsefile_html
+=item parsefile_html ($file)
 
 parse an HTML file (by converting it to XML using HTML::TreeBuilder, which 
-needs to be available). The file is loaded completely in memory and converted
-to XML before being parsed.
+needs to be available, or HTML::Tidy if the C<use_tidy> option was used).
+The file is loaded completely in memory and converted to XML before being 
parsed.
+
+=item parseurl_html ($url $optional_user_agent)
 
-B<Alpha>: implementation, and thus generated XML could change. 
+parse an URL as html the same way C<L<parse_html>> does
 
 =item safe_parseurl_html ($url $optional_user_agent)
 
@@ -11548,7 +11558,11 @@
 
 Return the value of attribute C<$att> or C<undef>
 
-this method is an lvalue, so you can do C<< $elt->att( 'foo')= 'bar' >>
+=item latt          ($att)
+
+Return the value of attribute C<$att> or C<undef>
+
+this method is an lvalue, so you can do C<< $elt->latt( 'foo')= 'bar' >> or 
C<< $elt->latt( 'foo')++; >>
 
 =item set_att      ($att, $att_value)
 
@@ -12667,7 +12681,10 @@
 attribute are quite convenient when dealing with XHTML, or plain XML that
 will eventually be displayed using CSS)
 
-this method is an lvalue, so you can do C<< $elt->class= "foo" >>
+=item lclass
+
+same as class, except that
+this method is an lvalue, so you can do C<< $elt->lclass= "foo" >>
 
 =item set_class ($class)
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/XML-Twig-3.37/t/test_3_36.t 
new/XML-Twig-3.38/t/test_3_36.t
--- old/XML-Twig-3.37/t/test_3_36.t     2010-10-09 09:39:36.000000000 +0200
+++ new/XML-Twig-3.38/t/test_3_36.t     2011-02-25 11:28:06.000000000 +0100
@@ -12,7 +12,7 @@
  
 use XML::Twig;
 
-my $TMAX=66;
+my $TMAX=67;
 print "1..$TMAX\n";
 
 { my $doc=q{<d><s id="s1"><t>title 1</t><s id="s2"><t>title 2</t></s><s 
id="s3"></s></s><s id="s4"></s></d>};
@@ -84,19 +84,22 @@
 
 { if( $] >= 5.006)
     { my $t= XML::Twig->parse( q{<d><e/></d>});
-      $t->first_elt( 'e')->att( 'a')= 'b';
+      $t->first_elt( 'e')->latt( 'a')= 'b';
       is( $t->sprint, q{<d><e a="b"/></d>}, 'lvalued attribute (no 
attributes)');
-      $t->first_elt( 'e')->att( 'c')= 'd';
+      $t->first_elt( 'e')->latt( 'c')= 'd';
       is( $t->sprint, q{<d><e a="b" c="d"/></d>}, 'lvalued attribute 
(attributes)');
-      $t->first_elt( 'e')->att( 'c')= '';
+      $t->first_elt( 'e')->latt( 'c')= '';
       is( $t->sprint, q{<d><e a="b" c=""/></d>}, 'lvalued attribute (modifying 
existing attributes)');
-      $t->root->class= 'foo';
+      $t->root->lclass= 'foo';
       is( $t->sprint, q{<d class="foo"><e a="b" c=""/></d>}, 'lvalued class 
(new class)');
-      $t->root->class=~ s{fo}{tot};
+      $t->root->lclass=~ s{fo}{tot};
       is( $t->sprint, q{<d class="toto"><e a="b" c=""/></d>}, 'lvalued class 
(modify class)');
+      $t= XML::Twig->parse( '<d a="1"/>');
+      $t->root->latt( 'a')++;
+      is( $t->sprint, '<d a="2"/>', '++ on attribute');
     }
   else
-    { skip( 5 => "cannot use lvalued attributes with perl $]"); }
+    { skip( 6 => "cannot use lvalued attributes with perl $]"); }
 }
 
 # used for all HTML parsing tests with HTML::Tidy 
@@ -112,7 +115,6 @@
     }
   else
     {
-
       my $doc= '<html><head><title>a title</title></head><body>par 1<p>par 
2<br>after the break</body></html>';
       my $t= XML::Twig->new( use_tidy => 1)->parse_html( $doc);
       my $inner= '<ul><li>foo</li><li>bar</li></ul>';
@@ -240,7 +242,7 @@
 
 { if( XML::Twig::_use( 'HTML::TreeBuilder'))
     { my $html_with_Amp= XML::Twig->new->parse_html( 
'<html><head></head><body>&Amp;</body></html>')->sprint;
-      if( $HTML::TreeBuilder::VERSION < 4.00)
+      if( $HTML::TreeBuilder::VERSION <= 3.23)
         { is( $html_with_Amp, '<html><head></head><body>&amp;</body></html>', 
'&Amp; used in html (fixed HTB < 4.00)'); }
       else
         { is( $html_with_Amp, 
'<html><head></head><body>&amp;Amp;</body></html>', '&Amp; used in html (NOT 
fixed HTB > r.00)'); }
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/XML-Twig-3.37/t/tools.pm new/XML-Twig-3.38/t/tools.pm
--- old/XML-Twig-3.37/t/tools.pm        2010-09-21 19:43:40.000000000 +0200
+++ new/XML-Twig-3.38/t/tools.pm        2011-02-25 11:00:45.000000000 +0100
@@ -392,6 +392,13 @@
     return $perl;
   }
 
+# scrubs xhtml generated by tools from likely to change bits
+sub scrub_xhtml
+  { my( $html)= @_;
+    $html=~ s{<!DOCTYPE[^>]*>}{};   # scrub doctype
+    $html=~ s{\s*xmlns="[^"]*"}{};  # scrup namespace declaration
+    return $html;
+  }
 
 __END__
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/XML-Twig-3.37/tools/xml_grep/xml_grep 
new/XML-Twig-3.38/tools/xml_grep/xml_grep
--- old/XML-Twig-3.37/tools/xml_grep/xml_grep   2010-09-20 18:51:37.000000000 
+0200
+++ new/XML-Twig-3.38/tools/xml_grep/xml_grep   2011-02-26 08:51:37.000000000 
+0100
@@ -5,14 +5,15 @@
 use Pod::Usage;
 use XML::Twig;
 
-my $VERSION="0.8";
+my $VERSION="0.9";
 
 # options (all used globally in the script)
 my( $help, $man, @roots, @paths, $files, 
     $count, $nb_results, $nb_results_per_file,
     $encoding, @exclude,
     $wrap, $nowrap, $descr, $group, $pretty_print, $version, $text_only, 
$date, 
-    $html, $verbose, $strict
+    $html, $tidy,
+    $verbose, $strict
   );
 
 # used to check if the wrapping tags need to be output
@@ -44,6 +45,7 @@
             'date!'           => \$date,
             'strict'          => \$strict,
             'html'            => \$html,
+            'tidy'            => \$tidy,
             'verbose'         => \$verbose,
     ) or pod2usage(2);
 
@@ -113,7 +115,7 @@
   { create_regular_handlers( \%options, \@roots, \@paths);
   }
     
-    
+if( $tidy) { $html= 1; $options{use_tidy}= 1; }     
 
 
 $options{pretty_print}    = $pretty_print if( $pretty_print);
@@ -418,12 +420,20 @@
 
 if the option is used but no style is given then 'C<indented>' is used
 
-short form for this arggument is B<-s>
+short form for this argument is B<-s>
 
 =item B<--text_only>
 
 Displays the text of the results, one by line.
 
+=item B<--html>
+
+Allow HTML input, files are converted using HTML::TreeBuilder
+
+=item B<--Tidy>
+
+Allow HTML input, files are converted using HTML::Tidy 
+
 =back
 
 =head2 Condition Syntax


++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++



Remember to have fun...

-- 
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

commit perl-XML-Twig for openSUSE:Factory

Reply via email to