Hello community, here is the log from the commit of package perl-XML-Twig for openSUSE:Factory checked in at Mon Mar 7 15:09:29 CET 2011.
-------- --- perl-XML-Twig/perl-XML-Twig.changes 2010-12-16 13:56:09.000000000 +0100 +++ /mounts/work_src_done/STABLE/perl-XML-Twig/perl-XML-Twig.changes 2011-02-28 00:55:32.000000000 +0100 @@ -1,0 +2,15 @@ +Sun Feb 27 23:43:46 UTC 2011 - [email protected] + +- update to 3.38: + * fixed: RT#65865: _ should be allowed at the start on an XML name + * removed: making att and class lvalues created problems: in certain context + they made regular calls to the method create empty attributes. I + could find no satisfactory fix,they were either incompletes, or to complex + for often used methods. So att and class are back to being regular, non + l-value methods. latt and lclass are the l-value versions. + * added: documented the -html option for xml_grep, that allows processing + HTML input + * added: the -Tidy option to xml_grep, that uses HTML::Tidy to convert HTML + to XML + +------------------------------------------------------------------- calling whatdependson for head-i586 Old: ---- XML-Twig-3.37.tar.bz2 New: ---- XML-Twig-3.38.tar.bz2 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ perl-XML-Twig.spec ++++++ --- /var/tmp/diff_new_pack.cYmafR/_old 2011-03-07 15:07:45.000000000 +0100 +++ /var/tmp/diff_new_pack.cYmafR/_new 2011-03-07 15:07:45.000000000 +0100 @@ -1,7 +1,7 @@ # -# spec file for package perl-XML-Twig (Version 3.37 ) +# spec file for package perl-XML-Twig # -# Copyright (c) 2010 SUSE LINUX Products GmbH, Nuernberg, Germany. +# Copyright (c) 2011 SUSE LINUX Products GmbH, Nuernberg, Germany. # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -19,7 +19,7 @@ Name: perl-XML-Twig -Version: 3.37 +Version: 3.38 Release: 1 Requires: perl-XML-Parser BuildRequires: perl-XML-Parser @@ -27,8 +27,9 @@ AutoReqProv: on Group: Development/Libraries/Perl License: Artistic -Url: http://cpan.org/modules/by-module/XML/ -Summary: Tree interface to XML documents allowing processing chunk by chunk of huge documents +Url: http://search.cpan.org/dist/XML::Twig +Summary: Tree interface to XML documents +# http://search.cpan.org/CPAN/authors/id/M/MI/MIROD/XML-Twig-%{version}.tar.gz Source: XML-Twig-%{version}.tar.bz2 BuildRoot: %{_tmppath}/%{name}-%{version}-build %{perl_requires} @@ -61,25 +62,31 @@ %setup -q -n XML-Twig-%{version} %build -perl Makefile.PL -make +%__perl Makefile.PL +%__make %check -make test +%__make test %install %perl_make_install %perl_process_packlist %clean -rm -rf $RPM_BUILD_ROOT +%{?buildroot:%__rm -rf "%{buildroot}"} %files %defattr(-,root,root) %doc Changes README -%doc %{_mandir}/man?/* -%{perl_vendorlib}/XML -%{perl_vendorarch}/auto/XML -%{_bindir}/xml_* +%dir %{perl_vendorlib}/XML +%{perl_vendorlib}/XML/Twig.pm +%{perl_vendorlib}/XML/Twig +%{_bindir}/xml_grep +%{_bindir}/xml_merge +%{_bindir}/xml_pp +%{_bindir}/xml_spellcheck +%{_bindir}/xml_split +%doc %{_mandir}/man1/xml_*.1%{ext_man} +%doc %{_mandir}/man3/XML::Twig.%{perl_man3ext}%{ext_man} %changelog ++++++ XML-Twig-3.37.tar.bz2 -> XML-Twig-3.38.tar.bz2 ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/XML-Twig-3.37/Changes new/XML-Twig-3.38/Changes --- old/XML-Twig-3.37/Changes 2010-10-09 07:45:32.000000000 +0200 +++ new/XML-Twig-3.38/Changes 2011-02-27 07:40:45.000000000 +0100 @@ -1,11 +1,38 @@ $Id: /xmltwig/trunk/Changes 33 2008-04-30T08:03:41.004487Z mrodrigu $ CHANGES +version 3.38 +date: 2011-07-27 +# minor maintenance release +fixed: RT 65865: _ should be allowed at the start on an XML name + https://rt.cpan.org/Ticket/Display.html?id=65865 + reported by Steve Prokopowich +removed: making att and class lvalues created problems: in certain + context they made regular calls to the method create empty +� attributes. I could find no satisfactory fix,they were either + incompletes, or to complex for often used methods. So att and + class are back to being regular, non l-value methods. + latt and lclass are the l-value versions. +added: documented the -html option for xml_grep, that allows processing + HTML input +added: the -Tidy option to xml_grep, that uses HTML::Tidy to convert + HTML to XML + version 3.37 date: 2010-10-08 # minor maintenance release fixed: more tests fixed for HTML::TreeBuilder, hopefully will pass now +removed: making att and class lvalues created problems: in certain + context they made regular calls to the method create empty +� attributes. I could find no satisfactory fix,they were either + incompletes, or to complex for often used methods. So att and + class are back to being regular, non l-value methods. + latt and lclass are the l-value versions. +added: documented the -html option for xml_grep, that allows processing + HTML input +added: the -Tidy option to xml_grep, that uses HTML::Tidy to convert + HTML to XML version 3.36 date: 2010-10-07 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/XML-Twig-3.37/META.yml new/XML-Twig-3.38/META.yml --- old/XML-Twig-3.37/META.yml 2010-10-10 07:52:02.000000000 +0200 +++ new/XML-Twig-3.38/META.yml 2011-02-27 07:42:00.000000000 +0100 @@ -1,6 +1,6 @@ --- #YAML:1.0 name: XML-Twig -version: 3.37 +version: 3.38 abstract: XML, The Perl Way author: - Michel Rodriguez <[email protected]> diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/XML-Twig-3.37/Twig.pm new/XML-Twig-3.38/Twig.pm --- old/XML-Twig-3.37/Twig.pm 2010-10-10 07:52:01.000000000 +0200 +++ new/XML-Twig-3.38/Twig.pm 2011-02-27 07:41:59.000000000 +0100 @@ -38,16 +38,16 @@ # xml name (leading # allowed) # first line is for perl 5.005, second line for modern perl, that accept character classes -my $REG_NAME = q{(?:(?:[^\W\d]|[:#])(?:[\w.-]*:)?[\w.-]*)}; # does not work for leading non-ascii letters - $REG_NAME = q{(?:(?:[[:alpha:]:#])(?:[\w.-]*:)?[\w.-]*)}; # > perl 5.5 +my $REG_NAME = q{(?:(?:[^\W\d]|[:#_])(?:[\w.-]*:)?[\w.-]*)}; # does not work for leading non-ascii letters + $REG_NAME = q{(?:(?:[[:alpha:]:#_])(?:[\w.-]*:)?[\w.-]*)}; # > perl 5.5 # name or wildcard (* or '') (leading # allowed) -my $REG_NAME_W = q{(?:(?:[^\W\d]|[:#])(?:[\w.-]*:)?[\w.-]*|\*)}; # does not work for leading non-ascii letters - $REG_NAME_W = q{(?:(?:[[:alpha:]:#])(?:[\w.-]*:)?[\w.-]*|\*)}; # > perl 5.5 +my $REG_NAME_W = q{(?:(?:[^\W\d]|[:#_])(?:[\w.-]*:)?[\w.-]*|\*)}; # does not work for leading non-ascii letters + $REG_NAME_W = q{(?:(?:[[:alpha:]:#_])(?:[\w.-]*:)?[\w.-]*|\*)}; # > perl 5.5 # name or wildcard (* or '') (leading # allowed) with optional class -my $REG_NAME_WC = q{(?(?:(?:[^\W\d]|[:#])(?:[\w.-]*:)?[\w.-]*|\*)(?:\.[\w-]+)?|(?:\.[\w-]+))}; # does not work for leading non-ascii letters - $REG_NAME_WC = q{(?:(?:(?:[[:alpha:]:#])(?:[\w.-]*:)?[\w.-]*|\*)(?:\.[\w-]+)?|(?:\.[\w-]+))}; # > perl 5.5 +my $REG_NAME_WC = q{(?(?:(?:[^\W\d]|[:#_])(?:[\w.-]*:)?[\w.-]*|\*)(?:\.[\w-]+)?|(?:\.[\w-]+))}; # does not work for leading non-ascii letters + $REG_NAME_WC = q{(?:(?:(?:[[:alpha:]:#_])(?:[\w.-]*:)?[\w.-]*|\*)(?:\.[\w-]+)?|(?:\.[\w-]+))}; # > perl 5.5 my $REG_REGEXP = q{(?:/(?:[^\\/]|\\.)*/[eimsox]*)}; # regexp @@ -97,7 +97,7 @@ BEGIN { -$VERSION = '3.37'; +$VERSION = '3.38'; use XML::Parser; my $needVersion = '2.23'; @@ -783,6 +783,13 @@ return $@ ? $t->_reset_twig_after_error : $t; } +sub parseurl_html + { my $t= shift; + _use( 'LWP::Simple') or croak "missing LWP::Simple"; + $t->parse_html( LWP::Simple::get( shift()), @_); + } + + # uses eval to catch the parser's death sub safe_parse_html { my $t= shift; @@ -847,7 +854,7 @@ if( _is_well_formed_xml( $_[0])) { $t->parse( @_) } else - { my $html= _html2xml( $_[0]); + { my $html= $t->{use_tidy} ? _tidy_html( $_[0]) : _html2xml( $_[0]); if( _is_well_formed_xml( $html)) { $t->parse( $html); } else @@ -913,7 +920,7 @@ wrap => 0, break_before_br => 0, }; - + $options ||= {}; my $tidy_options= { %$TIDY_DEFAULTS, %$options}; my $tidy = HTML::Tidy->new( $tidy_options); $tidy->ignore( type => 1, type => 2 ); # 1 is TIDY_WARNING, 2 is TIDY_ERROR, not clean @@ -1366,7 +1373,7 @@ sub _set_regexp_handler { my( $handlers, $path, $handler, $prev_handler)= @_; # if the expression was a regexp it is now a string (it was stringified when it became a hash key) - if( $path=~ m{^\(\?([xism]*)(?:-[xism]*)?:(.*)\)$}) + if( $path=~ m{^\(\?([\^xism]*)(?:-[\^xism]*)?:(.*)\)$}) { my $regexp= qr/(?$1:$2)/; # convert it back into a regexp my $sub= sub { my( $stack)= @_; return( $stack->[-1]->{_tag} =~ $regexp ) }; my $handler_data= { tag=> '*', score => { type => $REGEXP_TRIGGER} , trigger => $sub, @@ -5522,9 +5529,12 @@ } # methods dealing with the class attribute, convenient if you work with xhtml -sub class +sub class { $_[0]->{att}->{class}; } +# lvalue version of class. separate from class to avoid problem like RT# +sub lclass :lvalue # > perl 5.5 -{ my( $elt)= @_; $elt->{'att'}->{'class'}; } + { $_[0]->{att}->{class}; } + sub set_class { my( $elt, $class)= @_; $elt->set_att( class => $class); } # adds a class to an element @@ -5611,9 +5621,12 @@ return $elt; } -sub att +sub att { $_[0]->{att}->{$_[1]}; } +# lvalue version of att. separate from class to avoid problem like RT# +sub latt :lvalue # > perl 5.5 -{ $_[0]->{att}->{$_[1]}; } + { $_[0]->{att}->{$_[1]}; } + sub del_att { my $elt= shift; while( @_) { delete $elt->{'att'}->{shift()}; } @@ -7327,22 +7340,25 @@ my $replacement_string; my $is_string= _is_string( $replace); foreach my $text_elt ($elt->descendants_or_self( $TEXT)) - { if( $is_string) + { + if( $is_string) { my $text= $text_elt->text; $text=~ s{$regexp}{ _replace_var( $replace, $1, $2, $3, $4, $5, $6, $7, $8, $9)}egx; $text_elt->set_text( $text); } else - { + { no utf8; # = perl 5.6 my $replace_sub= ( $replace_sub{$replace} ||= _install_replace_sub( $replace)); my $text= $text_elt->text; my $pos=0; # used to skip text that was previously matched + my $found_hit; while( my( $pre_match_string, $match_string, @var)= ($text=~ m{(.*?)($regexp)}sg)) - { my $match_start = length( $pre_match_string); - my $match = $text_elt->split_at( $match_start + $pos); + { $found_hit=1; + my $match_start = length( $pre_match_string); + my $match = $match_start ? $text_elt->split_at( $match_start + $pos) : $text_elt; my $match_length = length( $match_string); - my $post_match = $match->split_at( $match_length); + my $post_match = $match->split_at( $match_length); $replace_sub->( $match, @var); # merge previous text with current one my $next_sibling; @@ -7357,21 +7373,12 @@ # go to next $text_elt= $post_match; $text= $post_match->text; - # merge last text element with next one if needed, - # the match will be against the non-matched text, - # so $pos is used to skip the merged part - my $prev_sibling; - if( ($prev_sibling= $post_match->{prev_sibling}) - && ($XML::Twig::index2gi[$post_match->{'gi'}] eq $XML::Twig::index2gi[$prev_sibling->{'gi'}]) - ) - { $pos= length( $prev_sibling->text); - $prev_sibling->merge_text( $post_match); - } # if the match is at the end of the text an empty #PCDATA is left: remove it if( !$text_elt->text) { $text_elt->delete; } } + if( $found_hit) { $text_elt->normalize; } # in case consecutive #PCDATA have been created } } @@ -7392,22 +7399,22 @@ sub _install_replace_sub { my $replace_exp= shift; my @item= split m{(&e[ln]t\s*\([^)]*\))}, $replace_exp; - my $sub= q{ my( $match, @var)= @_; unshift @var, undef; my $new; }; + my $sub= q{ my( $match, @var)= @_; my $new; my $last_inserted=$match;}; my( $gi, $exp); foreach my $item (@item) - { if( $item=~ m{^&elt\s*\(([^)]*)\)}) - { $exp= $1; - } + { next if ! length $item; + if( $item=~ m{^&elt\s*\(([^)]*)\)}) + { $exp= $1; } elsif( $item=~ m{^&ent\s*\(\s*([^\s)]*)\s*\)}) { $exp= " '#ENT' => $1"; } else { $exp= qq{ '#PCDATA' => "$item"}; } - $exp=~ s{\$(\d)}{\$var[$1]}g; # replace references to matches + $exp=~ s{\$(\d)}{my $i= $1-1; "\$var[$i]"}eg; # replace references to matches $sub.= qq{ \$new= \$match->new( $exp); }; - $sub .= q{ $new->paste( before => $match); }; + $sub .= q{ $new->paste( after => $last_inserted); $last_inserted=$new;}; } $sub .= q{ $match->delete; }; - #$sub=~ s/;/;\n/g; + #$sub=~ s/;/;\n/g; warn "subs: $sub"; my $coderef= eval "sub { $NO_WARNINGS; $sub }"; if( $@) { croak( "invalid replacement expression $replace_exp: ",$@); } return $coderef; @@ -8440,6 +8447,7 @@ } # end of block containing package globals ($pretty_print, $quotes, keep_encoding...) +# merges consecutive #PCDATAs in am element sub normalize { my( $elt)= @_; my @descendants= $elt->descendants( $PCDATA); @@ -10486,13 +10494,15 @@ get get an extra CDATA section inside ( <!-- foo --> becomes <!-- <![CDATA[ foo ]]> --> -=item parsefile_html +=item parsefile_html ($file) parse an HTML file (by converting it to XML using HTML::TreeBuilder, which -needs to be available). The file is loaded completely in memory and converted -to XML before being parsed. +needs to be available, or HTML::Tidy if the C<use_tidy> option was used). +The file is loaded completely in memory and converted to XML before being parsed. + +=item parseurl_html ($url $optional_user_agent) -B<Alpha>: implementation, and thus generated XML could change. +parse an URL as html the same way C<L<parse_html>> does =item safe_parseurl_html ($url $optional_user_agent) @@ -11546,7 +11556,11 @@ Return the value of attribute C<$att> or C<undef> -this method is an lvalue, so you can do C<< $elt->{'att'}->{'foo'}= 'bar' >> +=item latt ($att) + +Return the value of attribute C<$att> or C<undef> + +this method is an lvalue, so you can do C<< $elt->latt( 'foo')= 'bar' >> or C<< $elt->latt( 'foo')++; >> =item set_att ($att, $att_value) @@ -12665,7 +12679,10 @@ attribute are quite convenient when dealing with XHTML, or plain XML that will eventually be displayed using CSS) -this method is an lvalue, so you can do C<< $elt->class= "foo" >> +=item lclass + +same as class, except that +this method is an lvalue, so you can do C<< $elt->lclass= "foo" >> =item set_class ($class) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/XML-Twig-3.37/Twig_pm.slow new/XML-Twig-3.38/Twig_pm.slow --- old/XML-Twig-3.37/Twig_pm.slow 2010-10-09 18:17:09.000000000 +0200 +++ new/XML-Twig-3.38/Twig_pm.slow 2011-02-26 09:00:28.000000000 +0100 @@ -38,16 +38,16 @@ # xml name (leading # allowed) # first line is for perl 5.005, second line for modern perl, that accept character classes -my $REG_NAME = q{(?:(?:[^\W\d]|[:#])(?:[\w.-]*:)?[\w.-]*)}; # does not work for leading non-ascii letters - $REG_NAME = q{(?:(?:[[:alpha:]:#])(?:[\w.-]*:)?[\w.-]*)}; # > perl 5.5 +my $REG_NAME = q{(?:(?:[^\W\d]|[:#_])(?:[\w.-]*:)?[\w.-]*)}; # does not work for leading non-ascii letters + $REG_NAME = q{(?:(?:[[:alpha:]:#_])(?:[\w.-]*:)?[\w.-]*)}; # > perl 5.5 # name or wildcard (* or '') (leading # allowed) -my $REG_NAME_W = q{(?:(?:[^\W\d]|[:#])(?:[\w.-]*:)?[\w.-]*|\*)}; # does not work for leading non-ascii letters - $REG_NAME_W = q{(?:(?:[[:alpha:]:#])(?:[\w.-]*:)?[\w.-]*|\*)}; # > perl 5.5 +my $REG_NAME_W = q{(?:(?:[^\W\d]|[:#_])(?:[\w.-]*:)?[\w.-]*|\*)}; # does not work for leading non-ascii letters + $REG_NAME_W = q{(?:(?:[[:alpha:]:#_])(?:[\w.-]*:)?[\w.-]*|\*)}; # > perl 5.5 # name or wildcard (* or '') (leading # allowed) with optional class -my $REG_NAME_WC = q{(?(?:(?:[^\W\d]|[:#])(?:[\w.-]*:)?[\w.-]*|\*)(?:\.[\w-]+)?|(?:\.[\w-]+))}; # does not work for leading non-ascii letters - $REG_NAME_WC = q{(?:(?:(?:[[:alpha:]:#])(?:[\w.-]*:)?[\w.-]*|\*)(?:\.[\w-]+)?|(?:\.[\w-]+))}; # > perl 5.5 +my $REG_NAME_WC = q{(?(?:(?:[^\W\d]|[:#_])(?:[\w.-]*:)?[\w.-]*|\*)(?:\.[\w-]+)?|(?:\.[\w-]+))}; # does not work for leading non-ascii letters + $REG_NAME_WC = q{(?:(?:(?:[[:alpha:]:#_])(?:[\w.-]*:)?[\w.-]*|\*)(?:\.[\w-]+)?|(?:\.[\w-]+))}; # > perl 5.5 my $REG_REGEXP = q{(?:/(?:[^\\/]|\\.)*/[eimsox]*)}; # regexp @@ -97,7 +97,7 @@ BEGIN { -$VERSION = '3.37'; +$VERSION = '3.38'; use XML::Parser; my $needVersion = '2.23'; @@ -783,6 +783,13 @@ return $@ ? $t->_reset_twig_after_error : $t; } +sub parseurl_html + { my $t= shift; + _use( 'LWP::Simple') or croak "missing LWP::Simple"; + $t->parse_html( LWP::Simple::get( shift()), @_); + } + + # uses eval to catch the parser's death sub safe_parse_html { my $t= shift; @@ -847,7 +854,7 @@ if( _is_well_formed_xml( $_[0])) { $t->parse( @_) } else - { my $html= _html2xml( $_[0]); + { my $html= $t->{use_tidy} ? _tidy_html( $_[0]) : _html2xml( $_[0]); if( _is_well_formed_xml( $html)) { $t->parse( $html); } else @@ -913,7 +920,7 @@ wrap => 0, break_before_br => 0, }; - + $options ||= {}; my $tidy_options= { %$TIDY_DEFAULTS, %$options}; my $tidy = HTML::Tidy->new( $tidy_options); $tidy->ignore( type => 1, type => 2 ); # 1 is TIDY_WARNING, 2 is TIDY_ERROR, not clean @@ -1366,7 +1373,7 @@ sub _set_regexp_handler { my( $handlers, $path, $handler, $prev_handler)= @_; # if the expression was a regexp it is now a string (it was stringified when it became a hash key) - if( $path=~ m{^\(\?([xism]*)(?:-[xism]*)?:(.*)\)$}) + if( $path=~ m{^\(\?([\^xism]*)(?:-[\^xism]*)?:(.*)\)$}) { my $regexp= qr/(?$1:$2)/; # convert it back into a regexp my $sub= sub { my( $stack)= @_; return( $stack->[-1]->{_tag} =~ $regexp ) }; my $handler_data= { tag=> '*', score => { type => $REGEXP_TRIGGER} , trigger => $sub, @@ -5523,9 +5530,12 @@ } # methods dealing with the class attribute, convenient if you work with xhtml -sub class +sub class { $_[0]->{att}->{class}; } +# lvalue version of class. separate from class to avoid problem like RT# +sub lclass :lvalue # > perl 5.5 -{ my( $elt)= @_; $elt->att( 'class'); } + { $_[0]->{att}->{class}; } + sub set_class { my( $elt, $class)= @_; $elt->set_att( class => $class); } # adds a class to an element @@ -5612,9 +5622,12 @@ return $elt; } -sub att +sub att { $_[0]->{att}->{$_[1]}; } +# lvalue version of att. separate from class to avoid problem like RT# +sub latt :lvalue # > perl 5.5 -{ $_[0]->{att}->{$_[1]}; } + { $_[0]->{att}->{$_[1]}; } + sub del_att { my $elt= shift; while( @_) { delete $elt->{'att'}->{shift()}; } @@ -7328,22 +7341,25 @@ my $replacement_string; my $is_string= _is_string( $replace); foreach my $text_elt ($elt->descendants_or_self( $TEXT)) - { if( $is_string) + { + if( $is_string) { my $text= $text_elt->text; $text=~ s{$regexp}{ _replace_var( $replace, $1, $2, $3, $4, $5, $6, $7, $8, $9)}egx; $text_elt->set_text( $text); } else - { + { no utf8; # = perl 5.6 my $replace_sub= ( $replace_sub{$replace} ||= _install_replace_sub( $replace)); my $text= $text_elt->text; my $pos=0; # used to skip text that was previously matched + my $found_hit; while( my( $pre_match_string, $match_string, @var)= ($text=~ m{(.*?)($regexp)}sg)) - { my $match_start = length( $pre_match_string); - my $match = $text_elt->split_at( $match_start + $pos); + { $found_hit=1; + my $match_start = length( $pre_match_string); + my $match = $match_start ? $text_elt->split_at( $match_start + $pos) : $text_elt; my $match_length = length( $match_string); - my $post_match = $match->split_at( $match_length); + my $post_match = $match->split_at( $match_length); $replace_sub->( $match, @var); # merge previous text with current one my $next_sibling; @@ -7358,21 +7374,12 @@ # go to next $text_elt= $post_match; $text= $post_match->text; - # merge last text element with next one if needed, - # the match will be against the non-matched text, - # so $pos is used to skip the merged part - my $prev_sibling; - if( ($prev_sibling= $post_match->_prev_sibling) - && ($post_match->gi eq $prev_sibling->gi) - ) - { $pos= length( $prev_sibling->text); - $prev_sibling->merge_text( $post_match); - } # if the match is at the end of the text an empty #PCDATA is left: remove it if( !$text_elt->text) { $text_elt->delete; } } + if( $found_hit) { $text_elt->normalize; } # in case consecutive #PCDATA have been created } } @@ -7393,22 +7400,22 @@ sub _install_replace_sub { my $replace_exp= shift; my @item= split m{(&e[ln]t\s*\([^)]*\))}, $replace_exp; - my $sub= q{ my( $match, @var)= @_; unshift @var, undef; my $new; }; + my $sub= q{ my( $match, @var)= @_; my $new; my $last_inserted=$match;}; my( $gi, $exp); foreach my $item (@item) - { if( $item=~ m{^&elt\s*\(([^)]*)\)}) - { $exp= $1; - } + { next if ! length $item; + if( $item=~ m{^&elt\s*\(([^)]*)\)}) + { $exp= $1; } elsif( $item=~ m{^&ent\s*\(\s*([^\s)]*)\s*\)}) { $exp= " '#ENT' => $1"; } else { $exp= qq{ '#PCDATA' => "$item"}; } - $exp=~ s{\$(\d)}{\$var[$1]}g; # replace references to matches + $exp=~ s{\$(\d)}{my $i= $1-1; "\$var[$i]"}eg; # replace references to matches $sub.= qq{ \$new= \$match->new( $exp); }; - $sub .= q{ $new->paste( before => $match); }; + $sub .= q{ $new->paste( after => $last_inserted); $last_inserted=$new;}; } $sub .= q{ $match->delete; }; - #$sub=~ s/;/;\n/g; + #$sub=~ s/;/;\n/g; warn "subs: $sub"; my $coderef= eval "sub { $NO_WARNINGS; $sub }"; if( $@) { croak( "invalid replacement expression $replace_exp: ",$@); } return $coderef; @@ -8442,6 +8449,7 @@ } # end of block containing package globals ($pretty_print, $quotes, keep_encoding...) +# merges consecutive #PCDATAs in am element sub normalize { my( $elt)= @_; my @descendants= $elt->descendants( $PCDATA); @@ -10488,13 +10496,15 @@ get get an extra CDATA section inside ( <!-- foo --> becomes <!-- <![CDATA[ foo ]]> --> -=item parsefile_html +=item parsefile_html ($file) parse an HTML file (by converting it to XML using HTML::TreeBuilder, which -needs to be available). The file is loaded completely in memory and converted -to XML before being parsed. +needs to be available, or HTML::Tidy if the C<use_tidy> option was used). +The file is loaded completely in memory and converted to XML before being parsed. + +=item parseurl_html ($url $optional_user_agent) -B<Alpha>: implementation, and thus generated XML could change. +parse an URL as html the same way C<L<parse_html>> does =item safe_parseurl_html ($url $optional_user_agent) @@ -11548,7 +11558,11 @@ Return the value of attribute C<$att> or C<undef> -this method is an lvalue, so you can do C<< $elt->att( 'foo')= 'bar' >> +=item latt ($att) + +Return the value of attribute C<$att> or C<undef> + +this method is an lvalue, so you can do C<< $elt->latt( 'foo')= 'bar' >> or C<< $elt->latt( 'foo')++; >> =item set_att ($att, $att_value) @@ -12667,7 +12681,10 @@ attribute are quite convenient when dealing with XHTML, or plain XML that will eventually be displayed using CSS) -this method is an lvalue, so you can do C<< $elt->class= "foo" >> +=item lclass + +same as class, except that +this method is an lvalue, so you can do C<< $elt->lclass= "foo" >> =item set_class ($class) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/XML-Twig-3.37/t/test_3_36.t new/XML-Twig-3.38/t/test_3_36.t --- old/XML-Twig-3.37/t/test_3_36.t 2010-10-09 09:39:36.000000000 +0200 +++ new/XML-Twig-3.38/t/test_3_36.t 2011-02-25 11:28:06.000000000 +0100 @@ -12,7 +12,7 @@ use XML::Twig; -my $TMAX=66; +my $TMAX=67; print "1..$TMAX\n"; { my $doc=q{<d><s id="s1"><t>title 1</t><s id="s2"><t>title 2</t></s><s id="s3"></s></s><s id="s4"></s></d>}; @@ -84,19 +84,22 @@ { if( $] >= 5.006) { my $t= XML::Twig->parse( q{<d><e/></d>}); - $t->first_elt( 'e')->att( 'a')= 'b'; + $t->first_elt( 'e')->latt( 'a')= 'b'; is( $t->sprint, q{<d><e a="b"/></d>}, 'lvalued attribute (no attributes)'); - $t->first_elt( 'e')->att( 'c')= 'd'; + $t->first_elt( 'e')->latt( 'c')= 'd'; is( $t->sprint, q{<d><e a="b" c="d"/></d>}, 'lvalued attribute (attributes)'); - $t->first_elt( 'e')->att( 'c')= ''; + $t->first_elt( 'e')->latt( 'c')= ''; is( $t->sprint, q{<d><e a="b" c=""/></d>}, 'lvalued attribute (modifying existing attributes)'); - $t->root->class= 'foo'; + $t->root->lclass= 'foo'; is( $t->sprint, q{<d class="foo"><e a="b" c=""/></d>}, 'lvalued class (new class)'); - $t->root->class=~ s{fo}{tot}; + $t->root->lclass=~ s{fo}{tot}; is( $t->sprint, q{<d class="toto"><e a="b" c=""/></d>}, 'lvalued class (modify class)'); + $t= XML::Twig->parse( '<d a="1"/>'); + $t->root->latt( 'a')++; + is( $t->sprint, '<d a="2"/>', '++ on attribute'); } else - { skip( 5 => "cannot use lvalued attributes with perl $]"); } + { skip( 6 => "cannot use lvalued attributes with perl $]"); } } # used for all HTML parsing tests with HTML::Tidy @@ -112,7 +115,6 @@ } else { - my $doc= '<html><head><title>a title</title></head><body>par 1<p>par 2<br>after the break</body></html>'; my $t= XML::Twig->new( use_tidy => 1)->parse_html( $doc); my $inner= '<ul><li>foo</li><li>bar</li></ul>'; @@ -240,7 +242,7 @@ { if( XML::Twig::_use( 'HTML::TreeBuilder')) { my $html_with_Amp= XML::Twig->new->parse_html( '<html><head></head><body>&Amp;</body></html>')->sprint; - if( $HTML::TreeBuilder::VERSION < 4.00) + if( $HTML::TreeBuilder::VERSION <= 3.23) { is( $html_with_Amp, '<html><head></head><body>&</body></html>', '&Amp; used in html (fixed HTB < 4.00)'); } else { is( $html_with_Amp, '<html><head></head><body>&Amp;</body></html>', '&Amp; used in html (NOT fixed HTB > r.00)'); } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/XML-Twig-3.37/t/tools.pm new/XML-Twig-3.38/t/tools.pm --- old/XML-Twig-3.37/t/tools.pm 2010-09-21 19:43:40.000000000 +0200 +++ new/XML-Twig-3.38/t/tools.pm 2011-02-25 11:00:45.000000000 +0100 @@ -392,6 +392,13 @@ return $perl; } +# scrubs xhtml generated by tools from likely to change bits +sub scrub_xhtml + { my( $html)= @_; + $html=~ s{<!DOCTYPE[^>]*>}{}; # scrub doctype + $html=~ s{\s*xmlns="[^"]*"}{}; # scrup namespace declaration + return $html; + } __END__ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/XML-Twig-3.37/tools/xml_grep/xml_grep new/XML-Twig-3.38/tools/xml_grep/xml_grep --- old/XML-Twig-3.37/tools/xml_grep/xml_grep 2010-09-20 18:51:37.000000000 +0200 +++ new/XML-Twig-3.38/tools/xml_grep/xml_grep 2011-02-26 08:51:37.000000000 +0100 @@ -5,14 +5,15 @@ use Pod::Usage; use XML::Twig; -my $VERSION="0.8"; +my $VERSION="0.9"; # options (all used globally in the script) my( $help, $man, @roots, @paths, $files, $count, $nb_results, $nb_results_per_file, $encoding, @exclude, $wrap, $nowrap, $descr, $group, $pretty_print, $version, $text_only, $date, - $html, $verbose, $strict + $html, $tidy, + $verbose, $strict ); # used to check if the wrapping tags need to be output @@ -44,6 +45,7 @@ 'date!' => \$date, 'strict' => \$strict, 'html' => \$html, + 'tidy' => \$tidy, 'verbose' => \$verbose, ) or pod2usage(2); @@ -113,7 +115,7 @@ { create_regular_handlers( \%options, \@roots, \@paths); } - +if( $tidy) { $html= 1; $options{use_tidy}= 1; } $options{pretty_print} = $pretty_print if( $pretty_print); @@ -418,12 +420,20 @@ if the option is used but no style is given then 'C<indented>' is used -short form for this arggument is B<-s> +short form for this argument is B<-s> =item B<--text_only> Displays the text of the results, one by line. +=item B<--html> + +Allow HTML input, files are converted using HTML::TreeBuilder + +=item B<--Tidy> + +Allow HTML input, files are converted using HTML::Tidy + =back =head2 Condition Syntax ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Remember to have fun... -- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
