Hello community, here is the log from the commit of package perl-XML-Twig for openSUSE:Factory checked in at 2015-04-25 21:15:42 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/perl-XML-Twig (Old) and /work/SRC/openSUSE:Factory/.perl-XML-Twig.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "perl-XML-Twig" Changes: -------- --- /work/SRC/openSUSE:Factory/perl-XML-Twig/perl-XML-Twig.changes 2014-09-17 17:24:33.000000000 +0200 +++ /work/SRC/openSUSE:Factory/.perl-XML-Twig.new/perl-XML-Twig.changes 2015-04-25 21:15:43.000000000 +0200 @@ -1,0 +2,41 @@ +Tue Apr 14 19:18:33 UTC 2015 - [email protected] + +- updated to 3.49 + see /usr/share/doc/packages/perl-XML-Twig/Changes + + 3.49 - 2015-03-24 + + - added: the DTD_base option to XML::Twig new, that forces XML::Twig to look + for the DTD in a given directory + thanks to Arun lakhana for the idea + + - fixed: XML::Parser 2.43 caused a failure in the tests due to a change in + its behaviour when die-ing. + + - fixed: prevent PAUSE from trying to index packages that are only used + for monkey patching (to re-use XML::XPath as the XPath engine for + XML::Twig::XPath). Will also prevent UNAUTHORIZED flag on metacpan. + patch sent by Graham Knop + + - fixed: RT #96009 + keep_atts_order => 0 behaviour. Spotted by Dolmen + https://rt.cpan.org/Public/Bug/Display.html?id=96009 + + - fixed bug RT #97461 + wrong error message was returned calling parse on an invalid filehandle + Thanks to Slaven Rezic for the bug report and test case + https://rt.cpan.org/Public/Bug/Display.html?id=97461 + + - fixed: RT #98801 + COMPATIBILITY WARNING + inconsistency between simplify and XML::Simple for empty elements (including + elements with start and end tags but no contents) + the XML::Simple behaviour is to map them to an empty hash, not an + empty/undef scalar (depending of whether the element is a PCDATA or not) + as was the case in previous versions of the module. + This has the potential to break some existing code, but simplify should be + strictly the same as XML::Simple's XMLin + Thanks to Vangelis Katsikaros for the bug report and test case + https://rt.cpan.org/Public/Bug/Display.html?id=98801 + +------------------------------------------------------------------- Old: ---- XML-Twig-3.48.tar.gz New: ---- XML-Twig-3.49.tar.gz cpanspec.yml ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ perl-XML-Twig.spec ++++++ --- /var/tmp/diff_new_pack.zdGsNw/_old 2015-04-25 21:15:44.000000000 +0200 +++ /var/tmp/diff_new_pack.zdGsNw/_new 2015-04-25 21:15:44.000000000 +0200 @@ -1,7 +1,7 @@ # # spec file for package perl-XML-Twig # -# Copyright (c) 2014 SUSE LINUX Products GmbH, Nuernberg, Germany. +# Copyright (c) 2015 SUSE LINUX GmbH, Nuernberg, Germany. # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -17,14 +17,15 @@ Name: perl-XML-Twig -Version: 3.48 +Version: 3.49 Release: 0 %define cpan_name XML-Twig -Summary: A perl module for processing huge XML documents in tree mode. +Summary: Perl Module for Processing Huge Xml Documents in Tree Mode License: Artistic-1.0 or GPL-1.0+ Group: Development/Libraries/Perl Url: http://search.cpan.org/dist/XML-Twig/ -Source: http://www.cpan.org/authors/id/M/MI/MIROD/%{cpan_name}-%{version}.tar.gz +Source0: http://www.cpan.org/authors/id/M/MI/MIROD/%{cpan_name}-%{version}.tar.gz +Source1: cpanspec.yml BuildArch: noarch BuildRoot: %{_tmppath}/%{name}-%{version}-build BuildRequires: perl @@ -32,7 +33,7 @@ BuildRequires: perl(XML::Parser) >= 2.23 Requires: perl(XML::Parser) >= 2.23 %{perl_requires} -# MANUAL +# MANUAL BEGIN BuildRequires: expat BuildRequires: perl-HTML-Tidy BuildRequires: perl-IO-CaptureOutput @@ -49,12 +50,12 @@ Requires: expat Requires: perl-XML-Parser Requires: perl(Encode) - -Recommends: perl-HTML-Tidy -Recommends: perl-Text-Wrapper -Recommends: perl-Tie-IxHash -Recommends: perl-XML-XPath -Recommends: perl-XML-XPathEngine +BuildRequires: perl-HTML-Tidy +BuildRequires: perl-Text-Wrapper +BuildRequires: perl-Tie-IxHash +BuildRequires: perl-XML-XPath +BuildRequires: perl-XML-XPathEngine +# MANUAL END %description This module provides a way to process XML documents. It is build on top of ++++++ XML-Twig-3.48.tar.gz -> XML-Twig-3.49.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/XML-Twig-3.48/Changes new/XML-Twig-3.49/Changes --- old/XML-Twig-3.48/Changes 2014-03-30 08:22:48.000000000 +0200 +++ new/XML-Twig-3.49/Changes 2015-04-12 10:51:48.000000000 +0200 @@ -1,8 +1,43 @@ CHANGES +3.49 - 2015-03-24 + +- added: the DTD_base option to XML::Twig new, that forces XML::Twig to look + for the DTD in a given directory + thanks to Arun lakhana for the idea + +- fixed: XML::Parser 2.43 caused a failure in the tests due to a change in + its behaviour when die-ing. + +- fixed: prevent PAUSE from trying to index packages that are only used + for monkey patching (to re-use XML::XPath as the XPath engine for + XML::Twig::XPath). Will also prevent UNAUTHORIZED flag on metacpan. + patch sent by Graham Knop + +- fixed: RT #96009 + keep_atts_order => 0 behaviour. Spotted by Dolmen + https://rt.cpan.org/Public/Bug/Display.html?id=96009 + +- fixed bug RT #97461 + wrong error message was returned calling parse on an invalid filehandle + Thanks to Slaven Rezic for the bug report and test case + https://rt.cpan.org/Public/Bug/Display.html?id=97461 + +- fixed: RT #98801 + COMPATIBILITY WARNING + inconsistency between simplify and XML::Simple for empty elements (including + elements with start and end tags but no contents) + the XML::Simple behaviour is to map them to an empty hash, not an + empty/undef scalar (depending of whether the element is a PCDATA or not) + as was the case in previous versions of the module. + This has the potential to break some existing code, but simplify should be + strictly the same as XML::Simple's XMLin + Thanks to Vangelis Katsikaros for the bug report and test case + https://rt.cpan.org/Public/Bug/Display.html?id=98801 + 3.48 - 2014-03-30 - minor maintenance release -- fixed tests +- fixed: tests 3.47 - 2014-03-27 - minor maintenance release @@ -29,7 +64,7 @@ - fixed: RT #86651 https://rt.cpan.org/Ticket/Display.html?id=86773 xml_pp, quote not escaped in attribute values -- fixed various typos in docs RT#87660 +- fixed: various typos in docs RT#87660 thanks to David Steinbrunner - fixed: RT #86773 https://rt.cpan.org/Ticket/Display.html?id=86773 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/XML-Twig-3.48/MANIFEST new/XML-Twig-3.49/MANIFEST --- old/XML-Twig-3.48/MANIFEST 2014-03-30 10:54:19.000000000 +0200 +++ new/XML-Twig-3.49/MANIFEST 2015-04-12 10:56:56.000000000 +0200 @@ -129,6 +129,7 @@ t/test_3_44.t t/test_3_45.t t/test_3_47.t +t/test_3_48.t t/test_changes.t t/test_memory.t t/test_wrapped.t diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/XML-Twig-3.48/META.json new/XML-Twig-3.49/META.json --- old/XML-Twig-3.48/META.json 2014-03-30 10:54:19.000000000 +0200 +++ new/XML-Twig-3.49/META.json 2015-04-12 10:56:56.000000000 +0200 @@ -4,7 +4,7 @@ "Michel Rodriguez <[email protected]>" ], "dynamic_config" : 1, - "generated_by" : "ExtUtils::MakeMaker version 6.66, CPAN::Meta::Converter version 2.133380", + "generated_by" : "ExtUtils::MakeMaker version 6.98, CPAN::Meta::Converter version 2.140640", "license" : [ "perl_5" ], @@ -42,5 +42,5 @@ "url" : "http://github.com/mirod/xmltwig" } }, - "version" : "3.48" + "version" : "3.49" } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/XML-Twig-3.48/META.yml new/XML-Twig-3.49/META.yml --- old/XML-Twig-3.48/META.yml 2014-03-30 10:54:19.000000000 +0200 +++ new/XML-Twig-3.49/META.yml 2015-04-12 10:56:56.000000000 +0200 @@ -3,22 +3,22 @@ author: - 'Michel Rodriguez <[email protected]>' build_requires: - ExtUtils::MakeMaker: 0 + ExtUtils::MakeMaker: '0' configure_requires: - ExtUtils::MakeMaker: 0 + ExtUtils::MakeMaker: '0' dynamic_config: 1 -generated_by: 'ExtUtils::MakeMaker version 6.66, CPAN::Meta::Converter version 2.133380' +generated_by: 'ExtUtils::MakeMaker version 6.98, CPAN::Meta::Converter version 2.140640' license: perl meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html - version: 1.4 + version: '1.4' name: XML-Twig no_index: directory: - t - inc requires: - XML::Parser: 2.23 + XML::Parser: '2.23' resources: repository: http://github.com/mirod/xmltwig -version: 3.48 +version: '3.49' diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/XML-Twig-3.48/Twig/XPath.pm new/XML-Twig-3.49/Twig/XPath.pm --- old/XML-Twig-3.48/Twig/XPath.pm 2014-03-30 10:54:15.000000000 +0200 +++ new/XML-Twig-3.49/Twig/XPath.pm 2015-04-12 10:55:21.000000000 +0200 @@ -17,7 +17,8 @@ $VERSION="0.02"; BEGIN -{ package XML::XPath::NodeSet; +{ package # hide from PAUSE + XML::XPath::NodeSet; no warnings; # to avoid the "Subroutine sort redefined" message # replace the native sort routine by a Twig'd one sub sort @@ -26,7 +27,8 @@ return $self; } - package XML::XPathEngine::NodeSet; + package # hide from PAUSE + XML::XPathEngine::NodeSet; no warnings; # to avoid the "Subroutine sort redefined" message # replace the native sort routine by a Twig'd one sub sort diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/XML-Twig-3.48/Twig.pm new/XML-Twig-3.49/Twig.pm --- old/XML-Twig-3.48/Twig.pm 2014-03-30 10:54:15.000000000 +0200 +++ new/XML-Twig-3.49/Twig.pm 2015-04-12 10:55:21.000000000 +0200 @@ -144,7 +144,7 @@ BEGIN { -$VERSION = '3.48'; +$VERSION = '3.49'; use XML::Parser; my $needVersion = '2.23'; @@ -403,7 +403,7 @@ TopDownHandlers => 1, KeepEncoding => 1, DoNotEscapeAmpInAtts => 1, ParseStartTag => 1, KeepAttsOrder => 1, - LoadDTD => 1, DTDHandler => 1, + LoadDTD => 1, DTDHandler => 1, DTDBase => 1, NoXxe => 1, DoNotOutputDTD => 1, NoProlog => 1, ExpandExternalEnts => 1, DiscardSpaces => 1, KeepSpaces => 1, DiscardAllSpaces => 1, @@ -533,8 +533,14 @@ else { $self->set_expand_external_entities( 0); } + #if( $args{noXxe}) { $self->{twig_no_xxe}= 1; delete $args{noXxe}; } + if( !$args{NoLWP} && ! _use( 'URI') && ! _use( 'URI::File') && ! _use( 'LWP')) { $self->{twig_ext_ent_handler}= \&XML::Parser::initial_ext_ent_handler } + elsif( $args{NoXxe}) + { $self->{twig_ext_ent_handler}= + sub { my($xp, $base, $path) = @_; $xp->{ErrorMessage}.= "cannot use entities in document when the no_xxe option is on"; return undef; }; + } else { $self->{twig_ext_ent_handler}= \&XML::Parser::file_ext_ent_handler } @@ -673,7 +679,7 @@ else { $self->set_output_text_filter( 0); } - if( exists $args{KeepAttsOrder}) + if( $args{KeepAttsOrder}) { $self->{keep_atts_order}= $args{KeepAttsOrder}; if( _use( 'Tie::IxHash')) { $self->set_keep_atts_order( $self->{keep_atts_order}); } @@ -763,6 +769,7 @@ if( !$t && $@=~m{(syntax error at line 1, column 0, byte 0|not well-formed \(invalid token\) at line 1, column 1, byte 1)} && -f $_[0] + && ( ! ref( $_[0]) || ref( $_[0])) ne 'GLOB' # -f works on a filehandle, so this make sure $_[0] is a real file ) { croak "you seem to have used the parse method on a filename ($_[0]), you probably want parsefile instead"; } return _checked_parse_result( $t, $@); @@ -796,7 +803,16 @@ sub finish_now { my $t= shift; $t->{twig_finish_now}=1; - die $t; + # XML::Parser 2.43 changed xpcroak in a way that caused test failures for XML::Twig + # the change was reverted in 2.44, but this is here to ensure that tests pass with 2.43 + if( $XML::Parser::VERSION == 2.43) + { no warnings; + $t->parser->{twig_error}= $t; + *XML::Parser::Expat::xpcroak= sub { die $_[0]->{twig_error}; }; + die $t; + } + else + { die $t; } } @@ -2080,7 +2096,7 @@ $elt->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $elt->{parent});} ; unless( $parent->{first_child}) { $parent->{first_child}= $elt; } - $parent->{empty}=0; $parent->{last_child}=$elt; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; + delete $parent->{empty}; $parent->{last_child}=$elt; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; } else { # processing root @@ -2508,7 +2524,7 @@ $twig_current->{next_sibling}= $cdata; my $parent= $twig_current->{parent}; $cdata->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $cdata->{parent});} ; - $parent->{empty}=0; $parent->{last_child}=$cdata; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; + delete $parent->{empty}; $parent->{last_child}=$cdata; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; $t->{twig_in_pcdata}=0; } else @@ -2525,7 +2541,7 @@ } else { $twig_current->{first_child}= $cdata; } - $twig_current->{empty}=0; $twig_current->{last_child}=$cdata; if( $XML::Twig::weakrefs) { weaken( $twig_current->{last_child});} ; + delete $twig_current->{empty}; $twig_current->{last_child}=$cdata; if( $XML::Twig::weakrefs) { weaken( $twig_current->{last_child});} ; } @@ -2547,7 +2563,7 @@ my $elt= $t->{twig_current}; delete $elt->{'twig_current'}; my $cdata= $elt->{cdata}; - $elt->_set_cdata( $cdata); + $elt->{cdata}= $cdata; push @{$t->{_twig_context_stack}}, { $ST_TAG => $CDATA }; @@ -2746,7 +2762,7 @@ my $elt; if( exists $t->{twig_alt_elt_class}) { $elt= $t->{twig_elt_class}->new( $PCDATA); - $elt->_set_pcdata( $string); + $elt->{pcdata}= $string; } else { $elt= bless( { gi => $XML::Twig::gi2index{$PCDATA}, pcdata => $string }, 'XML::Twig::Elt'); } @@ -2760,7 +2776,7 @@ { $parent->{first_child}= $elt; } $elt->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $elt->{parent});} ; - $parent->{empty}=0; $parent->{last_child}=$elt; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; + delete $parent->{empty}; $parent->{last_child}=$elt; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; $t->{twig_stored_spaces}=''; return $elt; } @@ -2898,7 +2914,17 @@ # now check if we want to get the DTD info if( $t->{twig_read_external_dtd} && $sysid) { # let's build a fake document with an internal DTD - my $dtd= "<!DOCTYPE $name [" . _slurp_uri( $sysid) . "]><$name/>"; + if( $t->{DTDBase}) + { _use( 'File::Spec'); + $sysid=File::Spec->catfile($t->{DTDBase}, $sysid); + } + my $dtd= _slurp_uri( $sysid); + # if the DTD includes an XML declaration, it needs to be moved before the DOCTYPE bit + if( $dtd=~ s{^(\s*<\?xml(\s+\w+\s*=\s*("[^"]*"|'[^']*'))*\s*\?>)}{}) + { $dtd= "$1<!DOCTYPE $name [$dtd]><$name/>"; } + else + { $dtd= "<!DOCTYPE $name [$dtd]><$name/>"; } + #my $dtd= "<!DOCTYPE $name [" . _slurp_uri( $sysid) . "]><$name/>"; $t->save_global_state(); # save the globals (they will be reset by the following new) my $t_dtd= XML::Twig->new( load_DTD => 1, ParseParamEnt => 1, error_context => $t->{ErrorContext} || 0); # create a temp twig @@ -3021,7 +3047,7 @@ $twig_current->{next_sibling}= $ent; my $parent= $twig_current->{parent}; $ent->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $ent->{parent});} ; - $parent->{empty}=0; $parent->{last_child}=$ent; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; + delete $parent->{empty}; $parent->{last_child}=$ent; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; # the twig_current is now the parent delete $twig_current->{'twig_current'}; $t->{twig_current}= $parent; @@ -3037,7 +3063,7 @@ } else { if( $twig_current) { $twig_current->{first_child}= $ent; } } - if( $twig_current) { $twig_current->{empty}=0; $twig_current->{last_child}=$ent; if( $XML::Twig::weakrefs) { weaken( $twig_current->{last_child});} ; } + if( $twig_current) { delete $twig_current->{empty}; $twig_current->{last_child}=$ent; if( $XML::Twig::weakrefs) { weaken( $twig_current->{last_child});} ; } } # meant to trigger entity handler, does not seem to be activated at this time @@ -3342,7 +3368,7 @@ if( $up_to) { $last_elt= $up_to; } elsif( $t->{twig_current}) - { $last_elt= $t->{twig_current}->_last_child; } + { $last_elt= $t->{twig_current}->{last_child}; } else { $last_elt= $t->{twig_root}; $flush_trailing_data=1; @@ -3351,7 +3377,7 @@ # flush the DTD unless it has ready flushed (ie root has been flushed) my $elt= $t->{twig_root}; - unless( $elt->_flushed) + unless( $elt->{'flushed'}) { # store flush info so we can auto-flush later if( $t->{twig_autoflush}) { $t->{twig_autoflush_data}={}; @@ -3366,10 +3392,10 @@ { my $next_elt; if( $last_elt && $last_elt->in( $elt)) { - unless( $elt->_flushed) + unless( $elt->{'flushed'}) { # just output the front tag print $elt->start_tag(); - $elt->_set_flushed; + $elt->{'flushed'}=1; } $next_elt= $elt->{first_child}; } @@ -3466,7 +3492,7 @@ if( $up_to) { $last_elt= $up_to; } elsif( $t->{twig_current}) - { $last_elt= $t->{twig_current}->_last_child; } + { $last_elt= $t->{twig_current}->{last_child}; } else { $last_elt= $t->{twig_root}; } @@ -3475,7 +3501,7 @@ while( $elt) { my $next_elt; if( $last_elt && $last_elt->in( $elt)) - { $elt->_set_flushed; + { $elt->{'flushed'}=1; $next_elt= $elt->{first_child}; } else @@ -4075,7 +4101,7 @@ print {$fh} $tag if( $tag); pop @{$t->{_twig_context_stack}}; # will be pushed back in _twig_start _twig_start( $p, $gi, @_); - $t->root->_set_flushed; # or the root start tag gets output the first time we flush + $t->root->{'flushed'}=1; # or the root start tag gets output the first time we flush } elsif( $t->{twig_starttag_handlers}) { # look for start tag handlers @@ -4213,7 +4239,7 @@ if( ! $t->{twig_ignore_level}) { $t->{twig_current} = $t->{twig_ignore_elt}; - $t->{twig_current}->set_twig_current; + $t->{twig_current}->{'twig_current'}=1; $t->{twig_ignore_elt}->cut; # there could possibly be a memory leak here (delete would avoid it, # but could also delete elements that should not be deleted) @@ -4420,12 +4446,12 @@ # the "real" last element processed, as _twig_end has closed it my $last_elt; if( $t->{twig_current}) - { $last_elt= $t->{twig_current}->_last_child; } + { $last_elt= $t->{twig_current}->{last_child}; } else { $last_elt= $t->{twig_root}; } my $elt= $t->{twig_root}; - unless( $elt->_flushed) + unless( $elt->{'flushed'}) { # init unless already done (ie root has been flushed) if( my $start_document = $handler->can( 'start_document')) { $start_document->( $handler); } @@ -4437,13 +4463,13 @@ { my $next_elt; if( $last_elt && $last_elt->in( $elt)) { - unless( $elt->_flushed) + unless( $elt->{'flushed'}) { # just output the front tag if( my $start_element = $handler->can( 'start_element')) { if( my $tag_data= $start_tag_data->( $elt)) { $start_element->( $handler, $tag_data); } } - $elt->_set_flushed; + $elt->{'flushed'}=1; } $next_elt= $elt->{first_child}; } @@ -4954,21 +4980,21 @@ if( $gi eq $PCDATA) { if( grep { ref $_ } @_) { croak "element $PCDATA can only be created from text"; } - $elt->_set_pcdata( join( '', @_)); + $elt->{pcdata}= join '', @_; } elsif( $gi eq $ENT) { $elt->{ent}= shift; } elsif( $gi eq $CDATA) { if( grep { ref $_ } @_) { croak "element $CDATA can only be created from text"; } - $elt->_set_cdata( join( '', @_)); + $elt->{cdata}= join '', @_; } elsif( $gi eq $COMMENT) { if( grep { ref $_ } @_) { croak "element $COMMENT can only be created from text"; } - $elt->_set_comment( join( '', @_)); + $elt->{comment}= join '', @_; } elsif( $gi eq $PI) { if( grep { ref $_ } @_) { croak "element $PI can only be created from text"; } - $elt->_set_pi( shift, join( '', @_)); + $elt->_set_pi( shift, join '', @_); } else { # the rest of the arguments are the content of the element @@ -4996,7 +5022,7 @@ my $elt = {}; bless $elt, $class; $elt->{gi}=$XML::Twig::gi2index{$PCDATA} or $elt->set_gi( $PCDATA); - $elt->_set_pcdata( $_[1]); + $elt->{pcdata}= $_[1]; return $elt; } @@ -5231,7 +5257,7 @@ { $descendant->{asis}= 1; if( (exists $descendant->{'cdata'})) { $descendant->{gi}=$XML::Twig::gi2index{$PCDATA} or $descendant->set_gi( $PCDATA); - $descendant->_set_pcdata( $descendant->{cdata}); + $descendant->{pcdata}= $descendant->{cdata}; } } @@ -5464,7 +5490,8 @@ { $elt->cut_children; $elt->{gi}=$XML::Twig::gi2index{$COMMENT} or $elt->set_gi( $COMMENT); } - return $elt->_set_comment( @_); + $elt->{comment}= $_[0]; + return $elt; } sub _set_comment { $_[0]->{comment}= $_[1]; return $_[0]; } @@ -5490,7 +5517,8 @@ $elt->insert_new_elt( first_child => $CDATA, @_); return $elt; } - return $elt->_set_cdata( @_); + $elt->{cdata}= $_[0]; + return $_[0]; } sub _set_cdata @@ -5790,6 +5818,7 @@ sub set_last_child { $_[0]->{'last_child'}= $_[1]; + delete $_->[0]->{empty}; if( $XML::Twig::weakrefs) { weaken( $_[0]->{'last_child'}); } } @@ -5894,7 +5923,7 @@ { my $elt= shift; my %atts; tie %atts, 'Tie::IxHash' if( keep_atts_order()); - %atts= ( (ref( $_[0] || '') eq 'HASH') || isa( $_[0] || '', 'HASH')) ? %{$_[0]} : @_; + %atts= @_ == 1 ? %{$_[0]} : @_; $elt->{att}= \%atts; if( exists $atts{$ID}) { $elt->_set_id( $atts{$ID}); } return $elt; @@ -6203,7 +6232,7 @@ # case where elt is subtree_root, is empty and has no sibling return undef if( $subtree_root && ($elt == $subtree_root)); - $next_elt= $elt->{parent}; + $next_elt= $elt->{parent} || return undef; until( $next_elt->{next_sibling}) { return undef if( $subtree_root && ($subtree_root == $next_elt)); @@ -6418,13 +6447,13 @@ sub prev_sibling_text { my $elt= shift; - my $dest=$elt->_prev_sibling(@_) or return ''; + my $dest= $elt->_prev_sibling(@_) or return ''; return $dest->text; } sub prev_sibling_trimmed_text { my $elt= shift; - my $dest=$elt->_prev_sibling(@_) or return ''; + my $dest= $elt->_prev_sibling(@_) or return ''; return $dest->trimmed_text; } @@ -6436,13 +6465,13 @@ sub next_sibling_text { my $elt= shift; - my $dest=$elt->next_sibling(@_) or return ''; + my $dest= $elt->next_sibling(@_) or return ''; return $dest->text; } sub next_sibling_trimmed_text { my $elt= shift; - my $dest=$elt->next_sibling(@_) or return ''; + my $dest= $elt->next_sibling(@_) or return ''; return $dest->trimmed_text; } @@ -6454,13 +6483,13 @@ sub prev_elt_text { my $elt= shift; - my $dest=$elt->prev_elt(@_) or return ''; + my $dest= $elt->prev_elt(@_) or return ''; return $dest->text; } sub prev_elt_trimmed_text { my $elt= shift; - my $dest=$elt->prev_elt(@_) or return ''; + my $dest= $elt->prev_elt(@_) or return ''; return $dest->trimmed_text; } @@ -6472,13 +6501,13 @@ sub next_elt_text { my $elt= shift; - my $dest=$elt->next_elt(@_) or return ''; + my $dest= $elt->next_elt(@_) or return ''; return $dest->text; } sub next_elt_trimmed_text { my $elt= shift; - my $dest=$elt->next_elt(@_) or return ''; + my $dest= $elt->next_elt(@_) or return ''; return $dest->trimmed_text; } @@ -6490,13 +6519,13 @@ sub parent_text { my $elt= shift; - my $dest=$elt->parent(@_) or return ''; + my $dest= $elt->parent(@_) or return ''; return $dest->text; } sub parent_trimmed_text { my $elt= shift; - my $dest=$elt->parent(@_) or return ''; + my $dest= $elt->parent(@_) or return ''; return $dest->trimmed_text; } @@ -6858,7 +6887,7 @@ } } #warn "step: '$step'"; - $sub .= "\@results= grep { \$_ } map { $step } \@results;"; + $sub .= "\@results= grep defined, map { $step } \@results;"; } } } @@ -6926,7 +6955,6 @@ { my $elt= shift; my( $parent, $prev_sibling, $next_sibling); $parent= $elt->{parent}; - my $a= $elt->{'att'}->{'a'} || 'na'; if( ! $parent && $elt->is_elt) { # are we cutting the root? my $t= $elt->{twig}; @@ -6960,7 +6988,7 @@ } if( $parent->{last_child} && $parent->{last_child} == $elt) - { $parent->{empty}=0; $parent->{last_child}=$elt->{prev_sibling}; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; + { delete $parent->{empty}; $parent->{last_child}=$elt->{prev_sibling}; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; } if( $prev_sibling= $elt->{prev_sibling}) @@ -7003,13 +7031,48 @@ } - sub erase { my $elt= shift; #you cannot erase the current element if( $elt->{twig_current}) { croak "trying to erase an element before it has been completely parsed"; } - unless( $elt->{parent}) + if( my $parent= $elt->{parent}) + { # normal case + $elt->_move_extra_data_after_erase; + my @children= do { my $elt= $elt; my @children=(); my $child= $elt->{first_child}; while( $child) { push @children, $child; $child= $child->{next_sibling}; } @children; }; + if( @children) + { + # elt has children, move them up + + # the first child may need to be merged with a previous text + my $first_child= shift @children; + $first_child->move( before => $elt); + my $prev= $first_child->{prev_sibling}; + if( $prev && $prev->is_text && ($XML::Twig::index2gi[$first_child->{'gi'}] eq $XML::Twig::index2gi[$prev->{'gi'}]) ) + { $prev->merge_text( $first_child); } + + # move the rest of the children + foreach my $child (@children) + { $child->move( before => $elt); } + + # now the elt had no child, delete it + $elt->delete; + + # now see if we need to merge the last child with the next element + my $last_child= $children[-1] || $first_child; # if no last child, then it's also the first child + my $next= $last_child->{next_sibling}; + if( $next && $next->is_text && ($XML::Twig::index2gi[$last_child->{'gi'}] eq $XML::Twig::index2gi[$next->{'gi'}]) ) + { $last_child->merge_text( $next); } + + # if parsing and have now a PCDATA text, mark so we can normalize later on if need be + if( $parent->{twig_current} && $last_child->is_text) { $parent->{twig_to_be_normalized}=1; } + } + else + { # no children, just cut the elt + $elt->delete; + } + } + else { # trying to erase the root (of a twig or of a cut/new element) my @children= do { my $elt= $elt; my @children=(); my $child= $elt->{first_child}; while( $child) { push @children, $child; $child= $child->{next_sibling}; } @children; }; unless( @children == 1) @@ -7020,63 +7083,7 @@ my $twig= $elt->twig; $twig->set_root( $child); } - else - { # normal case - $elt->_move_extra_data_after_erase; - my @children= do { my $elt= $elt; my @children=(); my $child= $elt->{first_child}; while( $child) { push @children, $child; $child= $child->{next_sibling}; } @children; }; - if( @children) - { # elt has children, move them up - - my $first_child= $elt->{first_child}; - my $prev_sibling=$elt->{prev_sibling}; - if( $prev_sibling) - { # connect first child to previous sibling - $first_child->{prev_sibling}=$prev_sibling; if( $XML::Twig::weakrefs) { weaken( $first_child->{prev_sibling});} ; - $prev_sibling->{next_sibling}= $first_child; - } - else - { # elt was the first child - $elt->{parent}->set_first_child( $first_child); - } - - my $last_child= $elt->{last_child}; - my $next_sibling= $elt->{next_sibling}; - if( $next_sibling) - { # connect last child to next sibling - $last_child->{next_sibling}= $next_sibling; - $next_sibling->{prev_sibling}=$last_child; if( $XML::Twig::weakrefs) { weaken( $next_sibling->{prev_sibling});} ; - } - else - { # elt was the last child - $elt->{parent}->set_last_child( $last_child); - } - # update parent for all siblings - foreach my $child (@children) - { $child->{parent}=$elt->{parent}; if( $XML::Twig::weakrefs) { weaken( $child->{parent});} ; } - - # merge consecutive text elements if need be - if( $prev_sibling && $prev_sibling->is_text && ($XML::Twig::index2gi[$first_child->{'gi'}] eq $XML::Twig::index2gi[$prev_sibling->{'gi'}]) ) - { $prev_sibling->merge_text( $first_child); } - if( $next_sibling && $next_sibling->is_text && ($XML::Twig::index2gi[$last_child->{'gi'}] eq $XML::Twig::index2gi[$next_sibling->{'gi'}]) ) - { $last_child->merge_text( $next_sibling); } - # if parsing and have now a PCDATA text, mark so we can normalize later on if need be - if( $elt->{parent}->{twig_current} && $elt->{last_child}->is_text) { $elt->{parent}->{twig_to_be_normalized}=1; } - - # elt is not referenced any more, so it will be DESTROYed - # so we'd better break the links to its children ## FIX - undef $elt->{first_child}; - undef $elt->{last_child}; - undef $elt->{parent}; - undef $elt->{next_sibling}; - undef $elt->{prev_sibling}; - - } - { # elt had no child, delete it - $elt->delete; - } - - } return $elt; } @@ -7223,7 +7230,7 @@ $next_sibling= $ref->{next_sibling}; $elt->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $elt->{parent});} ; - if( $parent->{last_child}== $ref) { $parent->{empty}=0; $parent->{last_child}=$elt; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; } + if( $parent->{last_child}== $ref) { delete $parent->{empty}; $parent->{last_child}=$elt; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; } $prev_sibling->{next_sibling}= $elt; $elt->{prev_sibling}=$prev_sibling; if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling});} ; @@ -7242,7 +7249,7 @@ $elt->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $elt->{parent});} ; $parent->{first_child}= $elt; - unless( $parent->{last_child}) { $parent->{empty}=0; $parent->{last_child}=$elt; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; } + unless( $parent->{last_child}) { delete $parent->{empty}; $parent->{last_child}=$elt; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; } $elt->{prev_sibling}=undef; if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling});} ; @@ -7258,7 +7265,7 @@ $prev_sibling= $ref->{last_child}; $elt->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $elt->{parent});} ; - $parent->{empty}=0; $parent->{last_child}=$elt; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; + delete $parent->{empty}; $parent->{last_child}=$elt; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; unless( $parent->{first_child}) { $parent->{first_child}= $elt; } $elt->{prev_sibling}=$prev_sibling; if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling});} ; @@ -7424,12 +7431,17 @@ || ( $nb_children{$child_gi} > 1) ) { # simplify element to store in an array - $data->{$child_gi} ||= []; - push @{$data->{$child_gi}}, $child_data; + if( defined $child_data && $child_data ne "" ) + { $data->{$child_gi} ||= []; + push @{$data->{$child_gi}}, $child_data; + } + else + { $data->{$child_gi}= [{}]; } } else { # simplify element to store as a hash field - $data->{$child_gi}= $child_data; + $data->{$child_gi}=$child_data; + $data->{$child_gi}= defined $child_data && $child_data ne "" ? $child_data : {}; } } } @@ -7823,13 +7835,13 @@ if( $elt->{extra_data_in_pcdata}) { $copy->_set_extra_data_in_pcdata( $elt->{extra_data_in_pcdata}); } } elsif( (exists $elt->{'cdata'})) - { $copy->_set_cdata( $elt->{cdata}); + { $copy->{cdata}= $elt->{cdata}; if( $elt->{extra_data_in_pcdata}) { $copy->_set_extra_data_in_pcdata( $elt->{extra_data_in_pcdata}); } } elsif( (exists $elt->{'target'})) { $copy->_set_pi( $elt->{target}, $elt->{data}); } elsif( (exists $elt->{'comment'})) - { $copy->_set_comment( $elt->{comment}); } + { $copy->{comment}= $elt->{comment}; } elsif( (exists $elt->{'ent'})) { $copy->{ent}= $elt->{ent}; } else @@ -8471,7 +8483,7 @@ if( $elt->{gi} >= $XML::Twig::SPECIAL_GI) { my $preserve= ($elt->{'att'}->{'xml:space'} || '') eq 'preserve'; $xml_space_preserve++ if $preserve; - unless( $elt->_flushed) + unless( $elt->{'flushed'}) { print $elt->start_tag(); } @@ -8845,7 +8857,7 @@ { my $data= $start_tag_data->( $elt); _start_prefix_mapping( $elt, $handler, $data); if( $data && (my $start_element = $handler->can( 'start_element'))) - { unless( $elt->_flushed) { $start_element->( $handler, $data); } } + { unless( $elt->{'flushed'}) { $start_element->( $handler, $data); } } foreach my $child ($elt->_children) { $child->_toSAX( $handler, $start_tag_data, $end_tag_data); } @@ -8997,11 +9009,13 @@ elsif( $XML::Twig::index2gi[$elt->{'gi'}] eq $CDATA) { if( $option{force_pcdata}) { $elt->{gi}=$XML::Twig::gi2index{$PCDATA} or $elt->set_gi( $PCDATA); - $elt->_set_cdata(''); + $elt->{cdata}= ''; return $elt->{pcdata}= (delete $elt->{empty} || 1) && $string; } else - { return $elt->_set_cdata( $string); } + { $elt->{cdata}= $string; + return $string; + } } elsif( $elt->contains_a_single( $PCDATA) ) { # optimized so we have a slight chance of not loosing embedded comments and pi's @@ -9015,7 +9029,7 @@ my $pcdata= $elt->_new_pcdata( $string); $pcdata->paste( $elt); - $elt->{empty}=0; + delete $elt->{empty}; return $elt; } @@ -9046,7 +9060,7 @@ return $elt; } elsif( ($XML::Twig::index2gi[$elt->{'gi'}] eq $CDATA) && (@_ == 1) && !( ref $_[0])) - { $elt->_set_cdata( $_[0]); + { $elt->{cdata}= $_[0]; return $elt; } @@ -9054,7 +9068,7 @@ foreach my $child (@{[$elt->_children]}) { $child->delete; } - if( @_) { $elt->{empty}=0; } + if( @_) { delete $elt->{empty}; } foreach my $child (@_) { if( ref( $child) && isa( $child, 'XML::Twig::Elt')) @@ -9096,7 +9110,7 @@ { $new_elt->set_atts( shift @args); } # paste the element $new_elt->paste( $elt); - $elt->{empty}=0; + delete $elt->{empty}; $elt= $new_elt; } # paste back the children @@ -9144,7 +9158,7 @@ if( my $parent= $elt->{parent}) { $new_elt->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $new_elt->{parent});} ; if( $parent->{first_child} == $elt) { $parent->{first_child}= $new_elt; } - if( $parent->{last_child} == $elt) { $parent->{empty}=0; $parent->{last_child}=$new_elt; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; } + if( $parent->{last_child} == $elt) { delete $parent->{empty}; $parent->{last_child}=$new_elt; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; } } else { # wrapping the root @@ -9164,7 +9178,7 @@ $next_sibling->{prev_sibling}=$new_elt; if( $XML::Twig::weakrefs) { weaken( $next_sibling->{prev_sibling});} ; } $new_elt->{first_child}= $elt; - $new_elt->{empty}=0; $new_elt->{last_child}=$elt; if( $XML::Twig::weakrefs) { weaken( $new_elt->{last_child});} ; + delete $new_elt->{empty}; $new_elt->{last_child}=$elt; if( $XML::Twig::weakrefs) { weaken( $new_elt->{last_child});} ; $elt->{parent}=$new_elt; if( $XML::Twig::weakrefs) { weaken( $elt->{parent});} ; $elt->{prev_sibling}=undef; if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling});} ; @@ -9188,7 +9202,7 @@ if( my $parent= $ref->{parent}) { $elt->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $elt->{parent});} ; if( $parent->{first_child} == $ref) { $parent->{first_child}= $elt; } - if( $parent->{last_child} == $ref) { $parent->{empty}=0; $parent->{last_child}=$elt; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; } + if( $parent->{last_child} == $ref) { delete $parent->{empty}; $parent->{last_child}=$elt; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; } } elsif( $ref->twig && $ref == $ref->twig->root) { $ref->twig->set_root( $elt); } @@ -9605,7 +9619,8 @@ # at most one div will be loaded in memory my $twig=XML::Twig->new( twig_handlers => - { title => sub { $_->set_tag( 'h2') }, # change title tags to h2 + { title => sub { $_->set_tag( 'h2') }, # change title tags to h2 + # $_ is the current element para => sub { $_->set_tag( 'p') }, # change para to p hidden => sub { $_->delete; }, # remove hidden elements list => \&my_list_process, # process list elements @@ -9744,7 +9759,7 @@ # the element (including all its sub-elements) as arguments sub section { my( $t, $section)= @_; # arguments for all twig_handlers - $section->set_tag( 'div'); # change the tag name.4, my favourite method... + $section->set_tag( 'div'); # change the tag name # let's use the attribute nb as a prefix to the title my $title= $section->first_child( 'title'); # find the title my $nb= $title->{'att'}->{'nb'}; # get the attribute @@ -10446,6 +10461,12 @@ See L<DTD Handling> for more information +=item DTD_base <path_to_DTD_directory> + +If the DTD is in a different directory, looks for it there, useful to make up +somewhat for the lack of catalog suport in C<expat>. You still need a SYSTEM +declaration + =item DTD_handler Set a handler that will be called once the doctype (and the DTD) have been diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/XML-Twig-3.48/Twig_pm.slow new/XML-Twig-3.49/Twig_pm.slow --- old/XML-Twig-3.48/Twig_pm.slow 2014-03-30 10:50:04.000000000 +0200 +++ new/XML-Twig-3.49/Twig_pm.slow 2015-04-12 09:46:57.000000000 +0200 @@ -144,7 +144,7 @@ BEGIN { -$VERSION = '3.48'; +$VERSION = '3.49'; use XML::Parser; my $needVersion = '2.23'; @@ -403,7 +403,7 @@ TopDownHandlers => 1, KeepEncoding => 1, DoNotEscapeAmpInAtts => 1, ParseStartTag => 1, KeepAttsOrder => 1, - LoadDTD => 1, DTDHandler => 1, + LoadDTD => 1, DTDHandler => 1, DTDBase => 1, NoXxe => 1, DoNotOutputDTD => 1, NoProlog => 1, ExpandExternalEnts => 1, DiscardSpaces => 1, KeepSpaces => 1, DiscardAllSpaces => 1, @@ -533,8 +533,14 @@ else { $self->set_expand_external_entities( 0); } + #if( $args{noXxe}) { $self->{twig_no_xxe}= 1; delete $args{noXxe}; } + if( !$args{NoLWP} && ! _use( 'URI') && ! _use( 'URI::File') && ! _use( 'LWP')) { $self->{twig_ext_ent_handler}= \&XML::Parser::initial_ext_ent_handler } + elsif( $args{NoXxe}) + { $self->{twig_ext_ent_handler}= + sub { my($xp, $base, $path) = @_; $xp->{ErrorMessage}.= "cannot use entities in document when the no_xxe option is on"; return undef; }; + } else { $self->{twig_ext_ent_handler}= \&XML::Parser::file_ext_ent_handler } @@ -673,7 +679,7 @@ else { $self->set_output_text_filter( 0); } - if( exists $args{KeepAttsOrder}) + if( $args{KeepAttsOrder}) { $self->{keep_atts_order}= $args{KeepAttsOrder}; if( _use( 'Tie::IxHash')) { $self->set_keep_atts_order( $self->{keep_atts_order}); } @@ -763,6 +769,7 @@ if( !$t && $@=~m{(syntax error at line 1, column 0, byte 0|not well-formed \(invalid token\) at line 1, column 1, byte 1)} && -f $_[0] + && ( ! ref( $_[0]) || ref( $_[0])) ne 'GLOB' # -f works on a filehandle, so this make sure $_[0] is a real file ) { croak "you seem to have used the parse method on a filename ($_[0]), you probably want parsefile instead"; } return _checked_parse_result( $t, $@); @@ -796,7 +803,16 @@ sub finish_now { my $t= shift; $t->{twig_finish_now}=1; - die $t; + # XML::Parser 2.43 changed xpcroak in a way that caused test failures for XML::Twig + # the change was reverted in 2.44, but this is here to ensure that tests pass with 2.43 + if( $XML::Parser::VERSION == 2.43) + { no warnings; + $t->parser->{twig_error}= $t; + *XML::Parser::Expat::xpcroak= sub { die $_[0]->{twig_error}; }; + die $t; + } + else + { die $t; } } @@ -2898,7 +2914,17 @@ # now check if we want to get the DTD info if( $t->{twig_read_external_dtd} && $sysid) { # let's build a fake document with an internal DTD - my $dtd= "<!DOCTYPE $name [" . _slurp_uri( $sysid) . "]><$name/>"; + if( $t->{DTDBase}) + { _use( 'File::Spec'); + $sysid=File::Spec->catfile($t->{DTDBase}, $sysid); + } + my $dtd= _slurp_uri( $sysid); + # if the DTD includes an XML declaration, it needs to be moved before the DOCTYPE bit + if( $dtd=~ s{^(\s*<\?xml(\s+\w+\s*=\s*("[^"]*"|'[^']*'))*\s*\?>)}{}) + { $dtd= "$1<!DOCTYPE $name [$dtd]><$name/>"; } + else + { $dtd= "<!DOCTYPE $name [$dtd]><$name/>"; } + #my $dtd= "<!DOCTYPE $name [" . _slurp_uri( $sysid) . "]><$name/>"; $t->save_global_state(); # save the globals (they will be reset by the following new) my $t_dtd= XML::Twig->new( load_DTD => 1, ParseParamEnt => 1, error_context => $t->{ErrorContext} || 0); # create a temp twig @@ -4954,21 +4980,21 @@ if( $gi eq $PCDATA) { if( grep { ref $_ } @_) { croak "element $PCDATA can only be created from text"; } - $elt->_set_pcdata( join( '', @_)); + $elt->_set_pcdata( join '', @_); } elsif( $gi eq $ENT) { $elt->set_ent( shift); } elsif( $gi eq $CDATA) { if( grep { ref $_ } @_) { croak "element $CDATA can only be created from text"; } - $elt->_set_cdata( join( '', @_)); + $elt->_set_cdata( join '', @_); } elsif( $gi eq $COMMENT) { if( grep { ref $_ } @_) { croak "element $COMMENT can only be created from text"; } - $elt->_set_comment( join( '', @_)); + $elt->_set_comment( join '', @_); } elsif( $gi eq $PI) { if( grep { ref $_ } @_) { croak "element $PI can only be created from text"; } - $elt->_set_pi( shift, join( '', @_)); + $elt->_set_pi( shift, join '', @_); } else { # the rest of the arguments are the content of the element @@ -5464,7 +5490,8 @@ { $elt->cut_children; $elt->set_gi( $COMMENT); } - return $elt->_set_comment( @_); + $elt->_set_comment( $_[0]); + return $elt; } sub _set_comment { $_[0]->{comment}= $_[1]; return $_[0]; } @@ -5490,7 +5517,8 @@ $elt->insert_new_elt( first_child => $CDATA, @_); return $elt; } - return $elt->_set_cdata( @_); + $elt->_set_cdata( $_[0]); + return $_[0]; } sub _set_cdata @@ -5790,6 +5818,7 @@ sub set_last_child { $_[0]->{'last_child'}= $_[1]; + delete $_->[0]->{empty}; if( $XML::Twig::weakrefs) { weaken( $_[0]->{'last_child'}); } } @@ -5894,7 +5923,7 @@ { my $elt= shift; my %atts; tie %atts, 'Tie::IxHash' if( keep_atts_order()); - %atts= ( (ref( $_[0] || '') eq 'HASH') || isa( $_[0] || '', 'HASH')) ? %{$_[0]} : @_; + %atts= @_ == 1 ? %{$_[0]} : @_; $elt->{att}= \%atts; if( exists $atts{$ID}) { $elt->_set_id( $atts{$ID}); } return $elt; @@ -6203,7 +6232,7 @@ # case where elt is subtree_root, is empty and has no sibling return undef if( $subtree_root && ($elt == $subtree_root)); - $next_elt= $elt->_parent; + $next_elt= $elt->_parent || return undef; until( $next_elt->_next_sibling) { return undef if( $subtree_root && ($subtree_root == $next_elt)); @@ -6418,13 +6447,13 @@ sub prev_sibling_text { my $elt= shift; - my $dest=$elt->_prev_sibling(@_) or return ''; + my $dest= $elt->_prev_sibling(@_) or return ''; return $dest->text; } sub prev_sibling_trimmed_text { my $elt= shift; - my $dest=$elt->_prev_sibling(@_) or return ''; + my $dest= $elt->_prev_sibling(@_) or return ''; return $dest->trimmed_text; } @@ -6436,13 +6465,13 @@ sub next_sibling_text { my $elt= shift; - my $dest=$elt->next_sibling(@_) or return ''; + my $dest= $elt->next_sibling(@_) or return ''; return $dest->text; } sub next_sibling_trimmed_text { my $elt= shift; - my $dest=$elt->next_sibling(@_) or return ''; + my $dest= $elt->next_sibling(@_) or return ''; return $dest->trimmed_text; } @@ -6454,13 +6483,13 @@ sub prev_elt_text { my $elt= shift; - my $dest=$elt->prev_elt(@_) or return ''; + my $dest= $elt->prev_elt(@_) or return ''; return $dest->text; } sub prev_elt_trimmed_text { my $elt= shift; - my $dest=$elt->prev_elt(@_) or return ''; + my $dest= $elt->prev_elt(@_) or return ''; return $dest->trimmed_text; } @@ -6472,13 +6501,13 @@ sub next_elt_text { my $elt= shift; - my $dest=$elt->next_elt(@_) or return ''; + my $dest= $elt->next_elt(@_) or return ''; return $dest->text; } sub next_elt_trimmed_text { my $elt= shift; - my $dest=$elt->next_elt(@_) or return ''; + my $dest= $elt->next_elt(@_) or return ''; return $dest->trimmed_text; } @@ -6490,13 +6519,13 @@ sub parent_text { my $elt= shift; - my $dest=$elt->parent(@_) or return ''; + my $dest= $elt->parent(@_) or return ''; return $dest->text; } sub parent_trimmed_text { my $elt= shift; - my $dest=$elt->parent(@_) or return ''; + my $dest= $elt->parent(@_) or return ''; return $dest->trimmed_text; } @@ -6858,7 +6887,7 @@ } } #warn "step: '$step'"; - $sub .= "\@results= grep { \$_ } map { $step } \@results;"; + $sub .= "\@results= grep defined, map { $step } \@results;"; } } } @@ -6926,7 +6955,6 @@ { my $elt= shift; my( $parent, $prev_sibling, $next_sibling); $parent= $elt->_parent; - my $a= $elt->att( 'a') || 'na'; if( ! $parent && $elt->is_elt) { # are we cutting the root? my $t= $elt->{twig}; @@ -7003,13 +7031,48 @@ } - sub erase { my $elt= shift; #you cannot erase the current element if( $elt->{twig_current}) { croak "trying to erase an element before it has been completely parsed"; } - unless( $elt->_parent) + if( my $parent= $elt->_parent) + { # normal case + $elt->_move_extra_data_after_erase; + my @children= $elt->_children; + if( @children) + { + # elt has children, move them up + + # the first child may need to be merged with a previous text + my $first_child= shift @children; + $first_child->move( before => $elt); + my $prev= $first_child->prev_sibling; + if( $prev && $prev->is_text && ($first_child->gi eq $prev->gi) ) + { $prev->merge_text( $first_child); } + + # move the rest of the children + foreach my $child (@children) + { $child->move( before => $elt); } + + # now the elt had no child, delete it + $elt->delete; + + # now see if we need to merge the last child with the next element + my $last_child= $children[-1] || $first_child; # if no last child, then it's also the first child + my $next= $last_child->next_sibling; + if( $next && $next->is_text && ($last_child->gi eq $next->gi) ) + { $last_child->merge_text( $next); } + + # if parsing and have now a PCDATA text, mark so we can normalize later on if need be + if( $parent->{twig_current} && $last_child->is_text) { $parent->{twig_to_be_normalized}=1; } + } + else + { # no children, just cut the elt + $elt->delete; + } + } + else { # trying to erase the root (of a twig or of a cut/new element) my @children= $elt->_children; unless( @children == 1) @@ -7020,63 +7083,7 @@ my $twig= $elt->twig; $twig->set_root( $child); } - else - { # normal case - $elt->_move_extra_data_after_erase; - my @children= $elt->_children; - if( @children) - { # elt has children, move them up - - my $first_child= $elt->first_child; - my $prev_sibling=$elt->_prev_sibling; - if( $prev_sibling) - { # connect first child to previous sibling - $first_child->set_prev_sibling( $prev_sibling); - $prev_sibling->set_next_sibling( $first_child); - } - else - { # elt was the first child - $elt->_parent->set_first_child( $first_child); - } - - my $last_child= $elt->_last_child; - my $next_sibling= $elt->_next_sibling; - if( $next_sibling) - { # connect last child to next sibling - $last_child->set_next_sibling( $next_sibling); - $next_sibling->set_prev_sibling( $last_child); - } - else - { # elt was the last child - $elt->_parent->set_last_child( $last_child); - } - # update parent for all siblings - foreach my $child (@children) - { $child->set_parent( $elt->_parent); } - - # merge consecutive text elements if need be - if( $prev_sibling && $prev_sibling->is_text && ($first_child->gi eq $prev_sibling->gi) ) - { $prev_sibling->merge_text( $first_child); } - if( $next_sibling && $next_sibling->is_text && ($last_child->gi eq $next_sibling->gi) ) - { $last_child->merge_text( $next_sibling); } - # if parsing and have now a PCDATA text, mark so we can normalize later on if need be - if( $elt->_parent->{twig_current} && $elt->_last_child->is_text) { $elt->_parent->{twig_to_be_normalized}=1; } - - # elt is not referenced any more, so it will be DESTROYed - # so we'd better break the links to its children ## FIX - undef $elt->{first_child}; - undef $elt->{last_child}; - undef $elt->{parent}; - undef $elt->{next_sibling}; - undef $elt->{prev_sibling}; - - } - { # elt had no child, delete it - $elt->delete; - } - - } return $elt; } @@ -7424,12 +7431,17 @@ || ( $nb_children{$child_gi} > 1) ) { # simplify element to store in an array - $data->{$child_gi} ||= []; - push @{$data->{$child_gi}}, $child_data; + if( defined $child_data && $child_data ne "" ) + { $data->{$child_gi} ||= []; + push @{$data->{$child_gi}}, $child_data; + } + else + { $data->{$child_gi}= [{}]; } } else { # simplify element to store as a hash field - $data->{$child_gi}= $child_data; + $data->{$child_gi}=$child_data; + $data->{$child_gi}= defined $child_data && $child_data ne "" ? $child_data : {}; } } } @@ -9001,7 +9013,9 @@ return $elt->set_pcdata( $string); } else - { return $elt->_set_cdata( $string); } + { $elt->_set_cdata( $string); + return $string; + } } elsif( $elt->contains_a_single( $PCDATA) ) { # optimized so we have a slight chance of not loosing embedded comments and pi's @@ -9605,7 +9619,8 @@ # at most one div will be loaded in memory my $twig=XML::Twig->new( twig_handlers => - { title => sub { $_->set_tag( 'h2') }, # change title tags to h2 + { title => sub { $_->set_tag( 'h2') }, # change title tags to h2 + # $_ is the current element para => sub { $_->set_tag( 'p') }, # change para to p hidden => sub { $_->delete; }, # remove hidden elements list => \&my_list_process, # process list elements @@ -9744,7 +9759,7 @@ # the element (including all its sub-elements) as arguments sub section { my( $t, $section)= @_; # arguments for all twig_handlers - $section->set_tag( 'div'); # change the tag name.4, my favourite method... + $section->set_tag( 'div'); # change the tag name # let's use the attribute nb as a prefix to the title my $title= $section->first_child( 'title'); # find the title my $nb= $title->att( 'nb'); # get the attribute @@ -10446,6 +10461,12 @@ See L<DTD Handling> for more information +=item DTD_base <path_to_DTD_directory> + +If the DTD is in a different directory, looks for it there, useful to make up +somewhat for the lack of catalog suport in C<expat>. You still need a SYSTEM +declaration + =item DTD_handler Set a handler that will be called once the doctype (and the DTD) have been diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/XML-Twig-3.48/speedup new/XML-Twig-3.49/speedup --- old/XML-Twig-3.48/speedup 2013-04-29 13:26:21.000000000 +0200 +++ new/XML-Twig-3.49/speedup 2014-06-11 09:23:43.000000000 +0200 @@ -7,12 +7,12 @@ ); # _$private is inlined my $FORMER = join( '|', qw( parent prev_sibling next_sibling)); # former_$former is inlined my $SET_FIELD = join( '|', qw( first_child next_sibling ent data pctarget comment flushed)); -my $SET_NOT_EMPTY= join( '|', qw( pcdata cdata)); # set the field and mark as not empty +my $SET_NOT_EMPTY= join( '|', qw( pcdata cdata comment)); # set the field # depending on the version of perl use either qr or "" print STDERR "perl version is $]\n"; -my $var= '(\$[a-z_]+(?:\[\d\])?)'; +my $var= '(\$[a-z_]+(?:\[\d\])?|\$t(?:wig)?->root|\$t(?:wig)?->twig_current|\$t(?:wig)?->{\'?twig_root\'?}|\$t(?:wig)?->{\'?twig_current\'?})'; my $set_to = '(?:undef|\$\w+|\$\w+->\{\w+\}|\$\w+->\w+|\$\w+->\w+\([^)]+\))'; my $elt = '\$(?:elt|new_elt|child|cdata|ent|_?parent|twig_current|next_sibling|first_child|prev_sibling|last_child|ref|elt->_parent)'; @@ -40,8 +40,11 @@ s/$var->set_gi\(\s*(PCDATA|CDATA|PI|COMMENT|ENT)\s*\)/$1\->{gi}= $gi2index{$2}/; - s/$var->del_(twig_current|flushed)/delete $1\->{'$2'}/g; - s/$var->set_(twig_current|flushed)/$1\->{'$2'}=1/g; + s/$var->del_(twig_current)/delete $1\->{'$2'}/g; + s/$var->set_(twig_current)/$1\->{'$2'}=1/g; + s/$var->_del_(flushed)/delete $1\->{'$2'}/g; + s/$var->_set_(flushed)/$1\->{'$2'}=1/g; + s/$var->_(flushed)/$1\->{'$2'}/g; s/$var->set_($SET_FIELD)\(([^)]*)\)/$1\->\{$2\}= $3/g; s/$var->($FIELD)\b(?!\()/$1\->\{$2\}/g; @@ -59,6 +62,7 @@ s/$var->append_(pcdata|cdata)\(([^)]*)\)/$1\->\{$2\}.= $3/g; s/$var->set_($SET_NOT_EMPTY)\(([^)]*)\)/$1\->\{$2\}= (delete $1->\{empty\} || 1) && $3/g; + s/$var->_set_($SET_NOT_EMPTY)\s*\(([^)]*)\)/$1\->{$2}= $3/g; s/(\$[a-z][a-z_]*(?:\[\d\])?)->gi/\$XML::Twig::index2gi\[$1\->{'gi'}\]/g; @@ -74,7 +78,7 @@ s/$var->is_empty/$1\->{'empty'}/g; s/$var->set_empty(?:\(([^)]*)\))?(?!_)/"$1\->{empty}= " . ($2 || 1)/ge; - s/$var->set_not_empty/$1\->{empty}=0/g; + s/$var->set_not_empty/delete $1\->{empty}/g; #s/$var->set_not_empty/delete $1\->{empty}/g; s/$var->_is_private/( (substr( \$XML::Twig::index2gi\[$1\->{'gi'}\], 0, 1) eq '#') && (substr( \$XML::Twig::index2gi\[$1\->{'gi'}\], 0, 9) ne '#default:') )/g; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/XML-Twig-3.48/t/test_3_48.t new/XML-Twig-3.49/t/test_3_48.t --- old/XML-Twig-3.48/t/test_3_48.t 1970-01-01 01:00:00.000000000 +0100 +++ new/XML-Twig-3.49/t/test_3_48.t 2014-06-11 09:23:41.000000000 +0200 @@ -0,0 +1,22 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +use XML::Twig; +use Test::More tests => 1; + +use utf8; + +{ +XML::Twig::_disallow_use( 'Tie::IxHash'); +my $t; +eval { $t= XML::Twig->new( keep_atts_order => 0); }; +ok( $t, 'keep_atts_order => 0'); +} + + +exit; + + + diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/XML-Twig-3.48/t/test_additional.t new/XML-Twig-3.49/t/test_additional.t --- old/XML-Twig-3.48/t/test_additional.t 2012-08-06 18:48:54.000000000 +0200 +++ new/XML-Twig-3.49/t/test_additional.t 2014-06-11 09:23:41.000000000 +0200 @@ -55,24 +55,24 @@ $SIG{__WARN__} = sub { $warning.= join '', @_ }; XML::Twig->new( dummy_opt => 1); $SIG{__WARN__}= $old_warning_handler; -ok( $warning=~ m{^invalid option DummyOpt}, "expecting 'invalid option DummyOpt...', got '$warning'\n");# test 5 +chomp $warning; +matches( $warning, qr{^invalid option DummyOpt}, "expecting 'invalid option DummyOpt...', got '$warning'");# test 5 # test no warming if more_options is used $warning=""; $SIG{__WARN__} = sub { $warning.= join '', @_ }; XML::Twig->new( more_options => 1, dummy_opt => 1); $SIG{__WARN__}= $old_warning_handler; -nok( $warning, "expecting no warning, got '$warning'\n");# test 6 +nok( $warning, "expecting no warning, got '$warning'");# test 6 $warning=""; $SIG{__WARN__} = sub { $warning.= join '', @_ }; XML::Twig::add_options( 'dummy_opt'); XML::Twig->new( dummy_opt => 1); $SIG{__WARN__}= $old_warning_handler; -nok( $warning, "expecting no warning (2), got '$warning'\n");# test 7 +nok( $warning, "expecting no warning (2), got '$warning'");# test 7 } - { # test do_not_chain_handlers my $nb_calls=0; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/XML-Twig-3.48/t/test_erase.t new/XML-Twig-3.49/t/test_erase.t --- old/XML-Twig-3.48/t/test_erase.t 2012-05-17 12:24:59.000000000 +0200 +++ new/XML-Twig-3.49/t/test_erase.t 2014-06-11 09:23:40.000000000 +0200 @@ -39,30 +39,30 @@ <test1> <elt><erase/></elt> <elt>text <erase/></elt> - <elt><erase/> text</elt> - <elt>text <erase/> text</elt> + <elt><erase/> text (1)</elt> + <elt>text <erase/> text (2)</elt> <elt><child/><erase/><child/></elt> <elt><erase/><child/></elt> <elt><child/><erase/></elt> </test1> <!-- erase an element with 1 text child --> <test2> - <elt><erase>text</erase></elt> - <elt>text <erase>text</erase></elt> - <elt><erase>text</erase> text</elt> - <elt>text <erase>text</erase> text</elt> - <elt><child/><erase>text</erase><child/></elt> - <elt><erase>text</erase><child/></elt> + <elt><erase>text (3)</erase></elt> + <elt>text <erase>text (4)</erase></elt> + <elt><erase>text (5)</erase> text (6)</elt> + <elt>text (7)<erase>text (8)</erase> text (9)</elt> + <elt><child/><erase>text (10)</erase><child/></elt> + <elt><erase>text (11)</erase><child/></elt> <elt><child/><erase>text</erase></elt> </test2> <!-- erase an element with several children --> <test3> - <elt><erase><child>text</child><child/></erase></elt> - <elt>text <erase><child>text</child><child/></erase></elt> - <elt><erase><child>text</child><child/></erase> text</elt> - <elt>text <erase><child>text</child><child/></erase> text</elt> - <elt><child/><erase><child>text</child><child/></erase>child/></elt> - <elt><erase><child>text</child><child/></erase>child/></elt> - <elt><child/><erase><child>text</child><child/></erase></elt> + <elt><erase><child>text (12)</child><child/></erase></elt> + <elt>text (13)<erase><child>text (14)</child><child/></erase></elt> + <elt><erase><child>text (15)</child><child/></erase> text (16)</elt> + <elt>text (17)<erase><child>text (18)</child><child/></erase> text (19)</elt> + <elt><child/><erase><child>text (20)</child><child/></erase>child/></elt> + <elt><erase><child>text (21)</child><child/></erase>child/></elt> + <elt><child/><erase><child>text (22)</child><child/></erase></elt> </test3> </doc> diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/XML-Twig-3.48/t/test_errors.t new/XML-Twig-3.49/t/test_errors.t --- old/XML-Twig-3.48/t/test_errors.t 2013-05-04 08:03:56.000000000 +0200 +++ new/XML-Twig-3.49/t/test_errors.t 2014-10-05 09:04:53.000000000 +0200 @@ -14,7 +14,7 @@ use XML::Twig; -my $TMAX=119; +my $TMAX=121; print "1..$TMAX\n"; my $error_file= File::Spec->catfile('t','test_errors.errors'); @@ -337,6 +337,14 @@ matches( $@, "error in xpath expression", 'error in xpath expression //foo/following::'); } +# tests for https://rt.cpan.org/Public/Bug/Display.html?id=97461 (wrong error message due to filehandle seen as a file) +{ eval { XML::Twig->new->parse( do { open( my $fh, '<', $0); $fh}); }; + not_matches( $@, "you seem to have used the parse method on a filename", "parse on a filehandle containing invalid XML"); + open FOO, "<$0"; + eval { XML::Twig->new->parse( \*FOO); }; + not_matches( $@, "you seem to have used the parse method on a filename", "parse on a GLOBAL filehandle containing invalid XML"); +} + exit 0; sub can_check_for_pipes diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/XML-Twig-3.48/t/test_simplify.t new/XML-Twig-3.49/t/test_simplify.t --- old/XML-Twig-3.48/t/test_simplify.t 2012-05-17 12:24:59.000000000 +0200 +++ new/XML-Twig-3.49/t/test_simplify.t 2014-10-05 09:55:08.000000000 +0200 @@ -68,7 +68,7 @@ delete $options_simple->{var_regexp}; my $simple = XMLin( $doc, %$options_simple); my $res=is_deeply( $twig, $simple, "doc: $doc_name - options: $options_text" ); #. Dump( {twig => $twig, simple => $simple})); - exit unless( $res); + #exit unless( $res); } } @@ -115,3 +115,14 @@ <f1_ar><f1>f1 1</f1><f1>f1 2</f1></f1_ar> <f2_ar><f2>f2 1</f2><f2>f2 2</f2></f2_ar> </doc> + +<doc doc="empty elements test"> + <section><elt><![CDATA[something]]></elt></section> + <section><elt><![CDATA[0]]></elt></section> + <section><elt><![CDATA[]]></elt></section> + <section><elt/></section> + <section><elt>something</elt></section> + <section><elt>0</elt></section> + <section><elt></elt></section> +</doc> + diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/XML-Twig-3.48/t/tools.pm new/XML-Twig-3.49/t/tools.pm --- old/XML-Twig-3.48/t/tools.pm 2012-06-15 15:42:05.000000000 +0200 +++ new/XML-Twig-3.49/t/tools.pm 2014-10-05 08:59:20.000000000 +0200 @@ -77,6 +77,23 @@ } } + sub not_matches + { my $got = shift; my $expected_regexp= shift; my $message = shift; + $test_nb++; + + if( $got!~ /$expected_regexp/) + { print "ok $test_nb"; + print " $message" if( $TDEBUG); + print "\n"; + return 1; + } + else { print "not ok $test_nb\n"; + warn "$message: expected to NOT match /$expected_regexp/, got '$got'\n"; + croak if $TFATAL; + return 0; + } + } + sub ok { my $cond = shift; my $message=shift; $test_nb++; ++++++ cpanspec.yml ++++++ --- #description_paragraphs: 3 #no_testing: broken upstream #sources: # - source1 # - source2 #patches: # foo.patch: -p1 # bar.patch: preamble: |- BuildRequires: expat BuildRequires: perl-HTML-Tidy BuildRequires: perl-IO-CaptureOutput BuildRequires: perl-Test-Pod BuildRequires: perl-Text-Wrapper BuildRequires: perl-Tie-IxHash BuildRequires: perl-XML-Filter-BufferText BuildRequires: perl-XML-Handler-YAWriter BuildRequires: perl-XML-Parser BuildRequires: perl-XML-SAX-Writer BuildRequires: perl-XML-Simple BuildRequires: perl-XML-XPath BuildRequires: perl-XML-XPathEngine Requires: expat Requires: perl-XML-Parser Requires: perl(Encode) BuildRequires: perl-HTML-Tidy BuildRequires: perl-Text-Wrapper BuildRequires: perl-Tie-IxHash BuildRequires: perl-XML-XPath BuildRequires: perl-XML-XPathEngine
