This is a patch that implements this (recognizing backquotes as quote characters around attribute values); enable it by calling the $p->backquote(1) attribute method. It will show up in HTML-Parser 3.57.
? HTML-Parser-3.55-threads.diff ? HTML-Parser-3.55.tar.gz ? HTML-Parser-3.56.tar.gz ? HTML-Tagset-3.10 ? xxx Index: Parser.pm =================================================================== RCS file: /cvsroot/libwww-perl/html-parser/Parser.pm,v retrieving revision 2.218 diff -u -p -u -r2.218 Parser.pm --- Parser.pm 6 Feb 2007 18:09:54 -0000 2.218 +++ Parser.pm 6 Feb 2007 19:43:45 -0000 @@ -329,6 +329,14 @@ By default, the C<attr> and C<@attr> arg entities for attribute values decoded. Enabling this attribute leaves entities alone. +=item $p->backquote + +=item $p->backquote( $bool ) + +By default, only ' and " are recognized as quote characters around +attribute values. MSIE also recognizes backquotes for some reason. +Enabling this attribute provides compatibility with this behaviour. + =item $p->boolean_attribute_value( $val ) This method sets the value reported for boolean attributes inside HTML Index: Parser.xs =================================================================== RCS file: /cvsroot/libwww-perl/html-parser/Parser.xs,v retrieving revision 2.137 diff -u -p -u -r2.137 Parser.xs --- Parser.xs 12 Jan 2007 10:18:39 -0000 2.137 +++ Parser.xs 6 Feb 2007 19:43:45 -0000 @@ -265,6 +265,7 @@ dup_pstate(pTHX_ PSTATE *pstate, CLONE_P pstate2->utf8_mode = pstate->utf8_mode; pstate2->empty_element_tags = pstate->empty_element_tags; pstate2->xml_pic = pstate->xml_pic; + pstate2->backquote = pstate->backquote; pstate2->bool_attr_val = SvREFCNT_inc(sv_dup(pstate->bool_attr_val, params)); @@ -447,6 +448,7 @@ strict_comment(pstate,...) HTML::Parser::utf8_mode = 10 HTML::Parser::empty_element_tags = 11 HTML::Parser::xml_pic = 12 + HTML::Parser::backquote = 13 PREINIT: bool *attr; CODE: @@ -470,8 +472,9 @@ strict_comment(pstate,...) 
#else case 10: croak("The utf8_mode does not work with this perl; perl-5.8 or better required"); #endif - case 11: attr = &pstate->empty_element_tags; break; + case 11: attr = &pstate->empty_element_tags; break; case 12: attr = &pstate->xml_pic; break; + case 13: attr = &pstate->backquote; break; default: croak("Unknown boolean attribute (%d)", ix); } Index: hparser.c =================================================================== RCS file: /cvsroot/libwww-perl/html-parser/hparser.c,v retrieving revision 2.134 diff -u -p -u -r2.134 hparser.c --- hparser.c 12 Jan 2007 10:54:06 -0000 2.134 +++ hparser.c 6 Feb 2007 19:43:46 -0000 @@ -455,7 +455,7 @@ report_event(PSTATE* p_state, if (tokens[i+1].beg) { char *beg = tokens[i+1].beg; STRLEN len = tokens[i+1].end - beg; - if (*beg == '"' || *beg == '\'') { + if (*beg == '"' || *beg == '\'' || (*beg == '`' && p_state->backquote)) { assert(len >= 2 && *beg == beg[len-1]); beg++; len -= 2; } @@ -1166,7 +1166,7 @@ parse_decl(PSTATE* p_state, char *beg, c if (s == end) goto PREMATURE; - if (*s == '"' || *s == '\'') { + if (*s == '"' || *s == '\'' || (*s == '`' && p_state->backquote)) { char *str_beg = s; s++; while (s < end && *s != *str_beg) @@ -1337,7 +1337,7 @@ parse_start(PSTATE* p_state, char *beg, PUSH_TOKEN(s, s); break; } - if (*s == '"' || *s == '\'') { + if (*s == '"' || *s == '\'' || (*s == '`' && p_state->backquote)) { char *str_beg = s; s++; while (s < end && *s != *str_beg) Index: hparser.h =================================================================== RCS file: /cvsroot/libwww-perl/html-parser/hparser.h,v retrieving revision 2.34 diff -u -p -u -r2.34 hparser.h --- hparser.h 26 Apr 2006 07:01:10 -0000 2.34 +++ hparser.h 6 Feb 2007 19:43:46 -0000 @@ -109,6 +109,7 @@ struct p_state { bool utf8_mode; bool empty_element_tags; bool xml_pic; + bool backquote; /* other configuration stuff */ SV* bool_attr_val; Index: t/msie-compat.t =================================================================== RCS file: 
/cvsroot/libwww-perl/html-parser/t/msie-compat.t,v retrieving revision 1.7 diff -u -p -u -r1.7 msie-compat.t --- t/msie-compat.t 9 Jun 2006 07:59:38 -0000 1.7 +++ t/msie-compat.t 6 Feb 2007 19:43:46 -0000 @@ -3,13 +3,13 @@ use strict; use HTML::Parser; -use Test::More tests => 2; +use Test::More tests => 4; my $TEXT = ""; sub h { - my($event, $tagname, $text) = @_; - for ($event, $tagname, $text) { + my($event, $tagname, $text, @attr) = @_; + for ($event, $tagname, $text, @attr) { if (defined) { s/([\n\r\t])/sprintf "\\%03o", ord($1)/ge; } @@ -18,10 +18,10 @@ sub h } } - $TEXT .= "[$event,$tagname,$text]\n"; + $TEXT .= "[$event,$tagname,$text," . join(":", @attr) . "]\n"; } -my $p = HTML::Parser->new(default_h => [\&h, "event,tagname,text"]); +my $p = HTML::Parser->new(default_h => [\&h, "event,tagname,text,\@attr"]); $p->parse("<a>"); $p->parse("</a f>"); $p->parse("</a 'foo<>' 'bar>' x>"); @@ -33,18 +33,18 @@ $p->parse("<!--comment>text<!--comment>< $p->eof; is($TEXT, <<'EOT'); -[start_document,<undef>,] -[start,a,<a>] -[end,a,</a f>] -[end,a,</a 'foo<>' 'bar>' x>] -[end,a,</a "foo<>" "bar>" x>] -[comment, foo bar,</ foo bar>] -[comment, "<>" ,</ "<>" >] -[comment,comment,<!--comment>] -[text,<undef>,text] -[comment,comment,<!--comment>] -[comment,p,<p] -[end_document,<undef>,] +[start_document,<undef>,,] +[start,a,<a>,] +[end,a,</a f>,] +[end,a,</a 'foo<>' 'bar>' x>,] +[end,a,</a "foo<>" "bar>" x>,] +[comment, foo bar,</ foo bar>,] +[comment, "<>" ,</ "<>" >,] +[comment,comment,<!--comment>,] +[text,<undef>,text,] +[comment,comment,<!--comment>,] +[comment,p,<p,] +[end_document,<undef>,,] EOT $TEXT = ""; @@ -52,7 +52,28 @@ $p->parse("<!comment>"); $p->eof; is($TEXT, <<'EOT'); -[start_document,<undef>,] -[comment,comment,<!comment>] -[end_document,<undef>,] +[start_document,<undef>,,] +[comment,comment,<!comment>,] +[end_document,<undef>,,] +EOT + +$TEXT = ""; +$p->parse(q(<a name=`foo bar`>)); +$p->eof; + +is($TEXT, <<'EOT'); 
+[start_document,<undef>,,] +[start,a,<a name=`foo bar`>,name:`foo:bar`:bar`] +[end_document,<undef>,,] +EOT + +$p->backquote(1); +$TEXT = ""; +$p->parse(q(<a name=`foo bar`>)); +$p->eof; + +is($TEXT, <<'EOT'); +[start_document,<undef>,,] +[start,a,<a name=`foo bar`>,name:foo bar] +[end_document,<undef>,,] EOT