This is a patch that implements this; enable it by calling the
$p->backquote() attribute method with a true value.  It will show up in
HTML-Parser 3.57.
? HTML-Parser-3.55-threads.diff
? HTML-Parser-3.55.tar.gz
? HTML-Parser-3.56.tar.gz
? HTML-Tagset-3.10
? xxx
Index: Parser.pm
===================================================================
RCS file: /cvsroot/libwww-perl/html-parser/Parser.pm,v
retrieving revision 2.218
diff -u -p -u -r2.218 Parser.pm
--- Parser.pm	6 Feb 2007 18:09:54 -0000	2.218
+++ Parser.pm	6 Feb 2007 19:43:45 -0000
@@ -329,6 +329,14 @@ By default, the C<attr> and C<@attr> arg
 entities for attribute values decoded.  Enabling this attribute leaves
 entities alone.
 
+=item $p->backquote
+
+=item $p->backquote( $bool )
+
+By default, only ' and " are recognized as quote characters around
+attribute values.  MSIE also recognizes backquotes for some reason.
+Enabling this attribute provides compatibility with this behaviour.
+
 =item $p->boolean_attribute_value( $val )
 
 This method sets the value reported for boolean attributes inside HTML
Index: Parser.xs
===================================================================
RCS file: /cvsroot/libwww-perl/html-parser/Parser.xs,v
retrieving revision 2.137
diff -u -p -u -r2.137 Parser.xs
--- Parser.xs	12 Jan 2007 10:18:39 -0000	2.137
+++ Parser.xs	6 Feb 2007 19:43:45 -0000
@@ -265,6 +265,7 @@ dup_pstate(pTHX_ PSTATE *pstate, CLONE_P
     pstate2->utf8_mode = pstate->utf8_mode;
     pstate2->empty_element_tags = pstate->empty_element_tags;
     pstate2->xml_pic = pstate->xml_pic;
+    pstate2->backquote = pstate->backquote;
 
     pstate2->bool_attr_val =
 	SvREFCNT_inc(sv_dup(pstate->bool_attr_val, params));
@@ -447,6 +448,7 @@ strict_comment(pstate,...)
         HTML::Parser::utf8_mode = 10
         HTML::Parser::empty_element_tags = 11
         HTML::Parser::xml_pic = 12
+	HTML::Parser::backquote = 13
     PREINIT:
 	bool *attr;
     CODE:
@@ -470,8 +472,9 @@ strict_comment(pstate,...)
 #else
 	case 10: croak("The utf8_mode does not work with this perl; perl-5.8 or better required");
 #endif
-	case 11: attr = &pstate->empty_element_tags;    break;
+	case 11: attr = &pstate->empty_element_tags;   break;
         case 12: attr = &pstate->xml_pic;              break;
+	case 13: attr = &pstate->backquote;            break;
 	default:
 	    croak("Unknown boolean attribute (%d)", ix);
         }
Index: hparser.c
===================================================================
RCS file: /cvsroot/libwww-perl/html-parser/hparser.c,v
retrieving revision 2.134
diff -u -p -u -r2.134 hparser.c
--- hparser.c	12 Jan 2007 10:54:06 -0000	2.134
+++ hparser.c	6 Feb 2007 19:43:46 -0000
@@ -455,7 +455,7 @@ report_event(PSTATE* p_state,
 		    if (tokens[i+1].beg) {
 			char *beg = tokens[i+1].beg;
 			STRLEN len = tokens[i+1].end - beg;
-			if (*beg == '"' || *beg == '\'') {
+			if (*beg == '"' || *beg == '\'' || (*beg == '`' && p_state->backquote)) {
 			    assert(len >= 2 && *beg == beg[len-1]);
 			    beg++; len -= 2;
 			}
@@ -1166,7 +1166,7 @@ parse_decl(PSTATE* p_state, char *beg, c
 	    if (s == end)
 		goto PREMATURE;
 
-	    if (*s == '"' || *s == '\'') {
+	    if (*s == '"' || *s == '\'' || (*s == '`' && p_state->backquote)) {
 		char *str_beg = s;
 		s++;
 		while (s < end && *s != *str_beg)
@@ -1337,7 +1337,7 @@ parse_start(PSTATE* p_state, char *beg, 
 		PUSH_TOKEN(s, s);
 		break;
 	    }
-	    if (*s == '"' || *s == '\'') {
+	    if (*s == '"' || *s == '\'' || (*s == '`' && p_state->backquote)) {
 		char *str_beg = s;
 		s++;
 		while (s < end && *s != *str_beg)
Index: hparser.h
===================================================================
RCS file: /cvsroot/libwww-perl/html-parser/hparser.h,v
retrieving revision 2.34
diff -u -p -u -r2.34 hparser.h
--- hparser.h	26 Apr 2006 07:01:10 -0000	2.34
+++ hparser.h	6 Feb 2007 19:43:46 -0000
@@ -109,6 +109,7 @@ struct p_state {
     bool utf8_mode;
     bool empty_element_tags;
     bool xml_pic;
+    bool backquote;
 
     /* other configuration stuff */
     SV* bool_attr_val;
Index: t/msie-compat.t
===================================================================
RCS file: /cvsroot/libwww-perl/html-parser/t/msie-compat.t,v
retrieving revision 1.7
diff -u -p -u -r1.7 msie-compat.t
--- t/msie-compat.t	9 Jun 2006 07:59:38 -0000	1.7
+++ t/msie-compat.t	6 Feb 2007 19:43:46 -0000
@@ -3,13 +3,13 @@
 use strict;
 use HTML::Parser;
 
-use Test::More tests => 2;
+use Test::More tests => 4;
 
 my $TEXT = "";
 sub h
 {
-    my($event, $tagname, $text) = @_;
-    for ($event, $tagname, $text) {
+    my($event, $tagname, $text, @attr) = @_;
+    for ($event, $tagname, $text, @attr) {
         if (defined) {
 	    s/([\n\r\t])/sprintf "\\%03o", ord($1)/ge;
 	}
@@ -18,10 +18,10 @@ sub h
 	}
     }
 
-    $TEXT .= "[$event,$tagname,$text]\n";
+    $TEXT .= "[$event,$tagname,$text," . join(":", @attr) . "]\n";
 }
 
-my $p = HTML::Parser->new(default_h => [\&h, "event,tagname,text"]);
+my $p = HTML::Parser->new(default_h => [\&h, "event,tagname,text,\@attr"]);
 $p->parse("<a>");
 $p->parse("</a f>");
 $p->parse("</a 'foo<>' 'bar>' x>");
@@ -33,18 +33,18 @@ $p->parse("<!--comment>text<!--comment><
 $p->eof;
 
 is($TEXT, <<'EOT');
-[start_document,<undef>,]
-[start,a,<a>]
-[end,a,</a f>]
-[end,a,</a 'foo<>' 'bar>' x>]
-[end,a,</a "foo<>" "bar>" x>]
-[comment, foo bar,</ foo bar>]
-[comment, "<>" ,</ "<>" >]
-[comment,comment,<!--comment>]
-[text,<undef>,text]
-[comment,comment,<!--comment>]
-[comment,p,<p]
-[end_document,<undef>,]
+[start_document,<undef>,,]
+[start,a,<a>,]
+[end,a,</a f>,]
+[end,a,</a 'foo<>' 'bar>' x>,]
+[end,a,</a "foo<>" "bar>" x>,]
+[comment, foo bar,</ foo bar>,]
+[comment, "<>" ,</ "<>" >,]
+[comment,comment,<!--comment>,]
+[text,<undef>,text,]
+[comment,comment,<!--comment>,]
+[comment,p,<p,]
+[end_document,<undef>,,]
 EOT
 
 $TEXT = "";
@@ -52,7 +52,28 @@ $p->parse("<!comment>");
 $p->eof;
 
 is($TEXT, <<'EOT');
-[start_document,<undef>,]
-[comment,comment,<!comment>]
-[end_document,<undef>,]
+[start_document,<undef>,,]
+[comment,comment,<!comment>,]
+[end_document,<undef>,,]
+EOT
+
+$TEXT = "";
+$p->parse(q(<a name=`foo bar`>));
+$p->eof;
+
+is($TEXT, <<'EOT');
+[start_document,<undef>,,]
+[start,a,<a name=`foo bar`>,name:`foo:bar`:bar`]
+[end_document,<undef>,,]
+EOT
+
+$p->backquote(1);
+$TEXT = "";
+$p->parse(q(<a name=`foo bar`>));
+$p->eof;
+
+is($TEXT, <<'EOT');
+[start_document,<undef>,,]
+[start,a,<a name=`foo bar`>,name:foo bar]
+[end_document,<undef>,,]
 EOT

Reply via email to