A few days back we kicked around the idea of having one module
containing all the facts about all the HTML tags. Here's my first
stab at such a thing, made by just combining things from Element,
TreeBuilder, and LinkExtor. Thoughts, anyone?
(Suggestions for the name are especially welcome. So far I've
considered HTML::Tags, but that's bad because it's too close to
HTML::Element, and HTML::DocType, which is not so good because it
implies this is a DTD, or the kind of information you'd find in a
DTD.)
# Time-stamp: "1999-12-27 15:58:38 MST [EMAIL PROTECTED]"
package HTML::Known;
use strict;

# Module version.  Perl's own version machinery (UNIVERSAL::VERSION, and
# therefore "use HTML::Known 1.0;") reads the all-caps $VERSION, so that is
# the variable that must be set for version checks to succeed.
$HTML::Known::VERSION = '1.1';

# The original mixed-case spelling, kept as an alias so that any code
# already reading $HTML::Known::Version continues to work.
$HTML::Known::Version = $HTML::Known::VERSION;

# About the table that follows:
# 'can_tighten' is true for all tags such that when whitespace is
# between any two such tags (whether opening or closing), intervening
# whitespace may be ignored while parsing an HTML document, or may be
# introduced when emitting HTML.
# %HTML::Known::Tags maps a lower-case tag name to a hash of facts about
# that tag.  A fact that does not apply to a tag is simply absent.
#
# Flag keys (value 1 when present):
#   can_tighten      whitespace around the tag may be dropped or introduced
#   empty            presumably: element has no content / no end tag
#   end_optional     presumably: end tag may be omitted
#   phrasal          presumably: phrase-level (inline) markup
#   in_head, in_body, in_head_or_body
#                    presumably the parser's placement sets; note that
#                    in_head_or_body is stored independently of the other
#                    two and is not derived from them -- TODO confirm
#
# List keys (array ref; order is preserved exactly as written here):
#   boolean_attributes   presumably: attributes that take no value
#   link_attributes      presumably: attributes whose value is a link
#
# For every list key "X" present on a tag, a lookup hash "X_h" mapping each
# listed attribute name to 1 is generated by the loop after the table, so
# the list and its lookup hash cannot drift out of sync.
%HTML::Known::Tags =
(
    'a' => {
        'in_body' => 1,
        'link_attributes' => ['href'],
        'phrasal' => 1,
    },
    'abbr' => { 'in_body' => 1, 'phrasal' => 1 },
    'acronym' => { 'in_body' => 1, 'phrasal' => 1 },
    'address' => {
        'can_tighten' => 1,
        'in_body' => 1,
    },
    'applet' => {
        'can_tighten' => 1,
        'in_body' => 1,
        'link_attributes' => ['archive', 'codebase', 'code'],
    },
    'area' => {
        'boolean_attributes' => ['nohref'],
        'can_tighten' => 1,
        'empty' => 1,
        'in_body' => 1,
        'in_head_or_body' => 1,
        'link_attributes' => ['href'],
    },
    'b' => { 'in_body' => 1, 'phrasal' => 1 },
    'base' => {
        'can_tighten' => 1,
        'empty' => 1,
        'in_head' => 1,
        'link_attributes' => ['href'],
    },
    'basefont' => {
        'empty' => 1,
        'in_body' => 1,
        'phrasal' => 1,
    },
    'bdo' => { 'in_body' => 1, 'phrasal' => 1 },
    'bgsound' => {
        'can_tighten' => 1,
        'empty' => 1,
        'in_body' => 1,
        'in_head' => 1,
        'in_head_or_body' => 1,
        'link_attributes' => ['src'],
    },
    'big' => { 'in_body' => 1, 'phrasal' => 1 },
    'blink' => { 'in_body' => 1, 'phrasal' => 1 },
    'blockquote' => {
        'can_tighten' => 1,
        'in_body' => 1,
        'link_attributes' => ['cite'],
    },
    'body' => {
        'can_tighten' => 1,
        'link_attributes' => ['background'],
    },
    'br' => {
        'can_tighten' => 1,
        'empty' => 1,
        'in_body' => 1,
        'phrasal' => 1,
    },
    'button' => {
        'can_tighten' => 1,
        'in_body' => 1,
    },
    'caption' => {
        'can_tighten' => 1,
        'in_body' => 1,
    },
    'center' => {
        'can_tighten' => 1,
        'in_body' => 1,
    },
    'cite' => { 'in_body' => 1, 'phrasal' => 1 },
    'code' => { 'in_body' => 1, 'phrasal' => 1 },
    'col' => {
        'can_tighten' => 1,
        'empty' => 1,
        'in_body' => 1,
    },
    'colgroup' => {
        'can_tighten' => 1,
        'in_body' => 1,
    },
    'dd' => {
        'can_tighten' => 1,
        'end_optional' => 1,
        'in_body' => 1,
    },
    'del' => {
        'can_tighten' => 1,
        'in_body' => 1,
        'link_attributes' => ['cite'],
    },
    'dfn' => { 'in_body' => 1, 'phrasal' => 1 },
    'dir' => {
        'boolean_attributes' => ['compact'],
        'can_tighten' => 1,
        'in_body' => 1,
    },
    'div' => {
        'can_tighten' => 1,
        'in_body' => 1,
    },
    'dl' => {
        'boolean_attributes' => ['compact'],
        'can_tighten' => 1,
        'in_body' => 1,
    },
    'dt' => {
        'can_tighten' => 1,
        'end_optional' => 1,
        'in_body' => 1,
    },
    'em' => { 'in_body' => 1, 'phrasal' => 1 },
    'embed' => {
        'empty' => 1,
        'in_body' => 1,
        'link_attributes' => ['pluginspage', 'src'],
        'phrasal' => 1,
    },
    'fieldset' => {
        'can_tighten' => 1,
        'in_body' => 1,
    },
    'font' => { 'in_body' => 1, 'phrasal' => 1 },
    'form' => {
        'can_tighten' => 1,
        'in_body' => 1,
        'link_attributes' => ['action'],
    },
    'frame' => {
        'can_tighten' => 1,
        'empty' => 1,
        'in_body' => 1,
        'link_attributes' => ['src', 'longdesc'],
    },
    'frameset' => {
        'can_tighten' => 1,
        'in_body' => 1,
    },
    'h1' => {
        'can_tighten' => 1,
        'in_body' => 1,
    },
    'h2' => {
        'can_tighten' => 1,
        'in_body' => 1,
    },
    'h3' => {
        'can_tighten' => 1,
        'in_body' => 1,
    },
    'h4' => {
        'can_tighten' => 1,
        'in_body' => 1,
    },
    'h5' => {
        'can_tighten' => 1,
        'in_body' => 1,
    },
    'h6' => {
        'can_tighten' => 1,
        'in_body' => 1,
    },
    'head' => {
        'can_tighten' => 1,
        'link_attributes' => ['profile'],
    },
    'hr' => {
        'boolean_attributes' => ['noshade'],
        'can_tighten' => 1,
        'empty' => 1,
        'in_body' => 1,
    },
    'html' => {
        'can_tighten' => 1,
    },
    'i' => { 'in_body' => 1, 'phrasal' => 1 },
    'iframe' => {
        'can_tighten' => 1,
        'in_body' => 1,
        'link_attributes' => ['src', 'longdesc'],
    },
    'ilayer' => {
        'can_tighten' => 1,
        'in_body' => 1,
        'link_attributes' => ['background'],
    },
    'img' => {
        'boolean_attributes' => ['ismap'],
        'empty' => 1,
        'in_body' => 1,
        'link_attributes' => ['src', 'lowsrc', 'longdesc', 'usemap'],
        'phrasal' => 1,
    },
    'input' => {
        'boolean_attributes' => ['disabled', 'readonly', 'checked'],
        'empty' => 1,
        'in_body' => 1,
        'link_attributes' => ['src', 'usemap'],
    },
    'ins' => {
        'can_tighten' => 1,
        'in_body' => 1,
        'link_attributes' => ['cite'],
    },
    'isindex' => {
        'can_tighten' => 1,
        'empty' => 1,
        'in_body' => 1,
        'in_head' => 1,
        'in_head_or_body' => 1,
        'link_attributes' => ['action'],
    },
    'kbd' => { 'in_body' => 1, 'phrasal' => 1 },
    'label' => {
        'can_tighten' => 1,
        'in_body' => 1,
    },
    # NOTE(review): unlike 'ilayer' and 'nolayer', this entry carries no
    # in_body or can_tighten flag -- confirm whether that is intentional.
    'layer' => {
        'link_attributes' => ['background', 'src'],
    },
    'legend' => {
        'can_tighten' => 1,
        'in_body' => 1,
    },
    'li' => {
        'can_tighten' => 1,
        'end_optional' => 1,
        'in_body' => 1,
    },
    'link' => {
        'can_tighten' => 1,
        'empty' => 1,
        'in_head' => 1,
        'link_attributes' => ['href'],
    },
    'listing' => {
        'can_tighten' => 1,
        'in_body' => 1,
    },
    'map' => {
        'can_tighten' => 1,
        'in_body' => 1,
        'in_head_or_body' => 1,
    },
    'menu' => {
        'boolean_attributes' => ['compact'],
        'can_tighten' => 1,
        'in_body' => 1,
    },
    'meta' => {
        'can_tighten' => 1,
        'empty' => 1,
        'in_head' => 1,
    },
    'multicol' => {
        'can_tighten' => 1,
        'in_body' => 1,
    },
    'nobr' => { 'in_body' => 1, 'phrasal' => 1 },
    'noembed' => { 'in_body' => 1, 'phrasal' => 1 },
    'noframes' => {
        'can_tighten' => 1,
        'in_body' => 1,
    },
    'nolayer' => {
        'can_tighten' => 1,
        'in_body' => 1,
    },
    'noscript' => {
        'can_tighten' => 1,
        'in_body' => 1,
        'in_head_or_body' => 1,
    },
    'object' => {
        'can_tighten' => 1,
        'in_body' => 1,
        'in_head' => 1,
        'in_head_or_body' => 1,
        'link_attributes' => ['classid', 'codebase', 'data', 'archive', 'usemap'],
    },
    'ol' => {
        'boolean_attributes' => ['compact'],
        'can_tighten' => 1,
        'in_body' => 1,
    },
    'optgroup' => {
        'can_tighten' => 1,
        'in_body' => 1,
    },
    'option' => {
        'boolean_attributes' => ['selected'],
        'can_tighten' => 1,
        'in_body' => 1,
    },
    'p' => {
        'can_tighten' => 1,
        'end_optional' => 1,
        'in_body' => 1,
    },
    'param' => {
        'can_tighten' => 1,
        'empty' => 1,
        'in_body' => 1,
        'in_head_or_body' => 1,
    },
    'plaintext' => {
        'can_tighten' => 1,
        'in_body' => 1,
    },
    'pre' => {
        'can_tighten' => 1,
        'in_body' => 1,
    },
    'q' => {
        'in_body' => 1,
        'link_attributes' => ['cite'],
        'phrasal' => 1,
    },
    's' => { 'in_body' => 1, 'phrasal' => 1 },
    'samp' => { 'in_body' => 1, 'phrasal' => 1 },
    'script' => {
        'can_tighten' => 1,
        'in_body' => 1,
        'in_head' => 1,
        'in_head_or_body' => 1,
        'link_attributes' => ['src', 'for'],
    },
    # NOTE(review): 'option' and 'optgroup' are can_tighten but 'select'
    # is not -- confirm whether that is intentional.
    'select' => {
        'boolean_attributes' => ['multiple'],
        'in_body' => 1,
    },
    'small' => { 'in_body' => 1, 'phrasal' => 1 },
    'spacer' => {
        'empty' => 1,
        'in_body' => 1,
        'phrasal' => 1,
    },
    'span' => { 'in_body' => 1, 'phrasal' => 1 },
    'strike' => { 'in_body' => 1, 'phrasal' => 1 },
    'strong' => { 'in_body' => 1, 'phrasal' => 1 },
    'style' => {
        'can_tighten' => 1,
        'in_head' => 1,
        'in_head_or_body' => 1,
    },
    'sub' => { 'in_body' => 1, 'phrasal' => 1 },
    'sup' => { 'in_body' => 1, 'phrasal' => 1 },
    'table' => {
        'can_tighten' => 1,
        'in_body' => 1,
        'link_attributes' => ['background'],
    },
    'tbody' => {
        'can_tighten' => 1,
        'in_body' => 1,
    },
    'td' => {
        'boolean_attributes' => ['nowrap'],
        'can_tighten' => 1,
        'in_body' => 1,
        'link_attributes' => ['background'],
    },
    'textarea' => {
        'can_tighten' => 1,
        'in_body' => 1,
    },
    'tfoot' => {
        'can_tighten' => 1,
        'in_body' => 1,
    },
    'th' => {
        'boolean_attributes' => ['nowrap'],
        'can_tighten' => 1,
        'in_body' => 1,
        'link_attributes' => ['background'],
    },
    'thead' => {
        'can_tighten' => 1,
        'in_body' => 1,
    },
    'title' => {
        'can_tighten' => 1,
        'in_head' => 1,
    },
    'tr' => {
        'can_tighten' => 1,
        'in_body' => 1,
    },
    'tt' => { 'in_body' => 1, 'phrasal' => 1 },
    'u' => { 'in_body' => 1, 'phrasal' => 1 },
    'ul' => {
        'boolean_attributes' => ['compact'],
        'can_tighten' => 1,
        'in_body' => 1,
    },
    'var' => { 'in_body' => 1, 'phrasal' => 1 },
    'wbr' => {
        'empty' => 1,
        'in_body' => 1,
        'phrasal' => 1,
    },
    'xmp' => {
        'can_tighten' => 1,
        'in_body' => 1,
        'link_attributes' => ['href'],
    },
);

# Generate the "X_h" lookup hash for every list key "X" a tag carries.
# Each tag gets its own fresh hash, so callers may modify one tag's
# lookup hash without affecting any other tag.
{
    for my $tag_facts (values %HTML::Known::Tags) {
        for my $list_key ('boolean_attributes', 'link_attributes') {
            my $names = $tag_facts->{$list_key};
            next unless $names;

            # Hash-slice assignment: every listed name maps to 1.
            my %is_listed;
            @is_listed{@$names} = (1) x @$names;
            $tag_facts->{$list_key . '_h'} = \%is_listed;
        }
    }
}
1;
__END__
--
Sean M. Burke [EMAIL PROTECTED] http://www.netadventure.net/~sburke/