A few days back we kicked around the idea of having one module
containing all the facts about all the HTML tags.  Here's my first
stab at such a thing, made by just combining things from Element,
TreeBuilder, and LinkExtor.  Thoughts, anyone?

(Suggestions for the name are especially welcome.  So far I've
considered HTML::Tags, which is bad because it's too close to
HTML::Element, and HTML::DocType, which is not so good because it
implies that this is a DTD, or the kind of information you'd find in a
DTD.)

# Time-stamp: "1999-12-27 15:58:38 MST [EMAIL PROTECTED]"
package HTML::Known;
use strict;

# HTML::Known gathers facts about HTML tags into a single package-level
# table, so that other modules can consult one shared source of data.

# Module version.  $VERSION is the variable Perl itself consults
# (UNIVERSAL::VERSION, "use HTML::Known 1.1", and the usual build
# tooling).  The earlier $Version spelling is kept as an alias holding
# the same value, so code that already reads it keeps working.
$HTML::Known::VERSION = '1.1';
$HTML::Known::Version = $HTML::Known::VERSION;

# 'can_tighten' is true for all tags such that when whitespace is
# between any two such tags (whether opening or closing), intervening
# whitespace may be ignored while parsing an HTML document, or may be
# introduced when emitting HTML.

# %HTML::Known::Tags -- one entry per HTML tag name (written in
# lowercase here), each mapping to a hash ref of facts about that tag.
# A key missing from an entry means that fact is not recorded for the
# tag; every flag that is present has the value 1.
#
# Keys appearing in this table:
#
#   link_attributes / link_attributes_h
#       The same set of attribute names given twice: as an array ref,
#       and as a hash ref with each name mapped to 1 for direct lookup.
#       NOTE(review): presumably the attributes whose values are URLs
#       -- confirm against the code this list was taken from.
#
#   boolean_attributes / boolean_attributes_h
#       Same paired array-ref / hash-ref layout as above.
#       NOTE(review): presumably attributes that may appear without a
#       value (e.g. 'compact', 'checked') -- confirm.
#
#   can_tighten
#       Set for tags such that whitespace between any two such tags may
#       be ignored when parsing, or introduced when emitting HTML.
#
#   empty, end_optional, phrasal, in_body, in_head, in_head_or_body
#       Simple flags.  Their exact meaning is not defined in this file.
#       NOTE(review): the names suggest "has no content / no end tag",
#       "end tag may be omitted", "inline-level markup", and where the
#       tag may appear in the document -- verify against the modules
#       the data came from before relying on that reading.
#
# The array-ref and hash-ref forms of an attribute list are maintained
# by hand, so both must be updated together when an entry changes.
%HTML::Known::Tags = 
(
 'a' => {
  'in_body' => 1,
  'link_attributes' => ['href'],
  'link_attributes_h' => {'href' => 1},
  'phrasal' => 1,
  },
 'abbr' => { 'in_body' => 1, 'phrasal' => 1 },
 'acronym' => { 'in_body' => 1, 'phrasal' => 1 },
 'address' => {
  'can_tighten' => 1,
  'in_body' => 1,
  },
 'applet' => {
  'can_tighten' => 1,
  'in_body' => 1,
  'link_attributes' => ['archive', 'codebase', 'code'],
  'link_attributes_h' => {'archive' => 1, 'code' => 1, 'codebase' => 1},
  },
 'area' => {
  'boolean_attributes' => ['nohref'],
  'boolean_attributes_h' => {'nohref' => 1},
  'can_tighten' => 1,
  'empty' => 1,
  'in_body' => 1,
  'in_head_or_body' => 1,
  'link_attributes' => ['href'],
  'link_attributes_h' => {'href' => 1},
  },
 'b' => { 'in_body' => 1, 'phrasal' => 1 },
 'base' => {
  'can_tighten' => 1,
  'empty' => 1,
  'in_head' => 1,
  'link_attributes' => ['href'],
  'link_attributes_h' => {'href' => 1},
  },
 'basefont' => {
  'empty' => 1,
  'in_body' => 1,
  'phrasal' => 1,
  },
 'bdo' => { 'in_body' => 1, 'phrasal' => 1 },
 'bgsound' => {
  'can_tighten' => 1,
  'empty' => 1,
  'in_body' => 1,
  'in_head' => 1,
  'in_head_or_body' => 1,
  'link_attributes' => ['src'],
  'link_attributes_h' => {'src' => 1},
  },
 'big' => { 'in_body' => 1, 'phrasal' => 1 },
 'blink' => { 'in_body' => 1, 'phrasal' => 1 },
 'blockquote' => {
  'can_tighten' => 1,
  'in_body' => 1,
  'link_attributes' => ['cite'],
  'link_attributes_h' => {'cite' => 1},
  },
 'body' => {
  'can_tighten' => 1,
  'link_attributes' => ['background'],
  'link_attributes_h' => {'background' => 1},
  },
 'br' => {
  'can_tighten' => 1,
  'empty' => 1,
  'in_body' => 1,
  'phrasal' => 1,
  },
 'button' => {
  'can_tighten' => 1,
  'in_body' => 1,
  },
 'caption' => {
  'can_tighten' => 1,
  'in_body' => 1,
  },
 'center' => {
  'can_tighten' => 1,
  'in_body' => 1,
  },
 'cite' => { 'in_body' => 1, 'phrasal' => 1 },
 'code' => { 'in_body' => 1, 'phrasal' => 1 },
 'col' => {
  'can_tighten' => 1,
  'empty' => 1,
  'in_body' => 1,
  },
 'colgroup' => {
  'can_tighten' => 1,
  'in_body' => 1,
  },
 'dd' => {
  'can_tighten' => 1,
  'end_optional' => 1,
  'in_body' => 1,
  },
 'del' => {
  'can_tighten' => 1,
  'in_body' => 1,
  'link_attributes' => ['cite'],
  'link_attributes_h' => {'cite' => 1},
  },
 'dfn' => { 'in_body' => 1, 'phrasal' => 1 },
 'dir' => {
  'boolean_attributes' => ['compact'],
  'boolean_attributes_h' => {'compact' => 1},
  'can_tighten' => 1,
  'in_body' => 1,
  },
 'div' => {
  'can_tighten' => 1,
  'in_body' => 1,
  },
 'dl' => {
  'boolean_attributes' => ['compact'],
  'boolean_attributes_h' => {'compact' => 1},
  'can_tighten' => 1,
  'in_body' => 1,
  },
 'dt' => {
  'can_tighten' => 1,
  'end_optional' => 1,
  'in_body' => 1,
  },
 'em' => { 'in_body' => 1, 'phrasal' => 1 },
 'embed' => {
  'empty' => 1,
  'in_body' => 1,
  'link_attributes' => ['pluginspage', 'src'],
  'link_attributes_h' => {'pluginspage' => 1, 'src' => 1},
  'phrasal' => 1,
  },
 'fieldset' => {
  'can_tighten' => 1,
  'in_body' => 1,
  },
 'font' => { 'in_body' => 1, 'phrasal' => 1 },
 'form' => {
  'can_tighten' => 1,
  'in_body' => 1,
  'link_attributes' => ['action'],
  'link_attributes_h' => {'action' => 1},
  },
 'frame' => {
  'can_tighten' => 1,
  'empty' => 1,
  'in_body' => 1,
  'link_attributes' => ['src', 'longdesc'],
  'link_attributes_h' => {'longdesc' => 1, 'src' => 1},
  },
 'frameset' => {
  'can_tighten' => 1,
  'in_body' => 1,
  },
 'h1' => {
  'can_tighten' => 1,
  'in_body' => 1,
  },
 'h2' => {
  'can_tighten' => 1,
  'in_body' => 1,
  },
 'h3' => {
  'can_tighten' => 1,
  'in_body' => 1,
  },
 'h4' => {
  'can_tighten' => 1,
  'in_body' => 1,
  },
 'h5' => {
  'can_tighten' => 1,
  'in_body' => 1,
  },
 'h6' => {
  'can_tighten' => 1,
  'in_body' => 1,
  },
 'head' => {
  'can_tighten' => 1,
  'link_attributes' => ['profile'],
  'link_attributes_h' => {'profile' => 1},
  },
 'hr' => {
  'boolean_attributes' => ['noshade'],
  'boolean_attributes_h' => {'noshade' => 1},
  'can_tighten' => 1,
  'empty' => 1,
  'in_body' => 1,
  },
 'html' => {
  'can_tighten' => 1,
  },
 'i' => { 'in_body' => 1, 'phrasal' => 1 },
 'iframe' => {
  'can_tighten' => 1,
  'in_body' => 1,
  'link_attributes' => ['src', 'longdesc'],
  'link_attributes_h' => {'longdesc' => 1, 'src' => 1},
  },
 'ilayer' => {
  'can_tighten' => 1,
  'in_body' => 1,
  'link_attributes' => ['background'],
  'link_attributes_h' => {'background' => 1},
  },
 'img' => {
  'boolean_attributes' => ['ismap'],
  'boolean_attributes_h' => {'ismap' => 1},
  'empty' => 1,
  'in_body' => 1,
  'link_attributes' => ['src', 'lowsrc', 'longdesc', 'usemap'],
  'link_attributes_h' => {'longdesc' => 1, 'lowsrc' => 1, 'src' => 1, 'usemap' => 1},
  'phrasal' => 1,
  },
 'input' => {
  'boolean_attributes' => ['disabled', 'readonly', 'checked'],
  'boolean_attributes_h' => {'checked' => 1, 'disabled' => 1, 'readonly' => 1},
  'empty' => 1,
  'in_body' => 1,
  'link_attributes' => ['src', 'usemap'],
  'link_attributes_h' => {'src' => 1, 'usemap' => 1},
  },
 'ins' => {
  'can_tighten' => 1,
  'in_body' => 1,
  'link_attributes' => ['cite'],
  'link_attributes_h' => {'cite' => 1},
  },
 'isindex' => {
  'can_tighten' => 1,
  'empty' => 1,
  'in_body' => 1,
  'in_head' => 1,
  'in_head_or_body' => 1,
  'link_attributes' => ['action'],
  'link_attributes_h' => {'action' => 1},
  },
 'kbd' => { 'in_body' => 1, 'phrasal' => 1 },
 'label' => {
  'can_tighten' => 1,
  'in_body' => 1,
  },
 'layer' => {
  'link_attributes' => ['background', 'src'],
  'link_attributes_h' => {'background' => 1, 'src' => 1},
  },
 'legend' => {
  'can_tighten' => 1,
  'in_body' => 1,
  },
 'li' => {
  'can_tighten' => 1,
  'end_optional' => 1,
  'in_body' => 1,
  },
 'link' => {
  'can_tighten' => 1,
  'empty' => 1,
  'in_head' => 1,
  'link_attributes' => ['href'],
  'link_attributes_h' => {'href' => 1},
  },
 'listing' => {
  'can_tighten' => 1,
  'in_body' => 1,
  },
 'map' => {
  'can_tighten' => 1,
  'in_body' => 1,
  'in_head_or_body' => 1,
  },
 'menu' => {
  'boolean_attributes' => ['compact'],
  'boolean_attributes_h' => {'compact' => 1},
  'can_tighten' => 1,
  'in_body' => 1,
  },
 'meta' => {
  'can_tighten' => 1,
  'empty' => 1,
  'in_head' => 1,
  },
 'multicol' => {
  'can_tighten' => 1,
  'in_body' => 1,
  },
 'nobr' => { 'in_body' => 1, 'phrasal' => 1 },
 'noembed' => { 'in_body' => 1, 'phrasal' => 1 },
 'noframes' => {
  'can_tighten' => 1,
  'in_body' => 1,
  },
 'nolayer' => {
  'can_tighten' => 1,
  'in_body' => 1,
  },
 'noscript' => {
  'can_tighten' => 1,
  'in_body' => 1,
  'in_head_or_body' => 1,
  },
 'object' => {
  'can_tighten' => 1,
  'in_body' => 1,
  'in_head' => 1,
  'in_head_or_body' => 1,
  'link_attributes' => ['classid', 'codebase', 'data', 'archive', 'usemap'],
  'link_attributes_h' => {'archive' => 1, 'classid' => 1, 'codebase' => 1, 'data' => 
1, 'usemap' => 1},
  },
 'ol' => {
  'boolean_attributes' => ['compact'],
  'boolean_attributes_h' => {'compact' => 1},
  'can_tighten' => 1,
  'in_body' => 1,
  },
 'optgroup' => {
  'can_tighten' => 1,
  'in_body' => 1,
  },
 'option' => {
  'boolean_attributes' => ['selected'],
  'boolean_attributes_h' => {'selected' => 1},
  'can_tighten' => 1,
  'in_body' => 1,
  },
 'p' => {
  'can_tighten' => 1,
  'end_optional' => 1,
  'in_body' => 1,
  },
 'param' => {
  'can_tighten' => 1,
  'empty' => 1,
  'in_body' => 1,
  'in_head_or_body' => 1,
  },
 'plaintext' => {
  'can_tighten' => 1,
  'in_body' => 1,
  },
 'pre' => {
  'can_tighten' => 1,
  'in_body' => 1,
  },
 'q' => {
  'in_body' => 1,
  'link_attributes' => ['cite'],
  'link_attributes_h' => {'cite' => 1},
  'phrasal' => 1,
  },
 's' => { 'in_body' => 1, 'phrasal' => 1 },
 'samp' => { 'in_body' => 1, 'phrasal' => 1 },
 'script' => {
  'can_tighten' => 1,
  'in_body' => 1,
  'in_head' => 1,
  'in_head_or_body' => 1,
  'link_attributes' => ['src', 'for'],
  'link_attributes_h' => {'for' => 1, 'src' => 1},
  },
 'select' => {
  'boolean_attributes' => ['multiple'],
  'boolean_attributes_h' => {'multiple' => 1},
  'in_body' => 1,
  },
 'small' => { 'in_body' => 1, 'phrasal' => 1 },
 'spacer' => {
  'empty' => 1,
  'in_body' => 1,
  'phrasal' => 1,
  },
 'span' => { 'in_body' => 1, 'phrasal' => 1 },
 'strike' => { 'in_body' => 1, 'phrasal' => 1 },
 'strong' => { 'in_body' => 1, 'phrasal' => 1 },
 'style' => {
  'can_tighten' => 1,
  'in_head' => 1,
  'in_head_or_body' => 1,
  },
 'sub' => { 'in_body' => 1, 'phrasal' => 1 },
 'sup' => { 'in_body' => 1, 'phrasal' => 1 },
 'table' => {
  'can_tighten' => 1,
  'in_body' => 1,
  'link_attributes' => ['background'],
  'link_attributes_h' => {'background' => 1},
  },
 'tbody' => {
  'can_tighten' => 1,
  'in_body' => 1,
  },
 'td' => {
  'boolean_attributes' => ['nowrap'],
  'boolean_attributes_h' => {'nowrap' => 1},
  'can_tighten' => 1,
  'in_body' => 1,
  'link_attributes' => ['background'],
  'link_attributes_h' => {'background' => 1},
  },
 'textarea' => {
  'can_tighten' => 1,
  'in_body' => 1,
  },
 'tfoot' => {
  'can_tighten' => 1,
  'in_body' => 1,
  },
 'th' => {
  'boolean_attributes' => ['nowrap'],
  'boolean_attributes_h' => {'nowrap' => 1},
  'can_tighten' => 1,
  'in_body' => 1,
  'link_attributes' => ['background'],
  'link_attributes_h' => {'background' => 1},
  },
 'thead' => {
  'can_tighten' => 1,
  'in_body' => 1,
  },
 'title' => {
  'can_tighten' => 1,
  'in_head' => 1,
  },
 'tr' => {
  'can_tighten' => 1,
  'in_body' => 1,
  },
 'tt' => { 'in_body' => 1, 'phrasal' => 1 },
 'u' => { 'in_body' => 1, 'phrasal' => 1 },
 'ul' => {
  'boolean_attributes' => ['compact'],
  'boolean_attributes_h' => {'compact' => 1},
  'can_tighten' => 1,
  'in_body' => 1,
  },
 'var' => { 'in_body' => 1, 'phrasal' => 1 },
 'wbr' => {
  'empty' => 1,
  'in_body' => 1,
  'phrasal' => 1,
  },
 'xmp' => {
  'can_tighten' => 1,
  'in_body' => 1,
  'link_attributes' => ['href'],
  'link_attributes_h' => {'href' => 1},
  },
);
1;

__END__
-- 
Sean M. Burke  [EMAIL PROTECTED]  http://www.netadventure.net/~sburke/

Reply via email to