Change 17668 by pudge@pudge-mobile on 2002/07/30 03:16:52 Help fixes from Thomas Wegner
Affected files ... .... //depot/macperl/macos/macperl/BuildHelpIndex#2 edit .... //depot/macperl/macos/macperl/MacPerl.podhelp#4 edit .... //depot/maint-5.6/macperl/macos/macperl/BuildHelpIndex#2 edit .... //depot/maint-5.6/macperl/macos/macperl/MacPerl.podhelp#10 edit .... //depot/maint-5.8/macperl/macos/macperl/BuildHelpIndex#2 edit .... //depot/maint-5.8/macperl/macos/macperl/MacPerl.podhelp#2 edit Differences ... ==== //depot/macperl/macos/macperl/BuildHelpIndex#2 (text) ==== Index: macperl/macos/macperl/BuildHelpIndex --- macperl/macos/macperl/BuildHelpIndex#1~16123~ Tue Apr 23 18:25:17 2002 +++ macperl/macos/macperl/BuildHelpIndex Mon Jul 29 20:16:52 2002 @@ -3,6 +3,461 @@ #!perl -s +use Pod::Parser; + +package MyPodParseLink; + +############################################################################## +# Since Pod::ParseLink is (a) not part of the standard library prior to Perl +# 5.8, and (b) I needed to make a little change for compatibility with Shuck, +# its code was copied over here. All credits go to Russ Allbery. +############################################################################## + +# Parse the name and section portion of a link into a name and section. +sub _parse_section { + my ($link) = @_; + $link =~ s/^\s+//; + $link =~ s/\s+$//; + + # If the whole link is enclosed in quotes, interpret it all as a section + # even if it contains a slash. + return (undef, $1) if ($link =~ /^"\s*(.*?)\s*"$/); + + # Split into page and section on slash, and then clean up quoting in the + # section. If there is no section and the name contains spaces, also + # guess that it's an old section link. + my ($page, $section) = split (/\s*\/\s*/, $link, 2); + $section =~ s/^"\s*(.*?)\s*"$/$1/ if $section; + if ($page && $page =~ / / && !defined ($section)) { + $section = $page; + $page = undef; + } else { + $page = undef unless $page; + $section = undef unless $section; + } + return ($page, $section); +} + +# Infer link text from the page and section. 
+sub _infer_text { + my ($page, $section) = @_; + my $inferred; + if ($page && !$section) { + $inferred = $page; + } elsif (!$page && $section) { + $inferred = '"' . $section . '"'; + } elsif ($page && $section) { + # The string 'the section ' string is prepended for compatibility with + # Shuck (this is the only difference from Pod::ParseLink) + $inferred = 'the section ' . '"' . $section . '" in ' . $page; + } + return $inferred; +} + +# Given the contents of an L<> formatting code, parse it into the link text, +# the possibly inferred link text, the name or URL, the section, and the type +# of link (pod, man, or url). +sub parselink { + my ($link) = @_; + $link =~ s/\s+/ /g; + if ($link =~ /\A\w+:[^:\s]\S*\Z/) { + return (undef, $link, $link, undef, 'url'); + } else { + my $text; + if ($link =~ /\|/) { + ($text, $link) = split (/\|/, $link, 2); + } + my ($name, $section) = _parse_section ($link); + my $inferred = $text || _infer_text ($name, $section); + my $type = ($name && $name =~ /\(\S*\)/) ? 'man' : 'pod'; + return ($text, $inferred, $name, $section, $type); + } +} + + +############################################################################## +# And now our Pod::Parser subclass, MyPodIndexer +# (A lot of this code was borrowed from Pod::Text, credits to Russ Allbery.) +############################################################################## +package MyPodIndexer; +@ISA = qw(Pod::Parser); + +# Table of supported E<> escapes for the Macintosh standard Roman character set. +# (These are exactly the same escapes as known by Shuck. This list is incomplete +# since the Macintosh standard Roman character set doesn't contain all characters +# defined in the ISO 8559-1 (Latin 1) character set.) 
+%ESCAPES = ( + 'amp' => '&', # ampersand + 'apos' => "'", # apostrophe + 'lt' => '<', # left chevron, less-than + 'gt' => '>', # right chevron, greater-than + 'quot' => '"', # double quote + 'sol' => '/', # solidus (forward slash) + 'verbar' => '|', # vertical bar + + "Aacute" => "Á", # capital A, acute accent + "aacute" => "á", # small a, acute accent + "Acirc" => "Â", # capital A, circumflex accent + "acirc" => "â", # small a, circumflex accent + "AElig" => "Æ", # capital AE diphthong (ligature) + "aelig" => "æ", # small ae diphthong (ligature) + "Agrave" => "À", # capital A, grave accent + "agrave" => "à", # small a, grave accent + "Aring" => "Å", # capital A, ring + "aring" => "å", # small a, ring + "Atilde" => "Ã", # capital A, tilde + "atilde" => "ã", # small a, tilde + "Auml" => "Ä", # capital A, dieresis or umlaut mark + "auml" => "ä", # small a, dieresis or umlaut mark + "Ccedil" => "Ç", # capital C, cedilla + "ccedil" => "ç", # small c, cedilla + "Eacute" => "É", # capital E, acute accent + "eacute" => "é", # small e, acute accent + "Ecirc" => "Ê", # capital E, circumflex accent + "ecirc" => "ê", # small e, circumflex accent + "Egrave" => "È", # capital E, grave accent + "egrave" => "è", # small e, grave accent + "ETH" => "Eth", # capital Eth, Icelandic + "eth" => "eth", # small eth, Icelandic + "Euml" => "Ë", # capital E, dieresis or umlaut mark + "euml" => "ë", # small e, dieresis or umlaut mark + "Iacute" => "Í", # capital I, acute accent + "iacute" => "í", # small i, acute accent + "Icirc" => "Î", # capital I, circumflex accent + "icirc" => "î", # small i, circumflex accent + "Igrave" => "Ì", # capital I, grave accent + "igrave" => "ì", # small i, grave accent + "Iuml" => "Ï", # capital I, dieresis or umlaut mark + "iuml" => "ï", # small i, dieresis or umlaut mark + "Ntilde" => "Ñ", # capital N, tilde + "ntilde" => "ñ", # small n, tilde + "Oacute" => "Ó", # capital O, acute accent + "oacute" => "ó", # small o, acute accent + "Ocirc" => "Ô", # capital 
O, circumflex accent + "ocirc" => "ô", # small o, circumflex accent + "Ograve" => "Ò", # capital O, grave accent + "ograve" => "ò", # small o, grave accent + "Oslash" => "Ø", # capital O, slash + "oslash" => "ø", # small o, slash + "Otilde" => "Õ", # capital O, tilde + "otilde" => "õ", # small o, tilde + "Ouml" => "Ö", # capital O, dieresis or umlaut mark + "ouml" => "ö", # small o, dieresis or umlaut mark + "szlig" => "ß", # small sharp s, German (sz ligature) + "THORN" => "Thorn", # capital THORN, Icelandic + "thorn" => "thorn", # small thorn, Icelandic + "Uacute" => "Ú", # capital U, acute accent + "uacute" => "ú", # small u, acute accent + "Ucirc" => "Û", # capital U, circumflex accent + "ucirc" => "û", # small u, circumflex accent + "Ugrave" => "Ù", # capital U, grave accent + "ugrave" => "ù", # small u, grave accent + "Uuml" => "Ü", # capital U, dieresis or umlaut mark + "uuml" => "ü", # small u, dieresis or umlaut mark + "Yacute" => "´Y", # capital Y, acute accent + "yacute" => "´y", # small y, acute accent + "yuml" => "ÿ", # small y, dieresis or umlaut mark + + "laquo" => "«", # left pointing double angle quotation mark + "lchevron" => "«", # synonym (backwards compatibility) + "raquo" => "»", # right pointing double angle quotation mark + "rchevron" => "»", # synonym (backwards compatibility) + + "iexcl" => "¡", # inverted exclamation mark + "cent" => "¢", # cent sign + "pound" => "£", # (UK) pound sign + + "yen" => "¥", # yen sign + + "sect" => "§", # section sign + "uml" => "¨", # diaresis + "copy" => "©", # Copyright symbol + "ordf" => "ª", # feminine ordinal indicator + "not" => "¬", # not sign + + "reg" => "®", # registered trademark + "macr" => "¯", # macron, overline + "deg" => "°", # degree sign + "plusmn" => "±", # plus-minus sign + + "acute" => "´", # acute accent + "micro" => "µ", # micro sign + "para" => "¶", # pilcrow sign = paragraph sign + "middot" => "·", # middle dot = Georgian comma + "cedil" => "¸", # cedilla + + "ordm" => "º", # 
masculine ordinal indicator + + "iquest" => "¿", # inverted question mark + "times" => "x", # multiplication sign (just an x) + "divide" => "÷", # division sign + + "nbsp" => " ", # non-breaking space +); + + +# This array maps the ISO 8559-1 (Latin-1) character set to the Mac standard Roman +character set. +# Latin 1: The first 127 characters are equal to ASCII. (0x00 - 0x7F, 0 - 127, +where 127 is non-printable) +# The next 32 characters are non-printable. (0x80 - 0x9F, 128 - 159) +# The last 96 characters are shown below. (0xA0 - 0xFF, 160 - +255) +# +# (subtract 160 from the Latin-1 char code to get the array index) +@Latin1_to_MacRoman_MAP = ( + + 0xCA, # 0 -- 0xA0, 160, ' ' -> nonbreaking space + 0xC1, # 1 -- 0xA1, 161, '¡' -> inverted +exclamation mark + 0xA2, # 2 -- 0xA2, 162, '¢' -> cent sign + 0xA3, # 3 -- 0xA3, 163, '£' -> (UK) pound sign + 0xDB, # 4 -- 0xA4, 164, '¤' -> currency sign (some +fonts display it as Euro sign; select e.g. Futura to see it) + 0xB4, # 5 -- 0xA5, 165, '¥' -> yen sign + 0x00, # 6 -- 0xA6, 166, n/a -> broken vertical bar +(not in the Mac standard Roman character set) + 0xA4, # 7 -- 0xA7, 167, '§' -> section sign + 0xAC, # 8 -- 0xA8, 168, '¨' -> diaresis + 0xA9, # 9 -- 0xA9, 169, '©' -> Copyright symbol + + 0xBB, # 10 -- 0xAA, 170, 'ª' -> feminine ordinal +indicator + 0xC7, # 11 -- 0xAB, 171, '«' -> left pointing +double angle quotation mark, guillemotleft + 0xC2, # 12 -- 0xAC, 172, '¬' -> (logical) not sign + 0xD0, # 13 -- 0xAD, 173, '' -> soft +(discretionary) hyphen (endash) + 0xA8, # 14 -- 0xAE, 174, '®' -> registered + 0xF8, # 15 -- 0xAF, 175, '¯' -> macron, overline + 0xA1, # 16 -- 0xB0, 176, '°' -> degree sign + 0xB1, # 17 -- 0xB1, 177, '±' -> plus-minus sign + 0x00, # 18 -- 0xB2, 178, n/a -> superscript 2 (not +in the Mac standard Roman character set) + 0x00, # 19 -- 0xB3, 179, n/a -> superscript 3 (not +in the Mac standard Roman character set) + + 0xAB, # 20 -- 0xB4, 180, '´' -> acute accent + 0xB5, # 21 -- 0xB5, 181, 'µ' 
-> micro sign + 0xA6, # 22 -- 0xB6, 182, '¶' -> pilcrow sign = +paragraph sign + 0xE1, # 23 -- 0xB7, 183, '·' -> middle dot or +periodcentered = Georgian comma + 0xFC, # 24 -- 0xB8, 184, '¸' -> cedilla + 0x00, # 25 -- 0xB9, 185, n/a -> superscript 1 (not +in the Mac standard Roman character set) + 0xBC, # 26 -- 0xBA, 186, 'º' -> masculine ordinal +indicator + 0xC8, # 27 -- 0xBB, 187, '»' -> right pointing +double angle quotation mark, guillemotright + 0x00, # 28 -- 0xBC, 188, n/a -> vulgar fraction one +quarter (not in the Mac standard Roman character set) + 0x00, # 29 -- 0xBD, 189, n/a -> vulgar fraction one +half (not in the Mac standard Roman character set) + + 0x00, # 30 -- 0xBE, 190, n/a -> vulgar fraction +three quarters (not in the Mac standard Roman character set) + 0xC0, # 31 -- 0xBF, 191, '¿' -> inverted question +mark + 0xCB, # 32 -- 0xC0, 192, 'À' -> capital A, grave +accent + 0xE7, # 33 -- 0xC1, 193, 'Á' -> capital A, acute +accent + 0xE5, # 34 -- 0xC2, 194, 'Â' -> capital A, +circumflex accent + 0xCC, # 35 -- 0xC3, 195, 'Ã' -> capital A, tilde + 0x80, # 36 -- 0xC4, 196, 'Ä' -> capital A, dieresis +or umlaut mark + 0x81, # 37 -- 0xC5, 197, 'Å' -> capital A, ring + 0xAE, # 38 -- 0xC6, 198, 'Æ' -> capital AE +diphthong (ligature) + 0x82, # 39 -- 0xC7, 199, 'Ç' -> capital C, cedilla + + 0xE9, # 40 -- 0xC8, 200, 'È' -> capital E, grave +accent + 0x83, # 41 -- 0xC9, 201, 'É' -> capital E, acute +accent + 0xE6, # 42 -- 0xCA, 202, 'Ê' -> capital E, +circumflex accent + 0xE8, # 43 -- 0xCB, 203, 'Ë' -> capital E, dieresis +or umlaut mark + 0xED, # 44 -- 0xCC, 204, 'Ì' -> capital I, grave +accent + 0xEA, # 45 -- 0xCD, 205, 'Í' -> capital I, acute +accent + 0xEB, # 46 -- 0xCE, 206, 'Î' -> capital I, +circumflex accent + 0xEC, # 47 -- 0xCF, 207, 'Ï' -> capital I, dieresis +or umlaut mark + 0x00, # 48 -- 0xD0, 208, n/a -> capital Eth, +Icelandic (not in the Mac standard Roman character set) + 0x84, # 49 -- 0xD1, 209, 'Ñ' -> capital N, tilde + + 0xF1, # 50 -- 0xD2, 
210, 'Ò' -> capital O, grave +accent + 0xEE, # 51 -- 0xD3, 211, 'Ó' -> capital O, acute +accent + 0xEF, # 52 -- 0xD4, 212, 'Ô' -> capital O, +circumflex accent + 0xCD, # 53 -- 0xD5, 213, 'Õ' -> capital O, tilde + 0x85, # 54 -- 0xD6, 214, 'Ö' -> capital O, dieresis +or umlaut mark + 0x00, # 55 -- 0xD7, 215, n/a -> multiplication sign +(not in the Mac standard Roman character set) + 0xAF, # 56 -- 0xD8, 216, 'Ø' -> capital O, slash + 0xF4, # 57 -- 0xD9, 217, 'Ù' -> capital U, grave +accent + 0xF2, # 58 -- 0xDA, 218, 'Ú' -> capital U, acute +accent + 0xF3, # 59 -- 0xDB, 219, 'Û' -> capital U, +circumflex accent + + 0x86, # 60 -- 0xDC, 220, 'Ü' -> capital U, dieresis +or umlaut mark + 0x00, # 61 -- 0xDD, 221, n/a -> capital Y, acute +accent (not in the Mac standard Roman character set) + 0x00, # 62 -- 0xDE, 222, n/a -> capital THORN, +Icelandic (not in the Mac standard Roman character set) + 0xA7, # 63 -- 0xDF, 223, 'ß' -> small sharp s, +German (sz ligature) + 0x88, # 64 -- 0xE0, 224, 'à' -> small a, grave +accent + 0x87, # 65 -- 0xE1, 225, 'á' -> small a, acute +accent + 0x89, # 66 -- 0xE2, 226, 'â' -> small a, circumflex +accent + 0x8B, # 67 -- 0xE3, 227, 'ã' -> small a, tilde + 0x8A, # 69 -- 0xE4, 228, 'ä' -> small a, dieresis +or umlaut mark + 0x8C, # 69 -- 0xE5, 229, 'å' -> small a, ring + + 0xBE, # 70 -- 0xE6, 230, 'æ' -> small ae diphthong +(ligature) + 0x8D, # 71 -- 0xE7, 231, 'ç' -> small c, cedilla + 0x8F, # 72 -- 0xE8, 232, 'è' -> small e, grave +accent + 0x8E, # 73 -- 0xE9, 233, 'é' -> small e, acute +accent + 0x90, # 74 -- 0xEA, 234, 'ê' -> small e, circumflex +accent + 0x91, # 75 -- 0xEB, 235, 'ë' -> small e, dieresis +or umlaut mark + 0x93, # 76 -- 0xEC, 236, 'ì' -> small i, grave +accent + 0x92, # 77 -- 0xED, 237, 'í' -> small i, acute +accent + 0x94, # 78 -- 0xEE, 238, 'î' -> small i, circumflex +accent + 0x95, # 79 -- 0xEF, 239, 'ï' -> small i, dieresis +or umlaut mark + + 0x00, # 80 -- 0xF0, 240, n/a -> small eth, +Icelandic (not in the Mac standard 
Roman character set) + 0x96, # 81 -- 0xF1, 241, 'ñ' -> small n, tilde + 0x98, # 82 -- 0xF2, 242, 'ò' -> small o, grave +accent + 0x97, # 83 -- 0xF3, 243, 'ó' -> small o, acute +accent + 0x99, # 84 -- 0xF4, 244, 'ô' -> small o, circumflex +accent + 0x9B, # 85 -- 0xF5, 245, 'õ' -> small o, tilde + 0xBB, # 86 -- 0xF6, 246, 'ö' -> small o, dieresis +or umlaut mark + 0xD6, # 87 -- 0xF7, 247, '÷' -> division sign + 0xBF, # 88 -- 0xF8, 248, 'ø' -> small o, slash + 0x9D, # 89 -- 0xF9, 249, 'ù' -> small u, grave +accent + + 0x9C, # 90 -- 0xFA, 250, 'ú' -> small u, acute +accent + 0x9E, # 91 -- 0xFB, 251, 'û' -> small u, circumflex +accent + 0x9F, # 92 -- 0xFC, 252, 'ü' -> small u, dieresis +or umlaut mark + 0x00, # 93 -- 0xFD, 253, n/a -> small y, acute +accent (not in the Mac standard Roman character set) + 0x00, # 94 -- 0xFE, 254, n/a -> small thorn, +Icelandic (not in the Mac standard Roman character set) + 0xD8 # 95 -- 0xFF, 255, 'ÿ' -> small y, dieresis +or umlaut mark + +); + + + +sub command { + my ($parser, $command, $paragraph, $line_num) = @_; + my $out_fh = $parser->output_handle(); + my $expansion; + + # interpret the command and its text + # for indexing, only =heads and =items are of interest + if ( ($command eq 'head1') || ($command eq 'head2') || + ($command eq 'head3') || ($command eq 'head4') || + ($command eq 'item') ) + { + # expand formatting codes + $expansion = $parser->interpolate($paragraph, $line_num); + # print to out file + print $out_fh '=' . $command . ' ' . $expansion; + } +} + +sub verbatim { + my ($parser, $paragraph, $line_num) = @_; + # do nothing +} + +sub textblock { + my ($parser, $paragraph, $line_num) = @_; + # do nothing +} + + +# Handle links. Most of the work is done by MyPodParseLink. +sub seq_Link { + my ($self, $link, $seq) = @_; + my ($text, $type) = (MyPodParseLink::parselink ($link))[1,4]; + my ($file, $line) = $seq->file_line; + $text = $self->interpolate ($text, $line); + $text = '<' . $text . 
'>' if $type eq 'url'; + return $text || ''; +} + +# Called for a formatting code. Gets the command, argument, and a +# Pod::InteriorSequence object and is expected to return the resulting text. +sub interior_sequence { + my ($parser, $seq_command, $seq_argument, $pod_seq) = @_; + + my $parent = $pod_seq->nested; + while (defined $parent) { + return $pod_seq->raw_text if ($parent->cmd_name eq 'L'); + $parent = $parent->nested; + } + + # get filename and line number + my ($file, $line) = $pod_seq->file_line; + + # Index entries are ignored in plain text. + return '' if ($seq_command eq 'X' || $seq_command eq 'Z'); + + # Expand escapes into the actual character now, warning if invalid. + if ($seq_command eq 'E') { + + my $is_Number = 0; + if ( $seq_argument =~ m/^(0x[a-fA-F0-9]+)$/ ) { + # hexadecimal + $charCode = hex($1); + $is_Number = 1; + } + elsif ( $seq_argument =~ m/^(0[0-7]+)$/ ) { + # octal + $charCode = oct($1); + $is_Number = 1; + } + elsif ( $seq_argument =~ m/^(0|(?:[1-9][0-9]*))$/) { + # decimal, but not e.g. 099 + $charCode = $1; + $is_Number = 1; + } + + if ($is_Number) { # $seq_argument is a character number + + if ( ($charCode < 0) || ($charCode > 255) ) { + warn "$file:$line: Character code out of range: +E<${seq_argument}>\n"; + return 'E<' . $seq_argument . '>'; + + } + else { + + if ($charCode < 32) { + # standard ASCII, non-printable + + warn "$file:$line: Character is non-printable: +E<${seq_argument}>\n"; + return 'E<' . $seq_argument . '>'; + } + elsif ( ($charCode >= 32) && ($charCode < 127) ) { + # standard ASCII + return chr($charCode); + + } + elsif ( ($charCode >= 127) && ($charCode < 160) ) { + # 127 : standard ASCII, but +non-printable (DEL) + # 128 - 159 : not defined in ISO 8559-1 +(Latin-1) + warn "$file:$line: Character is non-printable: +E<${seq_argument}>\n"; + return 'E<' . $seq_argument . 
'>'; + } + else { + # 160 - 255 (0xA0 - 0xFF) + # map ISO 8559-1 (Latin-1) to Mac standard +Roman + my $MacCharCode = +$Latin1_to_MacRoman_MAP[$charCode-160]; + if ($MacCharCode == 0) { + # not in Mac Roman + warn "$file:$line: Character is not in +the Macintosh standard Roman character set: E<${seq_argument}>\n"; + return 'E<' . $seq_argument . '>'; + + } + else { + return chr($MacCharCode); + } + } + } + + } + else { + return $ESCAPES{$seq_argument} if defined $ESCAPES{$seq_argument}; + warn "$file:$line: Unknown escape: E<${seq_argument}>\n"; + return 'E<' . $seq_argument . '>'; + } + } + + # For all the other formatting codes, empty content produces no output. + return if $seq_argument eq ''; + + # L gets dispatched to another method, anything else returns its argument + if ($seq_command eq 'B') { return "$seq_argument";} + elsif ($seq_command eq 'C') { return "$seq_argument"; } + elsif ($seq_command eq 'F') { return "$seq_argument"; } + elsif ($seq_command eq 'I') { return "$seq_argument"; } + elsif ($seq_command eq 'S') { return "$seq_argument"; } + elsif ($seq_command eq 'L') { return $parser->seq_Link ($seq_argument, $pod_seq) } + else { + warn "$file:$line: Unknown formatting code: $$seq_command<${seq_argument}>\n"; + } +} + + +############################################################################## +# MAIN +############################################################################## +package main; + ($index = shift @ARGV) || die "No help index specified"; dbmopen(%INDEX, $index, 0666) || die "Couldn't open index file \"$index\""; @@ -123,6 +578,15 @@ local($file, $var) = @_; local(%rename, %alias); + # Create a parser object + my $parser = new MyPodIndexer(); + $file =~ m/([^:]+)\.pod$/; + my $indexfile = $1 . 
'.idx'; # our temporary index file + + # Read POD from file.pod and write to file.idx + # (create the index file in our current directory, which normally is +":macperl_src:macperl:") + $parser->parse_from_file ("::perl$file", ":$indexfile"); + $url = $file; if ($url =~ /^:/) { $url = $'; @@ -132,7 +596,7 @@ $url =~ s|:|/|g; $url = "pod://$url"; } - open(INDEX, "::perl$file") || die "Can't open index file \"$file\""; + open(INDEX, ":$indexfile") || die "Can't open index file \"$indexfile\""; while (<>) { if (/\@end/) { goto startindex; @@ -154,7 +618,13 @@ while (<INDEX>) { if (/^=item\s+(.*\S)\s*$/) { $name = $1; - next if ($name eq "*"); + + # bullet lists and numbered lists are not indexed by Shuck + next if ($name eq "*"); # skip =item * + next if ($name =~ m/^\d+\.?$/); # skip =item 1/2./0377 + (decimal, octal) + next if ($name =~ m/^0x[0-9a-fA-F]+$/); # skip =item 0x20 + (hex) + next if ($name =~ m/^0b[01]+$/); # skip =item 0b1100 + (binary) + if ($name =~ /^\$([A-Za-z_]+|\^[A-Z]|<I<digit>>|.)(.?)/) { if ($2 eq "{") { $key = "\%$1"; @@ -192,4 +662,7 @@ warn "\"$value\" undefined" unless $INDEX{$value}; $INDEX{$lastkey = $key} ||= $INDEX{$value}; } + + close INDEX; + unlink $indexfile; } ==== //depot/macperl/macos/macperl/MacPerl.podhelp#4 (text) ==== Index: macperl/macos/macperl/MacPerl.podhelp --- macperl/macos/macperl/MacPerl.podhelp#3~16977~ Sun Jun 2 10:21:45 2002 +++ macperl/macos/macperl/MacPerl.podhelp Mon Jul 29 20:16:52 2002 @@ -87,10 +87,8 @@ pod:pod/perlop.pod Operators and precedence (perlop) pod:pod/perlsub.pod Subroutines (perlsub) pod:pod/perlfunc.pod Builtin functions (perlfunc) -@sub Function Tutorials pod:pod/perlopentut.pod open() tutorial (perlopentut) pod:pod/perlpacktut.pod pack() and unpack() tutorial (perlpacktut) -@end @sep pod:pod/perlpod.pod Plain old documentation (perlpod) pod:pod/perlpodspec.pod Plain old documentation format specification (perlpod) @@ -122,13 +120,11 @@ @sep pod:pod/perlsec.pod Security (perlsec) @sep -@sub 
Modules -pod:pod/perlmod.pod How they work (perlmod) +pod:pod/perlmod.pod How modules work (perlmod) pod:pod/perlmodlib.pod How to write and use (perlmodlib) pod:pod/perlmodstyle.pod How to write with style (perlmodstyle) pod:pod/perlmodinstall.pod How to install from CPAN (perlmodinstall) pod:pod/perlnewmod.pod Preparing a new module for distribution (perlnewmod) -@end @sep pod:pod/perlutil.pod Utilities packaged with the Perl distribution (perlutil) pod:pod/perlfilter.pod Source filters (perlfilter) @@ -209,7 +205,8 @@ @alias ^ Bitwise Or and Exclusive Or @alias && C-style Logical And @alias || C-style Logical Or -@alias .. Range Operator +@alias .. Range Operators +@alias ... Range Operators @alias ? Conditional Operator @alias : Conditional Operator @alias = Assignment Operators ==== //depot/maint-5.6/macperl/macos/macperl/BuildHelpIndex#2 (text) ==== Index: macperl/macos/macperl/BuildHelpIndex --- macperl/macos/macperl/BuildHelpIndex#1~11243~ Mon Jul 9 15:16:12 2001 +++ macperl/macos/macperl/BuildHelpIndex Mon Jul 29 20:16:52 2002 @@ -3,6 +3,461 @@ #!perl -s +use Pod::Parser; + +package MyPodParseLink; + +############################################################################## +# Since Pod::ParseLink is (a) not part of the standard library prior to Perl +# 5.8, and (b) I needed to make a little change for compatibility with Shuck, +# its code was copied over here. All credits go to Russ Allbery. +############################################################################## + +# Parse the name and section portion of a link into a name and section. +sub _parse_section { + my ($link) = @_; + $link =~ s/^\s+//; + $link =~ s/\s+$//; + + # If the whole link is enclosed in quotes, interpret it all as a section + # even if it contains a slash. + return (undef, $1) if ($link =~ /^"\s*(.*?)\s*"$/); + + # Split into page and section on slash, and then clean up quoting in the + # section. 
If there is no section and the name contains spaces, also + # guess that it's an old section link. + my ($page, $section) = split (/\s*\/\s*/, $link, 2); + $section =~ s/^"\s*(.*?)\s*"$/$1/ if $section; + if ($page && $page =~ / / && !defined ($section)) { + $section = $page; + $page = undef; + } else { + $page = undef unless $page; + $section = undef unless $section; + } + return ($page, $section); +} + +# Infer link text from the page and section. +sub _infer_text { + my ($page, $section) = @_; + my $inferred; + if ($page && !$section) { + $inferred = $page; + } elsif (!$page && $section) { + $inferred = '"' . $section . '"'; + } elsif ($page && $section) { + # The string 'the section ' string is prepended for compatibility with + # Shuck (this is the only difference from Pod::ParseLink) + $inferred = 'the section ' . '"' . $section . '" in ' . $page; + } + return $inferred; +} + +# Given the contents of an L<> formatting code, parse it into the link text, +# the possibly inferred link text, the name or URL, the section, and the type +# of link (pod, man, or url). +sub parselink { + my ($link) = @_; + $link =~ s/\s+/ /g; + if ($link =~ /\A\w+:[^:\s]\S*\Z/) { + return (undef, $link, $link, undef, 'url'); + } else { + my $text; + if ($link =~ /\|/) { + ($text, $link) = split (/\|/, $link, 2); + } + my ($name, $section) = _parse_section ($link); + my $inferred = $text || _infer_text ($name, $section); + my $type = ($name && $name =~ /\(\S*\)/) ? 'man' : 'pod'; + return ($text, $inferred, $name, $section, $type); + } +} + + +############################################################################## +# And now our Pod::Parser subclass, MyPodIndexer +# (A lot of this code was borrowed from Pod::Text, credits to Russ Allbery.) +############################################################################## +package MyPodIndexer; +@ISA = qw(Pod::Parser); + +# Table of supported E<> escapes for the Macintosh standard Roman character set. 
+# (These are exactly the same escapes as known by Shuck. This list is incomplete +# since the Macintosh standard Roman character set doesn't contain all characters +# defined in the ISO 8559-1 (Latin 1) character set.) +%ESCAPES = ( + 'amp' => '&', # ampersand + 'apos' => "'", # apostrophe + 'lt' => '<', # left chevron, less-than + 'gt' => '>', # right chevron, greater-than + 'quot' => '"', # double quote + 'sol' => '/', # solidus (forward slash) + 'verbar' => '|', # vertical bar + + "Aacute" => "Á", # capital A, acute accent + "aacute" => "á", # small a, acute accent + "Acirc" => "Â", # capital A, circumflex accent + "acirc" => "â", # small a, circumflex accent + "AElig" => "Æ", # capital AE diphthong (ligature) + "aelig" => "æ", # small ae diphthong (ligature) + "Agrave" => "À", # capital A, grave accent + "agrave" => "à", # small a, grave accent + "Aring" => "Å", # capital A, ring + "aring" => "å", # small a, ring + "Atilde" => "Ã", # capital A, tilde + "atilde" => "ã", # small a, tilde + "Auml" => "Ä", # capital A, dieresis or umlaut mark + "auml" => "ä", # small a, dieresis or umlaut mark + "Ccedil" => "Ç", # capital C, cedilla + "ccedil" => "ç", # small c, cedilla + "Eacute" => "É", # capital E, acute accent + "eacute" => "é", # small e, acute accent + "Ecirc" => "Ê", # capital E, circumflex accent + "ecirc" => "ê", # small e, circumflex accent + "Egrave" => "È", # capital E, grave accent + "egrave" => "è", # small e, grave accent + "ETH" => "Eth", # capital Eth, Icelandic + "eth" => "eth", # small eth, Icelandic + "Euml" => "Ë", # capital E, dieresis or umlaut mark + "euml" => "ë", # small e, dieresis or umlaut mark + "Iacute" => "Í", # capital I, acute accent + "iacute" => "í", # small i, acute accent + "Icirc" => "Î", # capital I, circumflex accent + "icirc" => "î", # small i, circumflex accent + "Igrave" => "Ì", # capital I, grave accent + "igrave" => "ì", # small i, grave accent + "Iuml" => "Ï", # capital I, dieresis or umlaut mark + "iuml" => "ï", # 
small i, dieresis or umlaut mark + "Ntilde" => "Ñ", # capital N, tilde + "ntilde" => "ñ", # small n, tilde + "Oacute" => "Ó", # capital O, acute accent + "oacute" => "ó", # small o, acute accent + "Ocirc" => "Ô", # capital O, circumflex accent + "ocirc" => "ô", # small o, circumflex accent + "Ograve" => "Ò", # capital O, grave accent + "ograve" => "ò", # small o, grave accent + "Oslash" => "Ø", # capital O, slash + "oslash" => "ø", # small o, slash + "Otilde" => "Õ", # capital O, tilde + "otilde" => "õ", # small o, tilde + "Ouml" => "Ö", # capital O, dieresis or umlaut mark + "ouml" => "ö", # small o, dieresis or umlaut mark + "szlig" => "ß", # small sharp s, German (sz ligature) + "THORN" => "Thorn", # capital THORN, Icelandic + "thorn" => "thorn", # small thorn, Icelandic + "Uacute" => "Ú", # capital U, acute accent + "uacute" => "ú", # small u, acute accent + "Ucirc" => "Û", # capital U, circumflex accent + "ucirc" => "û", # small u, circumflex accent + "Ugrave" => "Ù", # capital U, grave accent + "ugrave" => "ù", # small u, grave accent + "Uuml" => "Ü", # capital U, dieresis or umlaut mark + "uuml" => "ü", # small u, dieresis or umlaut mark + "Yacute" => "´Y", # capital Y, acute accent + "yacute" => "´y", # small y, acute accent + "yuml" => "ÿ", # small y, dieresis or umlaut mark + + "laquo" => "«", # left pointing double angle quotation mark + "lchevron" => "«", # synonym (backwards compatibility) + "raquo" => "»", # right pointing double angle quotation mark + "rchevron" => "»", # synonym (backwards compatibility) + + "iexcl" => "¡", # inverted exclamation mark + "cent" => "¢", # cent sign + "pound" => "£", # (UK) pound sign + + "yen" => "¥", # yen sign + + "sect" => "§", # section sign + "uml" => "¨", # diaresis + "copy" => "©", # Copyright symbol + "ordf" => "ª", # feminine ordinal indicator + "not" => "¬", # not sign + + "reg" => "®", # registered trademark + "macr" => "¯", # macron, overline + "deg" => "°", # degree sign + "plusmn" => "±", # plus-minus 
sign + + "acute" => "´", # acute accent + "micro" => "µ", # micro sign + "para" => "¶", # pilcrow sign = paragraph sign + "middot" => "·", # middle dot = Georgian comma + "cedil" => "¸", # cedilla + + "ordm" => "º", # masculine ordinal indicator + + "iquest" => "¿", # inverted question mark + "times" => "x", # multiplication sign (just an x) + "divide" => "÷", # division sign + + "nbsp" => " ", # non-breaking space +); + + +# This array maps the ISO 8559-1 (Latin-1) character set to the Mac standard Roman +character set. +# Latin 1: The first 127 characters are equal to ASCII. (0x00 - 0x7F, 0 - 127, +where 127 is non-printable) +# The next 32 characters are non-printable. (0x80 - 0x9F, 128 - 159) +# The last 96 characters are shown below. (0xA0 - 0xFF, 160 - +255) +# +# (subtract 160 from the Latin-1 char code to get the array index) +@Latin1_to_MacRoman_MAP = ( + + 0xCA, # 0 -- 0xA0, 160, ' ' -> nonbreaking space + 0xC1, # 1 -- 0xA1, 161, '¡' -> inverted +exclamation mark + 0xA2, # 2 -- 0xA2, 162, '¢' -> cent sign + 0xA3, # 3 -- 0xA3, 163, '£' -> (UK) pound sign + 0xDB, # 4 -- 0xA4, 164, '¤' -> currency sign (some +fonts display it as Euro sign; select e.g. 
Futura to see it) + 0xB4, # 5 -- 0xA5, 165, '¥' -> yen sign + 0x00, # 6 -- 0xA6, 166, n/a -> broken vertical bar +(not in the Mac standard Roman character set) + 0xA4, # 7 -- 0xA7, 167, '§' -> section sign + 0xAC, # 8 -- 0xA8, 168, '¨' -> diaresis + 0xA9, # 9 -- 0xA9, 169, '©' -> Copyright symbol + + 0xBB, # 10 -- 0xAA, 170, 'ª' -> feminine ordinal +indicator + 0xC7, # 11 -- 0xAB, 171, '«' -> left pointing +double angle quotation mark, guillemotleft + 0xC2, # 12 -- 0xAC, 172, '¬' -> (logical) not sign + 0xD0, # 13 -- 0xAD, 173, '' -> soft +(discretionary) hyphen (endash) + 0xA8, # 14 -- 0xAE, 174, '®' -> registered + 0xF8, # 15 -- 0xAF, 175, '¯' -> macron, overline + 0xA1, # 16 -- 0xB0, 176, '°' -> degree sign + 0xB1, # 17 -- 0xB1, 177, '±' -> plus-minus sign + 0x00, # 18 -- 0xB2, 178, n/a -> superscript 2 (not +in the Mac standard Roman character set) + 0x00, # 19 -- 0xB3, 179, n/a -> superscript 3 (not +in the Mac standard Roman character set) + + 0xAB, # 20 -- 0xB4, 180, '´' -> acute accent + 0xB5, # 21 -- 0xB5, 181, 'µ' -> micro sign + 0xA6, # 22 -- 0xB6, 182, '¶' -> pilcrow sign = +paragraph sign + 0xE1, # 23 -- 0xB7, 183, '·' -> middle dot or +periodcentered = Georgian comma + 0xFC, # 24 -- 0xB8, 184, '¸' -> cedilla + 0x00, # 25 -- 0xB9, 185, n/a -> superscript 1 (not +in the Mac standard Roman character set) + 0xBC, # 26 -- 0xBA, 186, 'º' -> masculine ordinal +indicator + 0xC8, # 27 -- 0xBB, 187, '»' -> right pointing +double angle quotation mark, guillemotright + 0x00, # 28 -- 0xBC, 188, n/a -> vulgar fraction one +quarter (not in the Mac standard Roman character set) + 0x00, # 29 -- 0xBD, 189, n/a -> vulgar fraction one +half (not in the Mac standard Roman character set) + + 0x00, # 30 -- 0xBE, 190, n/a -> vulgar fraction +three quarters (not in the Mac standard Roman character set) + 0xC0, # 31 -- 0xBF, 191, '¿' -> inverted question +mark + 0xCB, # 32 -- 0xC0, 192, 'À' -> capital A, grave +accent + 0xE7, # 33 -- 0xC1, 193, 'Á' -> capital A, acute +accent 
+ 0xE5, # 34 -- 0xC2, 194, 'Â' -> capital A, +circumflex accent + 0xCC, # 35 -- 0xC3, 195, 'Ã' -> capital A, tilde + 0x80, # 36 -- 0xC4, 196, 'Ä' -> capital A, dieresis +or umlaut mark + 0x81, # 37 -- 0xC5, 197, 'Å' -> capital A, ring + 0xAE, # 38 -- 0xC6, 198, 'Æ' -> capital AE +diphthong (ligature) + 0x82, # 39 -- 0xC7, 199, 'Ç' -> capital C, cedilla + + 0xE9, # 40 -- 0xC8, 200, 'È' -> capital E, grave +accent + 0x83, # 41 -- 0xC9, 201, 'É' -> capital E, acute +accent + 0xE6, # 42 -- 0xCA, 202, 'Ê' -> capital E, +circumflex accent + 0xE8, # 43 -- 0xCB, 203, 'Ë' -> capital E, dieresis +or umlaut mark + 0xED, # 44 -- 0xCC, 204, 'Ì' -> capital I, grave +accent + 0xEA, # 45 -- 0xCD, 205, 'Í' -> capital I, acute +accent + 0xEB, # 46 -- 0xCE, 206, 'Î' -> capital I, +circumflex accent + 0xEC, # 47 -- 0xCF, 207, 'Ï' -> capital I, dieresis +or umlaut mark + 0x00, # 48 -- 0xD0, 208, n/a -> capital Eth, +Icelandic (not in the Mac standard Roman character set) + 0x84, # 49 -- 0xD1, 209, 'Ñ' -> capital N, tilde + + 0xF1, # 50 -- 0xD2, 210, 'Ò' -> capital O, grave +accent + 0xEE, # 51 -- 0xD3, 211, 'Ó' -> capital O, acute +accent + 0xEF, # 52 -- 0xD4, 212, 'Ô' -> capital O, +circumflex accent + 0xCD, # 53 -- 0xD5, 213, 'Õ' -> capital O, tilde + 0x85, # 54 -- 0xD6, 214, 'Ö' -> capital O, dieresis +or umlaut mark + 0x00, # 55 -- 0xD7, 215, n/a -> multiplication sign +(not in the Mac standard Roman character set) + 0xAF, # 56 -- 0xD8, 216, 'Ø' -> capital O, slash + 0xF4, # 57 -- 0xD9, 217, 'Ù' -> capital U, grave +accent + 0xF2, # 58 -- 0xDA, 218, 'Ú' -> capital U, acute +accent + 0xF3, # 59 -- 0xDB, 219, 'Û' -> capital U, +circumflex accent + + 0x86, # 60 -- 0xDC, 220, 'Ü' -> capital U, dieresis +or umlaut mark + 0x00, # 61 -- 0xDD, 221, n/a -> capital Y, acute +accent (not in the Mac standard Roman character set) + 0x00, # 62 -- 0xDE, 222, n/a -> capital THORN, +Icelandic (not in the Mac standard Roman character set) + 0xA7, # 63 -- 0xDF, 223, 'ß' -> small sharp s, +German (sz 
ligature) + 0x88, # 64 -- 0xE0, 224, 'à' -> small a, grave +accent + 0x87, # 65 -- 0xE1, 225, 'á' -> small a, acute +accent + 0x89, # 66 -- 0xE2, 226, 'â' -> small a, circumflex +accent + 0x8B, # 67 -- 0xE3, 227, 'ã' -> small a, tilde + 0x8A, # 69 -- 0xE4, 228, 'ä' -> small a, dieresis +or umlaut mark + 0x8C, # 69 -- 0xE5, 229, 'å' -> small a, ring + + 0xBE, # 70 -- 0xE6, 230, 'æ' -> small ae diphthong +(ligature) + 0x8D, # 71 -- 0xE7, 231, 'ç' -> small c, cedilla + 0x8F, # 72 -- 0xE8, 232, 'è' -> small e, grave +accent + 0x8E, # 73 -- 0xE9, 233, 'é' -> small e, acute +accent + 0x90, # 74 -- 0xEA, 234, 'ê' -> small e, circumflex +accent + 0x91, # 75 -- 0xEB, 235, 'ë' -> small e, dieresis +or umlaut mark + 0x93, # 76 -- 0xEC, 236, 'ì' -> small i, grave +accent + 0x92, # 77 -- 0xED, 237, 'í' -> small i, acute +accent + 0x94, # 78 -- 0xEE, 238, 'î' -> small i, circumflex +accent + 0x95, # 79 -- 0xEF, 239, 'ï' -> small i, dieresis +or umlaut mark + + 0x00, # 80 -- 0xF0, 240, n/a -> small eth, +Icelandic (not in the Mac standard Roman character set) + 0x96, # 81 -- 0xF1, 241, 'ñ' -> small n, tilde + 0x98, # 82 -- 0xF2, 242, 'ò' -> small o, grave +accent + 0x97, # 83 -- 0xF3, 243, 'ó' -> small o, acute +accent + 0x99, # 84 -- 0xF4, 244, 'ô' -> small o, circumflex +accent + 0x9B, # 85 -- 0xF5, 245, 'õ' -> small o, tilde + 0xBB, # 86 -- 0xF6, 246, 'ö' -> small o, dieresis +or umlaut mark + 0xD6, # 87 -- 0xF7, 247, '÷' -> division sign + 0xBF, # 88 -- 0xF8, 248, 'ø' -> small o, slash + 0x9D, # 89 -- 0xF9, 249, 'ù' -> small u, grave +accent + + 0x9C, # 90 -- 0xFA, 250, 'ú' -> small u, acute +accent + 0x9E, # 91 -- 0xFB, 251, 'û' -> small u, circumflex +accent + 0x9F, # 92 -- 0xFC, 252, 'ü' -> small u, dieresis +or umlaut mark + 0x00, # 93 -- 0xFD, 253, n/a -> small y, acute +accent (not in the Mac standard Roman character set) + 0x00, # 94 -- 0xFE, 254, n/a -> small thorn, +Icelandic (not in the Mac standard Roman character set) + 0xD8 # 95 -- 0xFF, 255, 'ÿ' -> small y, 
dieresis +or umlaut mark + +); + + + +sub command { + my ($parser, $command, $paragraph, $line_num) = @_; + my $out_fh = $parser->output_handle(); + my $expansion; + + # interpret the command and its text + # for indexing, only =heads and =items are of interest + if ( ($command eq 'head1') || ($command eq 'head2') || + ($command eq 'head3') || ($command eq 'head4') || + ($command eq 'item') ) + { + # expand formatting codes + $expansion = $parser->interpolate($paragraph, $line_num); + # print to out file + print $out_fh '=' . $command . ' ' . $expansion; + } +} + +sub verbatim { + my ($parser, $paragraph, $line_num) = @_; + # do nothing +} + +sub textblock { + my ($parser, $paragraph, $line_num) = @_; + # do nothing +} + + +# Handle links. Most of the work is done by MyPodParseLink. +sub seq_Link { + my ($self, $link, $seq) = @_; + my ($text, $type) = (MyPodParseLink::parselink ($link))[1,4]; + my ($file, $line) = $seq->file_line; + $text = $self->interpolate ($text, $line); + $text = '<' . $text . '>' if $type eq 'url'; + return $text || ''; +} + +# Called for a formatting code. Gets the command, argument, and a +# Pod::InteriorSequence object and is expected to return the resulting text. +sub interior_sequence { + my ($parser, $seq_command, $seq_argument, $pod_seq) = @_; + + my $parent = $pod_seq->nested; + while (defined $parent) { + return $pod_seq->raw_text if ($parent->cmd_name eq 'L'); + $parent = $parent->nested; + } + + # get filename and line number + my ($file, $line) = $pod_seq->file_line; + + # Index entries are ignored in plain text. + return '' if ($seq_command eq 'X' || $seq_command eq 'Z'); + + # Expand escapes into the actual character now, warning if invalid. 
+ if ($seq_command eq 'E') { + + my $is_Number = 0; + if ( $seq_argument =~ m/^(0x[a-fA-F0-9]+)$/ ) { + # hexadecimal + $charCode = hex($1); + $is_Number = 1; + } + elsif ( $seq_argument =~ m/^(0[0-7]+)$/ ) { + # octal + $charCode = oct($1); + $is_Number = 1; + } + elsif ( $seq_argument =~ m/^(0|(?:[1-9][0-9]*))$/) { + # decimal, but not e.g. 099 + $charCode = $1; + $is_Number = 1; + } + + if ($is_Number) { # $seq_argument is a character number + + if ( ($charCode < 0) || ($charCode > 255) ) { + warn "$file:$line: Character code out of range: +E<${seq_argument}>\n"; + return 'E<' . $seq_argument . '>'; + + } + else { + + if ($charCode < 32) { + # standard ASCII, non-printable + + warn "$file:$line: Character is non-printable: +E<${seq_argument}>\n"; + return 'E<' . $seq_argument . '>'; + } + elsif ( ($charCode >= 32) && ($charCode < 127) ) { + # standard ASCII + return chr($charCode); + + } + elsif ( ($charCode >= 127) && ($charCode < 160) ) { + # 127 : standard ASCII, but +non-printable (DEL) + # 128 - 159 : not defined in ISO 8559-1 +(Latin-1) + warn "$file:$line: Character is non-printable: +E<${seq_argument}>\n"; + return 'E<' . $seq_argument . '>'; + } + else { + # 160 - 255 (0xA0 - 0xFF) + # map ISO 8559-1 (Latin-1) to Mac standard +Roman + my $MacCharCode = +$Latin1_to_MacRoman_MAP[$charCode-160]; + if ($MacCharCode == 0) { + # not in Mac Roman + warn "$file:$line: Character is not in +the Macintosh standard Roman character set: E<${seq_argument}>\n"; + return 'E<' . $seq_argument . '>'; + + } + else { + return chr($MacCharCode); + } + } + } + + } + else { + return $ESCAPES{$seq_argument} if defined $ESCAPES{$seq_argument}; + warn "$file:$line: Unknown escape: E<${seq_argument}>\n"; + return 'E<' . $seq_argument . '>'; + } + } + + # For all the other formatting codes, empty content produces no output. 
+ return if $seq_argument eq ''; + + # L gets dispatched to another method, anything else returns its argument + if ($seq_command eq 'B') { return "$seq_argument";} + elsif ($seq_command eq 'C') { return "$seq_argument"; } + elsif ($seq_command eq 'F') { return "$seq_argument"; } + elsif ($seq_command eq 'I') { return "$seq_argument"; } + elsif ($seq_command eq 'S') { return "$seq_argument"; } + elsif ($seq_command eq 'L') { return $parser->seq_Link ($seq_argument, $pod_seq) } + else { + warn "$file:$line: Unknown formatting code: $$seq_command<${seq_argument}>\n"; + } +} + + +############################################################################## +# MAIN +############################################################################## +package main; + ($index = shift @ARGV) || die "No help index specified"; dbmopen(%INDEX, $index, 0666) || die "Couldn't open index file \"$index\""; @@ -123,6 +578,15 @@ local($file, $var) = @_; local(%rename, %alias); + # Create a parser object + my $parser = new MyPodIndexer(); + $file =~ m/([^:]+)\.pod$/; + my $indexfile = $1 . 
'.idx'; # our temporary index file + + # Read POD from file.pod and write to file.idx + # (create the index file in our current directory, which normally is +":macperl_src:macperl:") + $parser->parse_from_file ("::perl$file", ":$indexfile"); + $url = $file; if ($url =~ /^:/) { $url = $'; @@ -132,7 +596,7 @@ $url =~ s|:|/|g; $url = "pod://$url"; } - open(INDEX, "::perl$file") || die "Can't open index file \"$file\""; + open(INDEX, ":$indexfile") || die "Can't open index file \"$indexfile\""; while (<>) { if (/\@end/) { goto startindex; @@ -154,7 +618,13 @@ while (<INDEX>) { if (/^=item\s+(.*\S)\s*$/) { $name = $1; - next if ($name eq "*"); + + # bullet lists and numbered lists are not indexed by Shuck + next if ($name eq "*"); # skip =item * + next if ($name =~ m/^\d+\.?$/); # skip =item 1/2./0377 + (decimal, octal) + next if ($name =~ m/^0x[0-9a-fA-F]+$/); # skip =item 0x20 + (hex) + next if ($name =~ m/^0b[01]+$/); # skip =item 0b1100 + (binary) + if ($name =~ /^\$([A-Za-z_]+|\^[A-Z]|<I<digit>>|.)(.?)/) { if ($2 eq "{") { $key = "\%$1"; @@ -192,4 +662,7 @@ warn "\"$value\" undefined" unless $INDEX{$value}; $INDEX{$lastkey = $key} ||= $INDEX{$value}; } + + close INDEX; + unlink $indexfile; } ==== //depot/maint-5.6/macperl/macos/macperl/MacPerl.podhelp#10 (text) ==== Index: macperl/macos/macperl/MacPerl.podhelp --- macperl/macos/macperl/MacPerl.podhelp#9~16472~ Tue May 7 20:51:06 2002 +++ macperl/macos/macperl/MacPerl.podhelp Mon Jul 29 20:16:52 2002 @@ -180,7 +180,8 @@ @alias ^ Bitwise Or and Exclusive Or @alias && C-style Logical And @alias || C-style Logical Or -@alias .. Range Operator +@alias .. Range Operators +@alias ... Range Operators @alias ? 
Conditional Operator @alias : Conditional Operator @alias = Assignment Operators ==== //depot/maint-5.8/macperl/macos/macperl/BuildHelpIndex#2 (text) ==== Index: macperl/macos/macperl/BuildHelpIndex --- macperl/macos/macperl/BuildHelpIndex#1~17650~ Mon Jul 22 08:27:48 2002 +++ macperl/macos/macperl/BuildHelpIndex Mon Jul 29 20:16:52 2002 @@ -3,6 +3,461 @@ #!perl -s +use Pod::Parser; + +package MyPodParseLink; + +############################################################################## +# Since Pod::ParseLink is (a) not part of the standard library prior to Perl +# 5.8, and (b) I needed to make a little change for compatibility with Shuck, +# its code was copied over here. All credits go to Russ Allbery. +############################################################################## + +# Parse the name and section portion of a link into a name and section. +sub _parse_section { + my ($link) = @_; + $link =~ s/^\s+//; + $link =~ s/\s+$//; + + # If the whole link is enclosed in quotes, interpret it all as a section + # even if it contains a slash. + return (undef, $1) if ($link =~ /^"\s*(.*?)\s*"$/); + + # Split into page and section on slash, and then clean up quoting in the + # section. If there is no section and the name contains spaces, also + # guess that it's an old section link. + my ($page, $section) = split (/\s*\/\s*/, $link, 2); + $section =~ s/^"\s*(.*?)\s*"$/$1/ if $section; + if ($page && $page =~ / / && !defined ($section)) { + $section = $page; + $page = undef; + } else { + $page = undef unless $page; + $section = undef unless $section; + } + return ($page, $section); +} + +# Infer link text from the page and section. +sub _infer_text { + my ($page, $section) = @_; + my $inferred; + if ($page && !$section) { + $inferred = $page; + } elsif (!$page && $section) { + $inferred = '"' . $section . 
'"'; + } elsif ($page && $section) { + # The string 'the section ' string is prepended for compatibility with + # Shuck (this is the only difference from Pod::ParseLink) + $inferred = 'the section ' . '"' . $section . '" in ' . $page; + } + return $inferred; +} + +# Given the contents of an L<> formatting code, parse it into the link text, +# the possibly inferred link text, the name or URL, the section, and the type +# of link (pod, man, or url). +sub parselink { + my ($link) = @_; + $link =~ s/\s+/ /g; + if ($link =~ /\A\w+:[^:\s]\S*\Z/) { + return (undef, $link, $link, undef, 'url'); + } else { + my $text; + if ($link =~ /\|/) { + ($text, $link) = split (/\|/, $link, 2); + } + my ($name, $section) = _parse_section ($link); + my $inferred = $text || _infer_text ($name, $section); + my $type = ($name && $name =~ /\(\S*\)/) ? 'man' : 'pod'; + return ($text, $inferred, $name, $section, $type); + } +} + + +############################################################################## +# And now our Pod::Parser subclass, MyPodIndexer +# (A lot of this code was borrowed from Pod::Text, credits to Russ Allbery.) +############################################################################## +package MyPodIndexer; +@ISA = qw(Pod::Parser); + +# Table of supported E<> escapes for the Macintosh standard Roman character set. +# (These are exactly the same escapes as known by Shuck. This list is incomplete +# since the Macintosh standard Roman character set doesn't contain all characters +# defined in the ISO 8859-1 (Latin 1) character set.)
+%ESCAPES = ( + 'amp' => '&', # ampersand + 'apos' => "'", # apostrophe + 'lt' => '<', # left chevron, less-than + 'gt' => '>', # right chevron, greater-than + 'quot' => '"', # double quote + 'sol' => '/', # solidus (forward slash) + 'verbar' => '|', # vertical bar + + "Aacute" => "Á", # capital A, acute accent + "aacute" => "á", # small a, acute accent + "Acirc" => "Â", # capital A, circumflex accent + "acirc" => "â", # small a, circumflex accent + "AElig" => "Æ", # capital AE diphthong (ligature) + "aelig" => "æ", # small ae diphthong (ligature) + "Agrave" => "À", # capital A, grave accent + "agrave" => "à", # small a, grave accent + "Aring" => "Å", # capital A, ring + "aring" => "å", # small a, ring + "Atilde" => "Ã", # capital A, tilde + "atilde" => "ã", # small a, tilde + "Auml" => "Ä", # capital A, dieresis or umlaut mark + "auml" => "ä", # small a, dieresis or umlaut mark + "Ccedil" => "Ç", # capital C, cedilla + "ccedil" => "ç", # small c, cedilla + "Eacute" => "É", # capital E, acute accent + "eacute" => "é", # small e, acute accent + "Ecirc" => "Ê", # capital E, circumflex accent + "ecirc" => "ê", # small e, circumflex accent + "Egrave" => "È", # capital E, grave accent + "egrave" => "è", # small e, grave accent + "ETH" => "Eth", # capital Eth, Icelandic + "eth" => "eth", # small eth, Icelandic + "Euml" => "Ë", # capital E, dieresis or umlaut mark + "euml" => "ë", # small e, dieresis or umlaut mark + "Iacute" => "Í", # capital I, acute accent + "iacute" => "í", # small i, acute accent + "Icirc" => "Î", # capital I, circumflex accent + "icirc" => "î", # small i, circumflex accent + "Igrave" => "Ì", # capital I, grave accent + "igrave" => "ì", # small i, grave accent + "Iuml" => "Ï", # capital I, dieresis or umlaut mark + "iuml" => "ï", # small i, dieresis or umlaut mark + "Ntilde" => "Ñ", # capital N, tilde + "ntilde" => "ñ", # small n, tilde + "Oacute" => "Ó", # capital O, acute accent + "oacute" => "ó", # small o, acute accent + "Ocirc" => "Ô", # capital 
O, circumflex accent + "ocirc" => "ô", # small o, circumflex accent + "Ograve" => "Ò", # capital O, grave accent + "ograve" => "ò", # small o, grave accent + "Oslash" => "Ø", # capital O, slash + "oslash" => "ø", # small o, slash + "Otilde" => "Õ", # capital O, tilde + "otilde" => "õ", # small o, tilde + "Ouml" => "Ö", # capital O, dieresis or umlaut mark + "ouml" => "ö", # small o, dieresis or umlaut mark + "szlig" => "ß", # small sharp s, German (sz ligature) + "THORN" => "Thorn", # capital THORN, Icelandic + "thorn" => "thorn", # small thorn, Icelandic + "Uacute" => "Ú", # capital U, acute accent + "uacute" => "ú", # small u, acute accent + "Ucirc" => "Û", # capital U, circumflex accent + "ucirc" => "û", # small u, circumflex accent + "Ugrave" => "Ù", # capital U, grave accent + "ugrave" => "ù", # small u, grave accent + "Uuml" => "Ü", # capital U, dieresis or umlaut mark + "uuml" => "ü", # small u, dieresis or umlaut mark + "Yacute" => "´Y", # capital Y, acute accent + "yacute" => "´y", # small y, acute accent + "yuml" => "ÿ", # small y, dieresis or umlaut mark + + "laquo" => "«", # left pointing double angle quotation mark + "lchevron" => "«", # synonym (backwards compatibility) + "raquo" => "»", # right pointing double angle quotation mark + "rchevron" => "»", # synonym (backwards compatibility) + + "iexcl" => "¡", # inverted exclamation mark + "cent" => "¢", # cent sign + "pound" => "£", # (UK) pound sign + + "yen" => "¥", # yen sign + + "sect" => "§", # section sign + "uml" => "¨", # diaresis + "copy" => "©", # Copyright symbol + "ordf" => "ª", # feminine ordinal indicator + "not" => "¬", # not sign + + "reg" => "®", # registered trademark + "macr" => "¯", # macron, overline + "deg" => "°", # degree sign + "plusmn" => "±", # plus-minus sign + + "acute" => "´", # acute accent + "micro" => "µ", # micro sign + "para" => "¶", # pilcrow sign = paragraph sign + "middot" => "·", # middle dot = Georgian comma + "cedil" => "¸", # cedilla + + "ordm" => "º", # 
masculine ordinal indicator + + "iquest" => "¿", # inverted question mark + "times" => "x", # multiplication sign (just an x) + "divide" => "÷", # division sign + + "nbsp" => " ", # non-breaking space +); + + +# This array maps the ISO 8559-1 (Latin-1) character set to the Mac standard Roman +character set. +# Latin 1: The first 127 characters are equal to ASCII. (0x00 - 0x7F, 0 - 127, +where 127 is non-printable) +# The next 32 characters are non-printable. (0x80 - 0x9F, 128 - 159) +# The last 96 characters are shown below. (0xA0 - 0xFF, 160 - +255) +# +# (subtract 160 from the Latin-1 char code to get the array index) +@Latin1_to_MacRoman_MAP = ( + + 0xCA, # 0 -- 0xA0, 160, ' ' -> nonbreaking space + 0xC1, # 1 -- 0xA1, 161, '¡' -> inverted +exclamation mark + 0xA2, # 2 -- 0xA2, 162, '¢' -> cent sign + 0xA3, # 3 -- 0xA3, 163, '£' -> (UK) pound sign + 0xDB, # 4 -- 0xA4, 164, '¤' -> currency sign (some +fonts display it as Euro sign; select e.g. Futura to see it) + 0xB4, # 5 -- 0xA5, 165, '¥' -> yen sign + 0x00, # 6 -- 0xA6, 166, n/a -> broken vertical bar +(not in the Mac standard Roman character set) + 0xA4, # 7 -- 0xA7, 167, '§' -> section sign + 0xAC, # 8 -- 0xA8, 168, '¨' -> diaresis + 0xA9, # 9 -- 0xA9, 169, '©' -> Copyright symbol + + 0xBB, # 10 -- 0xAA, 170, 'ª' -> feminine ordinal +indicator + 0xC7, # 11 -- 0xAB, 171, '«' -> left pointing +double angle quotation mark, guillemotleft + 0xC2, # 12 -- 0xAC, 172, '¬' -> (logical) not sign + 0xD0, # 13 -- 0xAD, 173, '' -> soft +(discretionary) hyphen (endash) + 0xA8, # 14 -- 0xAE, 174, '®' -> registered + 0xF8, # 15 -- 0xAF, 175, '¯' -> macron, overline + 0xA1, # 16 -- 0xB0, 176, '°' -> degree sign + 0xB1, # 17 -- 0xB1, 177, '±' -> plus-minus sign + 0x00, # 18 -- 0xB2, 178, n/a -> superscript 2 (not +in the Mac standard Roman character set) + 0x00, # 19 -- 0xB3, 179, n/a -> superscript 3 (not +in the Mac standard Roman character set) + + 0xAB, # 20 -- 0xB4, 180, '´' -> acute accent + 0xB5, # 21 -- 0xB5, 181, 'µ' 
-> micro sign + 0xA6, # 22 -- 0xB6, 182, '¶' -> pilcrow sign = +paragraph sign + 0xE1, # 23 -- 0xB7, 183, '·' -> middle dot or +periodcentered = Georgian comma + 0xFC, # 24 -- 0xB8, 184, '¸' -> cedilla + 0x00, # 25 -- 0xB9, 185, n/a -> superscript 1 (not +in the Mac standard Roman character set) + 0xBC, # 26 -- 0xBA, 186, 'º' -> masculine ordinal +indicator + 0xC8, # 27 -- 0xBB, 187, '»' -> right pointing +double angle quotation mark, guillemotright + 0x00, # 28 -- 0xBC, 188, n/a -> vulgar fraction one +quarter (not in the Mac standard Roman character set) + 0x00, # 29 -- 0xBD, 189, n/a -> vulgar fraction one +half (not in the Mac standard Roman character set) + + 0x00, # 30 -- 0xBE, 190, n/a -> vulgar fraction +three quarters (not in the Mac standard Roman character set) + 0xC0, # 31 -- 0xBF, 191, '¿' -> inverted question +mark + 0xCB, # 32 -- 0xC0, 192, 'À' -> capital A, grave +accent + 0xE7, # 33 -- 0xC1, 193, 'Á' -> capital A, acute +accent + 0xE5, # 34 -- 0xC2, 194, 'Â' -> capital A, +circumflex accent + 0xCC, # 35 -- 0xC3, 195, 'Ã' -> capital A, tilde + 0x80, # 36 -- 0xC4, 196, 'Ä' -> capital A, dieresis +or umlaut mark + 0x81, # 37 -- 0xC5, 197, 'Å' -> capital A, ring + 0xAE, # 38 -- 0xC6, 198, 'Æ' -> capital AE +diphthong (ligature) + 0x82, # 39 -- 0xC7, 199, 'Ç' -> capital C, cedilla + + 0xE9, # 40 -- 0xC8, 200, 'È' -> capital E, grave +accent + 0x83, # 41 -- 0xC9, 201, 'É' -> capital E, acute +accent + 0xE6, # 42 -- 0xCA, 202, 'Ê' -> capital E, +circumflex accent + 0xE8, # 43 -- 0xCB, 203, 'Ë' -> capital E, dieresis +or umlaut mark + 0xED, # 44 -- 0xCC, 204, 'Ì' -> capital I, grave +accent + 0xEA, # 45 -- 0xCD, 205, 'Í' -> capital I, acute +accent + 0xEB, # 46 -- 0xCE, 206, 'Î' -> capital I, +circumflex accent + 0xEC, # 47 -- 0xCF, 207, 'Ï' -> capital I, dieresis +or umlaut mark + 0x00, # 48 -- 0xD0, 208, n/a -> capital Eth, +Icelandic (not in the Mac standard Roman character set) + 0x84, # 49 -- 0xD1, 209, 'Ñ' -> capital N, tilde + + 0xF1, # 50 -- 0xD2, 
210, 'Ò' -> capital O, grave +accent + 0xEE, # 51 -- 0xD3, 211, 'Ó' -> capital O, acute +accent + 0xEF, # 52 -- 0xD4, 212, 'Ô' -> capital O, +circumflex accent + 0xCD, # 53 -- 0xD5, 213, 'Õ' -> capital O, tilde + 0x85, # 54 -- 0xD6, 214, 'Ö' -> capital O, dieresis +or umlaut mark + 0x00, # 55 -- 0xD7, 215, n/a -> multiplication sign +(not in the Mac standard Roman character set) + 0xAF, # 56 -- 0xD8, 216, 'Ø' -> capital O, slash + 0xF4, # 57 -- 0xD9, 217, 'Ù' -> capital U, grave +accent + 0xF2, # 58 -- 0xDA, 218, 'Ú' -> capital U, acute +accent + 0xF3, # 59 -- 0xDB, 219, 'Û' -> capital U, +circumflex accent + + 0x86, # 60 -- 0xDC, 220, 'Ü' -> capital U, dieresis +or umlaut mark + 0x00, # 61 -- 0xDD, 221, n/a -> capital Y, acute +accent (not in the Mac standard Roman character set) + 0x00, # 62 -- 0xDE, 222, n/a -> capital THORN, +Icelandic (not in the Mac standard Roman character set) + 0xA7, # 63 -- 0xDF, 223, 'ß' -> small sharp s, +German (sz ligature) + 0x88, # 64 -- 0xE0, 224, 'à' -> small a, grave +accent + 0x87, # 65 -- 0xE1, 225, 'á' -> small a, acute +accent + 0x89, # 66 -- 0xE2, 226, 'â' -> small a, circumflex +accent + 0x8B, # 67 -- 0xE3, 227, 'ã' -> small a, tilde + 0x8A, # 69 -- 0xE4, 228, 'ä' -> small a, dieresis +or umlaut mark + 0x8C, # 69 -- 0xE5, 229, 'å' -> small a, ring + + 0xBE, # 70 -- 0xE6, 230, 'æ' -> small ae diphthong +(ligature) + 0x8D, # 71 -- 0xE7, 231, 'ç' -> small c, cedilla + 0x8F, # 72 -- 0xE8, 232, 'è' -> small e, grave +accent + 0x8E, # 73 -- 0xE9, 233, 'é' -> small e, acute +accent + 0x90, # 74 -- 0xEA, 234, 'ê' -> small e, circumflex +accent + 0x91, # 75 -- 0xEB, 235, 'ë' -> small e, dieresis +or umlaut mark + 0x93, # 76 -- 0xEC, 236, 'ì' -> small i, grave +accent + 0x92, # 77 -- 0xED, 237, 'í' -> small i, acute +accent + 0x94, # 78 -- 0xEE, 238, 'î' -> small i, circumflex +accent + 0x95, # 79 -- 0xEF, 239, 'ï' -> small i, dieresis +or umlaut mark + + 0x00, # 80 -- 0xF0, 240, n/a -> small eth, +Icelandic (not in the Mac standard 
Roman character set) + 0x96, # 81 -- 0xF1, 241, 'ñ' -> small n, tilde + 0x98, # 82 -- 0xF2, 242, 'ò' -> small o, grave +accent + 0x97, # 83 -- 0xF3, 243, 'ó' -> small o, acute +accent + 0x99, # 84 -- 0xF4, 244, 'ô' -> small o, circumflex +accent + 0x9B, # 85 -- 0xF5, 245, 'õ' -> small o, tilde + 0xBB, # 86 -- 0xF6, 246, 'ö' -> small o, dieresis +or umlaut mark + 0xD6, # 87 -- 0xF7, 247, '÷' -> division sign + 0xBF, # 88 -- 0xF8, 248, 'ø' -> small o, slash + 0x9D, # 89 -- 0xF9, 249, 'ù' -> small u, grave +accent + + 0x9C, # 90 -- 0xFA, 250, 'ú' -> small u, acute +accent + 0x9E, # 91 -- 0xFB, 251, 'û' -> small u, circumflex +accent + 0x9F, # 92 -- 0xFC, 252, 'ü' -> small u, dieresis +or umlaut mark + 0x00, # 93 -- 0xFD, 253, n/a -> small y, acute +accent (not in the Mac standard Roman character set) + 0x00, # 94 -- 0xFE, 254, n/a -> small thorn, +Icelandic (not in the Mac standard Roman character set) + 0xD8 # 95 -- 0xFF, 255, 'ÿ' -> small y, dieresis +or umlaut mark + +); + + + +sub command { + my ($parser, $command, $paragraph, $line_num) = @_; + my $out_fh = $parser->output_handle(); + my $expansion; + + # interpret the command and its text + # for indexing, only =heads and =items are of interest + if ( ($command eq 'head1') || ($command eq 'head2') || + ($command eq 'head3') || ($command eq 'head4') || + ($command eq 'item') ) + { + # expand formatting codes + $expansion = $parser->interpolate($paragraph, $line_num); + # print to out file + print $out_fh '=' . $command . ' ' . $expansion; + } +} + +sub verbatim { + my ($parser, $paragraph, $line_num) = @_; + # do nothing +} + +sub textblock { + my ($parser, $paragraph, $line_num) = @_; + # do nothing +} + + +# Handle links. Most of the work is done by MyPodParseLink. +sub seq_Link { + my ($self, $link, $seq) = @_; + my ($text, $type) = (MyPodParseLink::parselink ($link))[1,4]; + my ($file, $line) = $seq->file_line; + $text = $self->interpolate ($text, $line); + $text = '<' . $text . 
'>' if $type eq 'url'; + return $text || ''; +} + +# Called for a formatting code. Gets the command, argument, and a +# Pod::InteriorSequence object and is expected to return the resulting text. +sub interior_sequence { + my ($parser, $seq_command, $seq_argument, $pod_seq) = @_; + + my $parent = $pod_seq->nested; + while (defined $parent) { + return $pod_seq->raw_text if ($parent->cmd_name eq 'L'); + $parent = $parent->nested; + } + + # get filename and line number + my ($file, $line) = $pod_seq->file_line; + + # Index entries are ignored in plain text. + return '' if ($seq_command eq 'X' || $seq_command eq 'Z'); + + # Expand escapes into the actual character now, warning if invalid. + if ($seq_command eq 'E') { + + my $is_Number = 0; + if ( $seq_argument =~ m/^(0x[a-fA-F0-9]+)$/ ) { + # hexadecimal + $charCode = hex($1); + $is_Number = 1; + } + elsif ( $seq_argument =~ m/^(0[0-7]+)$/ ) { + # octal + $charCode = oct($1); + $is_Number = 1; + } + elsif ( $seq_argument =~ m/^(0|(?:[1-9][0-9]*))$/) { + # decimal, but not e.g. 099 + $charCode = $1; + $is_Number = 1; + } + + if ($is_Number) { # $seq_argument is a character number + + if ( ($charCode < 0) || ($charCode > 255) ) { + warn "$file:$line: Character code out of range: +E<${seq_argument}>\n"; + return 'E<' . $seq_argument . '>'; + + } + else { + + if ($charCode < 32) { + # standard ASCII, non-printable + + warn "$file:$line: Character is non-printable: +E<${seq_argument}>\n"; + return 'E<' . $seq_argument . '>'; + } + elsif ( ($charCode >= 32) && ($charCode < 127) ) { + # standard ASCII + return chr($charCode); + + } + elsif ( ($charCode >= 127) && ($charCode < 160) ) { + # 127 : standard ASCII, but +non-printable (DEL) + # 128 - 159 : not defined in ISO 8559-1 +(Latin-1) + warn "$file:$line: Character is non-printable: +E<${seq_argument}>\n"; + return 'E<' . $seq_argument . 
'>'; + } + else { + # 160 - 255 (0xA0 - 0xFF) + # map ISO 8559-1 (Latin-1) to Mac standard +Roman + my $MacCharCode = +$Latin1_to_MacRoman_MAP[$charCode-160]; + if ($MacCharCode == 0) { + # not in Mac Roman + warn "$file:$line: Character is not in +the Macintosh standard Roman character set: E<${seq_argument}>\n"; + return 'E<' . $seq_argument . '>'; + + } + else { + return chr($MacCharCode); + } + } + } + + } + else { + return $ESCAPES{$seq_argument} if defined $ESCAPES{$seq_argument}; + warn "$file:$line: Unknown escape: E<${seq_argument}>\n"; + return 'E<' . $seq_argument . '>'; + } + } + + # For all the other formatting codes, empty content produces no output. + return if $seq_argument eq ''; + + # L gets dispatched to another method, anything else returns its argument + if ($seq_command eq 'B') { return "$seq_argument";} + elsif ($seq_command eq 'C') { return "$seq_argument"; } + elsif ($seq_command eq 'F') { return "$seq_argument"; } + elsif ($seq_command eq 'I') { return "$seq_argument"; } + elsif ($seq_command eq 'S') { return "$seq_argument"; } + elsif ($seq_command eq 'L') { return $parser->seq_Link ($seq_argument, $pod_seq) } + else { + warn "$file:$line: Unknown formatting code: $$seq_command<${seq_argument}>\n"; + } +} + + +############################################################################## +# MAIN +############################################################################## +package main; + ($index = shift @ARGV) || die "No help index specified"; dbmopen(%INDEX, $index, 0666) || die "Couldn't open index file \"$index\""; @@ -123,6 +578,15 @@ local($file, $var) = @_; local(%rename, %alias); + # Create a parser object + my $parser = new MyPodIndexer(); + $file =~ m/([^:]+)\.pod$/; + my $indexfile = $1 . 
'.idx'; # our temporary index file + + # Read POD from file.pod and write to file.idx + # (create the index file in our current directory, which normally is +":macperl_src:macperl:") + $parser->parse_from_file ("::perl$file", ":$indexfile"); + $url = $file; if ($url =~ /^:/) { $url = $'; @@ -132,7 +596,7 @@ $url =~ s|:|/|g; $url = "pod://$url"; } - open(INDEX, "::perl$file") || die "Can't open index file \"$file\""; + open(INDEX, ":$indexfile") || die "Can't open index file \"$indexfile\""; while (<>) { if (/\@end/) { goto startindex; @@ -154,7 +618,13 @@ while (<INDEX>) { if (/^=item\s+(.*\S)\s*$/) { $name = $1; - next if ($name eq "*"); + + # bullet lists and numbered lists are not indexed by Shuck + next if ($name eq "*"); # skip =item * + next if ($name =~ m/^\d+\.?$/); # skip =item 1/2./0377 + (decimal, octal) + next if ($name =~ m/^0x[0-9a-fA-F]+$/); # skip =item 0x20 + (hex) + next if ($name =~ m/^0b[01]+$/); # skip =item 0b1100 + (binary) + if ($name =~ /^\$([A-Za-z_]+|\^[A-Z]|<I<digit>>|.)(.?)/) { if ($2 eq "{") { $key = "\%$1"; @@ -192,4 +662,7 @@ warn "\"$value\" undefined" unless $INDEX{$value}; $INDEX{$lastkey = $key} ||= $INDEX{$value}; } + + close INDEX; + unlink $indexfile; } ==== //depot/maint-5.8/macperl/macos/macperl/MacPerl.podhelp#2 (text) ==== Index: macperl/macos/macperl/MacPerl.podhelp --- macperl/macos/macperl/MacPerl.podhelp#1~17650~ Mon Jul 22 08:27:48 2002 +++ macperl/macos/macperl/MacPerl.podhelp Mon Jul 29 20:16:52 2002 @@ -87,10 +87,8 @@ pod:pod/perlop.pod Operators and precedence (perlop) pod:pod/perlsub.pod Subroutines (perlsub) pod:pod/perlfunc.pod Builtin functions (perlfunc) -@sub Function Tutorials pod:pod/perlopentut.pod open() tutorial (perlopentut) pod:pod/perlpacktut.pod pack() and unpack() tutorial (perlpacktut) -@end @sep pod:pod/perlpod.pod Plain old documentation (perlpod) pod:pod/perlpodspec.pod Plain old documentation format specification (perlpod) @@ -122,13 +120,11 @@ @sep pod:pod/perlsec.pod Security (perlsec) @sep 
-@sub Modules -pod:pod/perlmod.pod How they work (perlmod) +pod:pod/perlmod.pod How modules work (perlmod) pod:pod/perlmodlib.pod How to write and use (perlmodlib) pod:pod/perlmodstyle.pod How to write with style (perlmodstyle) pod:pod/perlmodinstall.pod How to install from CPAN (perlmodinstall) pod:pod/perlnewmod.pod Preparing a new module for distribution (perlnewmod) -@end @sep pod:pod/perlutil.pod Utilities packaged with the Perl distribution (perlutil) pod:pod/perlfilter.pod Source filters (perlfilter) @@ -209,7 +205,8 @@ @alias ^ Bitwise Or and Exclusive Or @alias && C-style Logical And @alias || C-style Logical Or -@alias .. Range Operator +@alias .. Range Operators +@alias ... Range Operators @alias ? Conditional Operator @alias : Conditional Operator @alias = Assignment Operators End of Patch. -- Chris Nandor [EMAIL PROTECTED] http://pudge.net/ Open Source Development Network [EMAIL PROTECTED] http://osdn.com/