It utilizes two simple related utilities: nb2txt and nbinfo. These are both available from http://www.saugus.net/~eric/nb2txt/ and should pretty readily build on most UNIX-like systems.
We've been using it on Saugus.net for about a year now and I figured it was stable enough to share.
#!/usr/bin/perl -w
use strict;
#
# Version 1.0 13-Feb-2004
# Written by Eric W. Brown, but very heavily based on the
# earlier work by David Adams <[EMAIL PROTECTED]>
#
# Uses the Saugus.net nb2txt & nbinfo utilities to read
# a Newton book file and produce HTML output.
#
# Can be called directly from htdig as an external converter,
# or may be called by doc2html.pl converter script.
#
# Usage: nb2html.pl filename [mime-type] [URL]
#

####--- Configuration ---####
# Full paths of nb2txt and nbinfo
#### YOU MUST SET THESE ####
my $NB2TXT = "/usr/local/bin/nb2txt";
my $NBINFO = "/usr/local/bin/nbinfo";
#
# De-hyphenation option (only affects end-of-line hyphens):
my $Dehyphenate = 1;
#
# Set title to be used when none is found:
my $Default_title = "Newton Book Document";
#
# make portable to win32 platform or unix:
my $null = "/dev/null";
if ($^O eq "MSWin32") {$null = "nul";}
####--- End of configuration ---###

# The converter is useless without nb2txt, so fail early.
if (! -x $NB2TXT) { die "Unable to execute nb2txt" }

# First argument: path of the Newton book file to convert (required).
my $Input = $ARGV[0] || die "Usage: nb2html.pl filename [mime-type] [URL]";

# Second argument: optional MIME type; if given it must be the Newton
# package type, otherwise this is the wrong converter for the document.
my $MIME_type = $ARGV[1] || '';
if ($MIME_type and ($MIME_type !~ m#^application/x-newton-compatible-pkg#i)) {
    die "MIME/type $MIME_type wrong";
}

# Third argument: optional URL.  Only its last path component is kept,
# with %XX escapes decoded; nb_head uses it as a fallback title.
my $Name = $ARGV[2] || '';
$Name =~ s#^.*/##;
$Name =~ s/%([A-F0-9][A-F0-9])/pack("C", hex($1))/gie;

# Emit the document: <head> built from nbinfo, <body> built from nb2txt.
nb_head();
nb_body();
exit;
#----------------------------------------------------------------------- -------
sub nb_head {
    #
    # by Eric W. Brown, based pretty heavily on the work
    # contributed by Greg Holmes and Michael Fuller and
    # modified by David Adams. Uses proper Dublin Core
    # notation as appropriate.
    #
    # Runs nbinfo on the file-level $Input, collects the labelled
    # "Label: value" lines it prints, and writes the opening <html> tag
    # plus a complete <head> section to STDOUT.  Takes no arguments and
    # returns nothing useful.  If nbinfo cannot be started a warning is
    # issued and the head is built from the fallback title alone.
    #

    # Values reported by nbinfo, keyed by lower-cased label.
    my %meta = (
        'short title'      => '',
        'title'            => '',
        'author'           => '',
        'copyright'        => '',
        'isbn'             => '',
        'software'         => '',
        'publication date' => '',
        'creation date'    => '',
    );

    # The command goes through the shell, so protect any single quote
    # embedded in the file name ( ' becomes '\'' ).
    (my $quoted_input = $Input) =~ s/'/'\\''/g;

    if (open(my $info_fh, "$NBINFO '$quoted_input' 2>$null |")) {
        while (my $line = <$info_fh>) {
            if (my ($label, $value) = $line =~
                m/^(Short Title|Title|Author|Copyright|ISBN|Software|Publication Date|Creation Date):\s*(.*)/i) {
                # Every value, dates included, is trimmed and escaped so
                # that it is safe inside a content="..." attribute.
                $meta{lc $label} = HTML($value);
            }
        }
        close $info_fh;
    } else { warn "cannot execute nbinfo" }

    # Title fallbacks: short title <- [URL file name] <- configured default;
    # full title <- short title.
    my $short_title = $meta{'short title'};
    if (not length $short_title) {
        if (length $Name) {
            # $Name comes from a URL-decoded argument, so escape it.
            $short_title = '[' . HTML($Name) . ']';
        } else {
            $short_title = $Default_title;
        }
    }
    my $title = length $meta{'title'} ? $meta{'title'} : $short_title;

    # Publication date is preferred; creation date is the fallback.
    my $date = length $meta{'publication date'}
             ? $meta{'publication date'}
             : $meta{'creation date'};

    print "<html>\n<head>\n";
    print "<title>$short_title</title>\n";
    if (length $title) {
        print '<meta name="description" content="Newton book version of ' . $title. "\" />\n";
        print '<meta name="DC.Title" content="' . $title . "\" />\n";
    }
    if (length $meta{'author'}) {
        print '<meta name="DC.Creator" content="' . $meta{'author'} . "\" />\n";
    }
    if (length $meta{'copyright'}) {
        print '<meta name="DC.Rights" content="' . $meta{'copyright'} . "\" />\n";
    }
    if (length $meta{'isbn'}) {
        print '<meta name="DC.Source" content="' . $meta{'isbn'} . "\" />\n";
    }
    if (length $date) {
        print '<meta name="DC.Date" content="' . $date . "\" />\n";
    }
    print "<meta name=\"DC.Type\" content=\"EBook\" />\n";
    print "<meta name=\"DC.Format\" content=\"application/x-newton-compatible-pkg\" />\n";
    if (length $meta{'software'}) {
        print '<meta name="DC.Contributor" content="' . $meta{'software'} . "\" />\n";
    }
    print "</head>\n";
    return;
}
#----------------------------------------------------------------------- -------
sub nb_body {
    #
    # Runs nb2txt on the file-level $Input and writes the <body> of the
    # HTML document, followed by the closing </body></html>, to STDOUT.
    # Takes no arguments and returns nothing.  Dies if the nb2txt pipe
    # cannot be opened.
    #
    # Layout rules:
    #   * the first four lines of output are wrapped as h1 / strong /
    #     h2 / em respectively;
    #   * a line consisting of exactly "Contents" is emphasised;
    #   * every other non-empty line is paragraph text: consecutive lines
    #     are separated by <br />, and one or more empty lines start a
    #     new paragraph.
    #

    # Wrappers for the first four lines, indexed by line number.
    my @front_matter = (
        [ '<h1>',     '</h1>'     ],
        [ '<strong>', '</strong>' ],
        [ '<h2>',     '</h2>'     ],
        [ '<em>',     '</em>'     ],
    );

    my $line_num   = 0;
    my $in_para    = 0;   # a <p> has been written and is not yet closed
    my $blank_seen = 0;   # an empty line was seen since the last text line

    # The command goes through the shell, so protect any single quote
    # embedded in the file name ( ' becomes '\'' ).
    (my $quoted_input = $Input) =~ s/'/'\\''/g;

    open(my $text_fh, "$NB2TXT '$quoted_input' |")
        or die "$NB2TXT doesn't want to be opened using pipe\n";

    print "<body>\n";
    while (my $line = <$text_fh>) {

        # De-hyphenation: while the line ends in letter-hyphen, pull in
        # the next line and join the two halves of the word.  End of
        # input is checked BEFORE reading so undef is never appended and
        # a word split onto the very last line is still joined.
        while ($Dehyphenate && $line =~ m/[A-Za-z\300-\377]-\s*$/) {
            last if eof($text_fh);
            my $next = <$text_fh>;
            last unless defined $next;
            $line .= $next;
            $line =~ s/([A-Za-z\300-\377])-\s*\n\s*([A-Za-z\300-\377])/$1$2/s;
        }

        $line =~ s/\255/-/g;    # replace dashes (octal 255) with hyphens
        # replace bell, backspace, tab. etc. with single space:
        $line =~ s/[\000-\040]+/ /g;
        $line = HTML($line);

        if ($line_num < @front_matter) {
            my ($open_tag, $close_tag) = @{ $front_matter[$line_num] };
            print $open_tag, $line, $close_tag, "\n";
        } elsif ($line =~ m/^Contents$/) {
            print "<big><strong>", $line, "</strong></big>", "\n";
        } elsif (length $line) {
            if (not $in_para) {
                # First text line: open the first paragraph.
                print "<p>\n";
                $in_para = 1;
            } elsif ($blank_seen) {
                # Empty line(s) since the previous text: new paragraph.
                print "</p>\n<p>\n";
            } else {
                # Directly follows another text line: keep the line break.
                print "<br />\n";
            }
            print $line, "\n";
            $blank_seen = 0;
        } else {
            $blank_seen = 1;
        }
        $line_num++;
    }
    close $text_fh;

    # Only close a paragraph that was actually opened.
    print "</p>\n" if $in_para;
    print "</body>\n</html>\n";
    return;
}
#----------------------------------------------------------------------- -------
# HTML($text)
#
# Returns a copy of $text made safe for inclusion in HTML element content
# or in a double-quoted attribute value:
#   * every run of whitespace (form feeds and newlines included) becomes
#     a single space, and leading/trailing whitespace is removed;
#   * & < > " are replaced by &amp; &lt; &gt; &quot;
#   * the high-bit characters listed in the table below are replaced by
#     their named character entities; characters not in the table pass
#     through unchanged.
# An undefined argument yields the empty string.
#
# The lookup table lives in a BEGIN block so that it is already filled in
# when HTML() is called from code that runs before this point in the file
# is reached at run time.
BEGIN {
    my %entity_for = (
        '&'    => '&amp;',    '<'    => '&lt;',
        '>'    => '&gt;',     '"'    => '&quot;',
        "\xA9" => '&copy;',   "\xA2" => '&cent;',
        "\xA3" => '&pound;',  "\xA5" => '&yen;',
        # NOTE(review): in ISO 8859-1 the degree sign is 0xB0 and 0xBA is
        # the masculine ordinal; the original mapping is kept here --
        # confirm against the character set nb2txt actually emits.
        "\xBA" => '&deg;',
        "\xB1" => '&plusmn;', "\xF7" => '&divide;',
        "\xE6" => '&aelig;',  "\xC6" => '&AElig;',
        "\xE1" => '&aacute;', "\xC1" => '&Aacute;',
        "\xE0" => '&agrave;', "\xC0" => '&Agrave;',
        "\xE4" => '&auml;',   "\xC4" => '&Auml;',
        "\xE2" => '&acirc;',  "\xC2" => '&Acirc;',
        "\xE3" => '&atilde;', "\xC3" => '&Atilde;',
        "\xE5" => '&aring;',  "\xC5" => '&Aring;',
        "\xE9" => '&eacute;', "\xC9" => '&Eacute;',
        "\xE8" => '&egrave;', "\xC8" => '&Egrave;',
        "\xEB" => '&euml;',   "\xCB" => '&Euml;',
        "\xEA" => '&ecirc;',  "\xCA" => '&Ecirc;',
        "\xED" => '&iacute;', "\xCD" => '&Iacute;',
        "\xEC" => '&igrave;', "\xCC" => '&Igrave;',
        "\xEF" => '&iuml;',   "\xCF" => '&Iuml;',
        "\xEE" => '&icirc;',  "\xCE" => '&Icirc;',
        "\xF3" => '&oacute;', "\xD3" => '&Oacute;',
        "\xF2" => '&ograve;', "\xD2" => '&Ograve;',
        "\xF6" => '&ouml;',   "\xD6" => '&Ouml;',
        "\xF4" => '&ocirc;',  "\xD4" => '&Ocirc;',
        "\xF5" => '&otilde;', "\xD5" => '&Otilde;',
        "\xF8" => '&oslash;', "\xD8" => '&Oslash;',
        "\xFA" => '&uacute;', "\xDA" => '&Uacute;',
        "\xF9" => '&ugrave;', "\xD9" => '&Ugrave;',
        "\xFC" => '&uuml;',   "\xDC" => '&Uuml;',
        "\xFB" => '&ucirc;',  "\xDB" => '&Ucirc;',
        "\xF1" => '&ntilde;', "\xD1" => '&Ntilde;',
        "\xE7" => '&ccedil;', "\xC7" => '&Ccedil;',
        "\xFF" => '&yuml;',   "\xDF" => '&szlig;',
        "\xBF" => '&iquest;', "\xA1" => '&iexcl;',
        "\xAB" => '&laquo;',  "\xBB" => '&raquo;',
    );

    sub HTML {
        my $text = shift;
        return '' unless defined $text;

        $text =~ s/\s+/ /g;     # collapse whitespace runs (incl. form feed, newline)
        $text =~ s/^ //;        # remove leading whitespace
        $text =~ s/ $//;        # remove trailing whitespace

        # A single pass means the '&' introduced by one replacement can
        # never be escaped a second time.
        $text =~ s/([&<>"\xA1-\xFF])/exists $entity_for{$1} ? $entity_for{$1} : $1/ge;

        return $text;
    }
}
------------------------------------------------------- SF email is sponsored by - The IT Product Guide Read honest & candid reviews on hundreds of IT Products from real users. Discover which products truly live up to the hype. Start reading now. http://ads.osdn.com/?ad_id=6595&alloc_id=14396&op=click _______________________________________________ ht://Dig general mailing list: <[email protected]> ht://Dig FAQ: http://htdig.sourceforge.net/FAQ.html List information (subscribe/unsubscribe, etc.) https://lists.sourceforge.net/lists/listinfo/htdig-general

