If you copy this to Script Editor and save it as an Application with the options
"stay open" and "no startup screen", named "MacToUnicodeHTML - R*ch" (where the last
four characters MUST be the creator type of your text editor), you will have a useful
droplet to convert Mac text files to HTML. You can extend the table, if you like, to
do more than just convert the 8-bit characters, e.g.
\">\"=>\"&gt;\"
(in that case also widen the character class [\\x80-\\xFF] in the substitution so
that the added characters are matched).
The script will read the dropped file and open the converted result in a temporary
document.
NO charset should be included in the content-type declaration of the final html file.
JD
(*
Save as Stay-open AppleScript Droplet named: "MacToUnicodeHTML - R*ch"
*)
-- Four-character creator code of the text editor that should open the
-- converted file. The value here is only a default: both the run and the
-- open handler overwrite it with the last four characters of this
-- applet's own path, so renaming the applet retargets the editor.
-- NOTE(review): "R*ch" is presumably BBEdit's creator code -- confirm.
property editor : "R*ch"
on run
    -- Launched without a dropped file: only refresh the editor creator
    -- code from the last four characters of this applet's path, then
    -- return (nothing is converted).
    set myPath to "" & (path to me)
    set editor to text -4 thru -1 of myPath
    return
end run
-- Droplet entry point: convert a dropped Mac OS Roman text file to
-- 7-bit-clean HTML text and open the result in the configured editor.
--
-- flist: the item(s) dropped on the applet. Only the FIRST item is
--        converted; any further items are ignored.
--
-- Steps:
--   1. Refresh the editor creator code from the applet's own name.
--   2. Create an empty "temp.out" in the temporary items folder.
--   3. Hand MacPerl a two-item list {perl script text, input path}; the
--      Perl script reads the input path from $ARGV[0], replaces every
--      byte in the range 0x80-0xFF with the numeric character reference
--      of the matching Unicode character, and writes the result to
--      $ENV{TMPDIR} . 'temp.out'.
--      NOTE(review): this assumes MacPerl's TMPDIR is the same folder as
--      "path to temporary items folder" -- confirm on the target system.
--   4. Ask the Finder to open the output file with the editor whose
--      creator code is stored in the editor property.
--
-- Changes against the earlier version:
--   * The table values are numeric character references (&#NNN;) again.
--     With literal characters as values each byte mapped onto itself, the
--     Apple logo entry mapped to an empty string and the fi/fl ligatures
--     became plain letters.
--   * The read chunk size is 2**15 (32768 bytes). In Perl "2^15" is a
--     bitwise XOR and evaluates to 13.
--   * The input file handle is closed explicitly.
on open flist
    set fin to "" & item 1 of (flist as list)
    set editor to text -4 through -1 of ("" & (path to me))
    set fout to "" & (path to temporary items folder) & "temp.out"
    -- Create the output file up front, then release it so that MacPerl can
    -- open it for writing.
    set n to open for access file fout
    close access n
    -- cmd is a list: item 1 is the Perl source, item 2 (appended below) is
    -- the input path that the Perl script reads from $ARGV[0].
    set cmd to {} & "# Mac OS Roman byte => HTML numeric character reference
%macToUni = (
\"\\x80\"=>\"&#196;\", # LATIN CAPITAL LETTER A WITH DIAERESIS
\"\\x81\"=>\"&#197;\", # LATIN CAPITAL LETTER A WITH RING ABOVE
\"\\x82\"=>\"&#199;\", # LATIN CAPITAL LETTER C WITH CEDILLA
\"\\x83\"=>\"&#201;\", # LATIN CAPITAL LETTER E WITH ACUTE
\"\\x84\"=>\"&#209;\", # LATIN CAPITAL LETTER N WITH TILDE
\"\\x85\"=>\"&#214;\", # LATIN CAPITAL LETTER O WITH DIAERESIS
\"\\x86\"=>\"&#220;\", # LATIN CAPITAL LETTER U WITH DIAERESIS
\"\\x87\"=>\"&#225;\", # LATIN SMALL LETTER A WITH ACUTE
\"\\x88\"=>\"&#224;\", # LATIN SMALL LETTER A WITH GRAVE
\"\\x89\"=>\"&#226;\", # LATIN SMALL LETTER A WITH CIRCUMFLEX
\"\\x8A\"=>\"&#228;\", # LATIN SMALL LETTER A WITH DIAERESIS
\"\\x8B\"=>\"&#227;\", # LATIN SMALL LETTER A WITH TILDE
\"\\x8C\"=>\"&#229;\", # LATIN SMALL LETTER A WITH RING ABOVE
\"\\x8D\"=>\"&#231;\", # LATIN SMALL LETTER C WITH CEDILLA
\"\\x8E\"=>\"&#233;\", # LATIN SMALL LETTER E WITH ACUTE
\"\\x8F\"=>\"&#232;\", # LATIN SMALL LETTER E WITH GRAVE
\"\\x90\"=>\"&#234;\", # LATIN SMALL LETTER E WITH CIRCUMFLEX
\"\\x91\"=>\"&#235;\", # LATIN SMALL LETTER E WITH DIAERESIS
\"\\x92\"=>\"&#237;\", # LATIN SMALL LETTER I WITH ACUTE
\"\\x93\"=>\"&#236;\", # LATIN SMALL LETTER I WITH GRAVE
\"\\x94\"=>\"&#238;\", # LATIN SMALL LETTER I WITH CIRCUMFLEX
\"\\x95\"=>\"&#239;\", # LATIN SMALL LETTER I WITH DIAERESIS
\"\\x96\"=>\"&#241;\", # LATIN SMALL LETTER N WITH TILDE
\"\\x97\"=>\"&#243;\", # LATIN SMALL LETTER O WITH ACUTE
\"\\x98\"=>\"&#242;\", # LATIN SMALL LETTER O WITH GRAVE
\"\\x99\"=>\"&#244;\", # LATIN SMALL LETTER O WITH CIRCUMFLEX
\"\\x9A\"=>\"&#246;\", # LATIN SMALL LETTER O WITH DIAERESIS
\"\\x9B\"=>\"&#245;\", # LATIN SMALL LETTER O WITH TILDE
\"\\x9C\"=>\"&#250;\", # LATIN SMALL LETTER U WITH ACUTE
\"\\x9D\"=>\"&#249;\", # LATIN SMALL LETTER U WITH GRAVE
\"\\x9E\"=>\"&#251;\", # LATIN SMALL LETTER U WITH CIRCUMFLEX
\"\\x9F\"=>\"&#252;\", # LATIN SMALL LETTER U WITH DIAERESIS
\"\\xA0\"=>\"&#8224;\", # DAGGER
\"\\xA1\"=>\"&#176;\", # DEGREE SIGN
\"\\xA2\"=>\"&#162;\", # CENT SIGN
\"\\xA3\"=>\"&#163;\", # POUND SIGN
\"\\xA4\"=>\"&#167;\", # SECTION SIGN
\"\\xA5\"=>\"&#8226;\", # BULLET
\"\\xA6\"=>\"&#182;\", # PILCROW SIGN
\"\\xA7\"=>\"&#223;\", # LATIN SMALL LETTER SHARP S
\"\\xA8\"=>\"&#174;\", # REGISTERED SIGN
\"\\xA9\"=>\"&#169;\", # COPYRIGHT SIGN
\"\\xAA\"=>\"&#8482;\", # TRADE MARK SIGN
\"\\xAB\"=>\"&#180;\", # ACUTE ACCENT
\"\\xAC\"=>\"&#168;\", # DIAERESIS
\"\\xAD\"=>\"&#8800;\", # NOT EQUAL TO
\"\\xAE\"=>\"&#198;\", # LATIN CAPITAL LETTER AE
\"\\xAF\"=>\"&#216;\", # LATIN CAPITAL LETTER O WITH STROKE
\"\\xB0\"=>\"&#8734;\", # INFINITY
\"\\xB1\"=>\"&#177;\", # PLUS-MINUS SIGN
\"\\xB2\"=>\"&#8804;\", # LESS-THAN OR EQUAL TO
\"\\xB3\"=>\"&#8805;\", # GREATER-THAN OR EQUAL TO
\"\\xB4\"=>\"&#165;\", # YEN SIGN
\"\\xB5\"=>\"&#181;\", # MICRO SIGN
\"\\xB6\"=>\"&#8706;\", # PARTIAL DIFFERENTIAL
\"\\xB7\"=>\"&#8721;\", # N-ARY SUMMATION
\"\\xB8\"=>\"&#8719;\", # N-ARY PRODUCT
\"\\xB9\"=>\"&#960;\", # GREEK SMALL LETTER PI
\"\\xBA\"=>\"&#8747;\", # INTEGRAL
\"\\xBB\"=>\"&#170;\", # FEMININE ORDINAL INDICATOR
\"\\xBC\"=>\"&#186;\", # MASCULINE ORDINAL INDICATOR
\"\\xBD\"=>\"&#937;\", # GREEK CAPITAL LETTER OMEGA
\"\\xBE\"=>\"&#230;\", # LATIN SMALL LETTER AE
\"\\xBF\"=>\"&#248;\", # LATIN SMALL LETTER O WITH STROKE
\"\\xC0\"=>\"&#191;\", # INVERTED QUESTION MARK
\"\\xC1\"=>\"&#161;\", # INVERTED EXCLAMATION MARK
\"\\xC2\"=>\"&#172;\", # NOT SIGN
\"\\xC3\"=>\"&#8730;\", # SQUARE ROOT
\"\\xC4\"=>\"&#402;\", # LATIN SMALL LETTER F WITH HOOK
\"\\xC5\"=>\"&#8776;\", # ALMOST EQUAL TO
\"\\xC6\"=>\"&#8710;\", # INCREMENT
\"\\xC7\"=>\"&#171;\", # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
\"\\xC8\"=>\"&#187;\", # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
\"\\xC9\"=>\"&#8230;\", # HORIZONTAL ELLIPSIS
\"\\xCA\"=>\"&#160;\", # NO-BREAK SPACE
\"\\xCB\"=>\"&#192;\", # LATIN CAPITAL LETTER A WITH GRAVE
\"\\xCC\"=>\"&#195;\", # LATIN CAPITAL LETTER A WITH TILDE
\"\\xCD\"=>\"&#213;\", # LATIN CAPITAL LETTER O WITH TILDE
\"\\xCE\"=>\"&#338;\", # LATIN CAPITAL LIGATURE OE
\"\\xCF\"=>\"&#339;\", # LATIN SMALL LIGATURE OE
\"\\xD0\"=>\"&#8211;\", # EN DASH
\"\\xD1\"=>\"&#8212;\", # EM DASH
\"\\xD2\"=>\"&#8220;\", # LEFT DOUBLE QUOTATION MARK
\"\\xD3\"=>\"&#8221;\", # RIGHT DOUBLE QUOTATION MARK
\"\\xD4\"=>\"&#8216;\", # LEFT SINGLE QUOTATION MARK
\"\\xD5\"=>\"&#8217;\", # RIGHT SINGLE QUOTATION MARK
\"\\xD6\"=>\"&#247;\", # DIVISION SIGN
\"\\xD7\"=>\"&#9674;\", # LOZENGE
\"\\xD8\"=>\"&#255;\", # LATIN SMALL LETTER Y WITH DIAERESIS
\"\\xD9\"=>\"&#376;\", # LATIN CAPITAL LETTER Y WITH DIAERESIS
\"\\xDA\"=>\"&#8260;\", # FRACTION SLASH
\"\\xDB\"=>\"&#8364;\", # EURO SIGN
\"\\xDC\"=>\"&#8249;\", # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
\"\\xDD\"=>\"&#8250;\", # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
\"\\xDE\"=>\"&#64257;\", # LATIN SMALL LIGATURE FI
\"\\xDF\"=>\"&#64258;\", # LATIN SMALL LIGATURE FL
\"\\xE0\"=>\"&#8225;\", # DOUBLE DAGGER
\"\\xE1\"=>\"&#183;\", # MIDDLE DOT
\"\\xE2\"=>\"&#8218;\", # SINGLE LOW-9 QUOTATION MARK
\"\\xE3\"=>\"&#8222;\", # DOUBLE LOW-9 QUOTATION MARK
\"\\xE4\"=>\"&#8240;\", # PER MILLE SIGN
\"\\xE5\"=>\"&#194;\", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
\"\\xE6\"=>\"&#202;\", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
\"\\xE7\"=>\"&#193;\", # LATIN CAPITAL LETTER A WITH ACUTE
\"\\xE8\"=>\"&#203;\", # LATIN CAPITAL LETTER E WITH DIAERESIS
\"\\xE9\"=>\"&#200;\", # LATIN CAPITAL LETTER E WITH GRAVE
\"\\xEA\"=>\"&#205;\", # LATIN CAPITAL LETTER I WITH ACUTE
\"\\xEB\"=>\"&#206;\", # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
\"\\xEC\"=>\"&#207;\", # LATIN CAPITAL LETTER I WITH DIAERESIS
\"\\xED\"=>\"&#204;\", # LATIN CAPITAL LETTER I WITH GRAVE
\"\\xEE\"=>\"&#211;\", # LATIN CAPITAL LETTER O WITH ACUTE
\"\\xEF\"=>\"&#212;\", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
\"\\xF0\"=>\"&#63743;\", # Apple logo (private-use code point U+F8FF)
\"\\xF1\"=>\"&#210;\", # LATIN CAPITAL LETTER O WITH GRAVE
\"\\xF2\"=>\"&#218;\", # LATIN CAPITAL LETTER U WITH ACUTE
\"\\xF3\"=>\"&#219;\", # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
\"\\xF4\"=>\"&#217;\", # LATIN CAPITAL LETTER U WITH GRAVE
\"\\xF5\"=>\"&#305;\", # LATIN SMALL LETTER DOTLESS I
\"\\xF6\"=>\"&#710;\", # MODIFIER LETTER CIRCUMFLEX ACCENT
\"\\xF7\"=>\"&#732;\", # SMALL TILDE
\"\\xF8\"=>\"&#175;\", # MACRON
\"\\xF9\"=>\"&#728;\", # BREVE
\"\\xFA\"=>\"&#729;\", # DOT ABOVE
\"\\xFB\"=>\"&#730;\", # RING ABOVE
\"\\xFC\"=>\"&#184;\", # CEDILLA
\"\\xFD\"=>\"&#733;\", # DOUBLE ACUTE ACCENT
\"\\xFE\"=>\"&#731;\", # OGONEK
\"\\xFF\"=>\"&#711;\" # CARON
);
$fin = $ARGV[0];
$tmp = $ENV{TMPDIR};
$fout = $tmp . 'temp.out';
open FOUT, \">$fout\" or die $!;
open FIN, $fin or die $!;
# Read in 32 KB chunks; 2**15 is exponentiation (2^15 would be XOR, i.e. 13).
while (read FIN, $_, 2**15) {
# Each byte is replaced on its own, so chunk boundaries cannot split a match.
s~([\\x80-\\xFF])~$macToUni{$1}~g;
print FOUT;
}
close FIN;
close FOUT;"
    set end of cmd to fin
    tell application "MacPerl"
        Do Script cmd
    end tell
    -- Open the converted file with the application whose creator code is
    -- stored in the editor property.
    tell application "Finder"
        open file fout using application file id editor
    end tell
end open
------------------------------------------------------------------------