If you copy this to Script Editor and save it as an Application with the options 
stay-open and no startup screen, named “MacToUnicodeHTML - R*ch” (where the last four 
characters MUST be the creator type of your text editor), you will have a useful droplet 
to convert Mac text files to HTML.  You can extend the table, if you like, to do more 
than just convert the 8-bit characters, e.g.,

\">\"=>\"&gt;\"

The script will read the dropped file and open the converted result in a temporary 
document.


NO charset should be included in the content-type declaration of the final html file.

JD


(*
Save as Stay-open AppleScript Droplet named: "MacToUnicodeHTML - R*ch"
The last four characters of the application's name are used as the creator
code of the text editor that opens the converted file.
*)
-- Creator code of the editor used to open the result. The run and open
-- handlers overwrite this with the last four characters of the droplet's path.
property editor : "R*ch"
on run
  -- Launched without a dropped file: just refresh the editor creator code
  -- from the last four characters of this application's own path.
  set myPath to "" & (path to me)
  set editor to text -4 thru -1 of myPath
  return
end run
-- Droplet entry point.
-- flist: the item(s) dropped on the application; only the first one is converted.
-- Builds a MacPerl script that replaces every byte 0x80-0xFF of the dropped
-- Mac OS Roman text file with the HTML numeric character reference of the
-- corresponding Unicode character, writes the result to "temp.out" in the
-- temporary items folder, and opens that file in the editor whose creator
-- code is given by the last four characters of this application's name.
on open flist
  set fin to "" & item 1 of (flist as list)
  set editor to text -4 through -1 of ("" & (path to me))
  set fout to "" & (path to temporary items folder) & "temp.out"
  -- Make sure the output file exists before the Finder is asked to open it.
  set n to open for access file fout
  close access n
  -- First list item is the Perl source; the following items become @ARGV.
  set cmd to {} & "# Mac OS Roman byte => HTML numeric character reference (decimal).
# The values are pure ASCII, so the output does not depend on any charset.
%macToUni = (
\"\\x80\"=>\"&#196;\",  # LATIN CAPITAL LETTER A WITH DIAERESIS
\"\\x81\"=>\"&#197;\",  # LATIN CAPITAL LETTER A WITH RING ABOVE
\"\\x82\"=>\"&#199;\",  # LATIN CAPITAL LETTER C WITH CEDILLA
\"\\x83\"=>\"&#201;\",  # LATIN CAPITAL LETTER E WITH ACUTE
\"\\x84\"=>\"&#209;\",  # LATIN CAPITAL LETTER N WITH TILDE
\"\\x85\"=>\"&#214;\",  # LATIN CAPITAL LETTER O WITH DIAERESIS
\"\\x86\"=>\"&#220;\",  # LATIN CAPITAL LETTER U WITH DIAERESIS
\"\\x87\"=>\"&#225;\",  # LATIN SMALL LETTER A WITH ACUTE
\"\\x88\"=>\"&#224;\",  # LATIN SMALL LETTER A WITH GRAVE
\"\\x89\"=>\"&#226;\",  # LATIN SMALL LETTER A WITH CIRCUMFLEX
\"\\x8A\"=>\"&#228;\",  # LATIN SMALL LETTER A WITH DIAERESIS
\"\\x8B\"=>\"&#227;\",  # LATIN SMALL LETTER A WITH TILDE
\"\\x8C\"=>\"&#229;\",  # LATIN SMALL LETTER A WITH RING ABOVE
\"\\x8D\"=>\"&#231;\",  # LATIN SMALL LETTER C WITH CEDILLA
\"\\x8E\"=>\"&#233;\",  # LATIN SMALL LETTER E WITH ACUTE
\"\\x8F\"=>\"&#232;\",  # LATIN SMALL LETTER E WITH GRAVE
\"\\x90\"=>\"&#234;\",  # LATIN SMALL LETTER E WITH CIRCUMFLEX
\"\\x91\"=>\"&#235;\",  # LATIN SMALL LETTER E WITH DIAERESIS
\"\\x92\"=>\"&#237;\",  # LATIN SMALL LETTER I WITH ACUTE
\"\\x93\"=>\"&#236;\",  # LATIN SMALL LETTER I WITH GRAVE
\"\\x94\"=>\"&#238;\",  # LATIN SMALL LETTER I WITH CIRCUMFLEX
\"\\x95\"=>\"&#239;\",  # LATIN SMALL LETTER I WITH DIAERESIS
\"\\x96\"=>\"&#241;\",  # LATIN SMALL LETTER N WITH TILDE
\"\\x97\"=>\"&#243;\",  # LATIN SMALL LETTER O WITH ACUTE
\"\\x98\"=>\"&#242;\",  # LATIN SMALL LETTER O WITH GRAVE
\"\\x99\"=>\"&#244;\",  # LATIN SMALL LETTER O WITH CIRCUMFLEX
\"\\x9A\"=>\"&#246;\",  # LATIN SMALL LETTER O WITH DIAERESIS
\"\\x9B\"=>\"&#245;\",  # LATIN SMALL LETTER O WITH TILDE
\"\\x9C\"=>\"&#250;\",  # LATIN SMALL LETTER U WITH ACUTE
\"\\x9D\"=>\"&#249;\",  # LATIN SMALL LETTER U WITH GRAVE
\"\\x9E\"=>\"&#251;\",  # LATIN SMALL LETTER U WITH CIRCUMFLEX
\"\\x9F\"=>\"&#252;\",  # LATIN SMALL LETTER U WITH DIAERESIS
\"\\xA0\"=>\"&#8224;\",  # DAGGER
\"\\xA1\"=>\"&#176;\",  # DEGREE SIGN
\"\\xA2\"=>\"&#162;\",  # CENT SIGN
\"\\xA3\"=>\"&#163;\",  # POUND SIGN
\"\\xA4\"=>\"&#167;\",  # SECTION SIGN
\"\\xA5\"=>\"&#8226;\",  # BULLET
\"\\xA6\"=>\"&#182;\",  # PILCROW SIGN
\"\\xA7\"=>\"&#223;\",  # LATIN SMALL LETTER SHARP S
\"\\xA8\"=>\"&#174;\",  # REGISTERED SIGN
\"\\xA9\"=>\"&#169;\",  # COPYRIGHT SIGN
\"\\xAA\"=>\"&#8482;\",  # TRADE MARK SIGN
\"\\xAB\"=>\"&#180;\",  # ACUTE ACCENT
\"\\xAC\"=>\"&#168;\",  # DIAERESIS
\"\\xAD\"=>\"&#8800;\",  # NOT EQUAL TO
\"\\xAE\"=>\"&#198;\",  # LATIN CAPITAL LETTER AE
\"\\xAF\"=>\"&#216;\",  # LATIN CAPITAL LETTER O WITH STROKE
\"\\xB0\"=>\"&#8734;\",  # INFINITY
\"\\xB1\"=>\"&#177;\",  # PLUS-MINUS SIGN
\"\\xB2\"=>\"&#8804;\",  # LESS-THAN OR EQUAL TO
\"\\xB3\"=>\"&#8805;\",  # GREATER-THAN OR EQUAL TO
\"\\xB4\"=>\"&#165;\",  # YEN SIGN
\"\\xB5\"=>\"&#181;\",  # MICRO SIGN
\"\\xB6\"=>\"&#8706;\",  # PARTIAL DIFFERENTIAL
\"\\xB7\"=>\"&#8721;\",  # N-ARY SUMMATION
\"\\xB8\"=>\"&#8719;\",  # N-ARY PRODUCT
\"\\xB9\"=>\"&#960;\",  # GREEK SMALL LETTER PI
\"\\xBA\"=>\"&#8747;\",  # INTEGRAL
\"\\xBB\"=>\"&#170;\",  # FEMININE ORDINAL INDICATOR
\"\\xBC\"=>\"&#186;\",  # MASCULINE ORDINAL INDICATOR
\"\\xBD\"=>\"&#937;\",  # GREEK CAPITAL LETTER OMEGA
\"\\xBE\"=>\"&#230;\",  # LATIN SMALL LETTER AE
\"\\xBF\"=>\"&#248;\",  # LATIN SMALL LETTER O WITH STROKE
\"\\xC0\"=>\"&#191;\",  # INVERTED QUESTION MARK
\"\\xC1\"=>\"&#161;\",  # INVERTED EXCLAMATION MARK
\"\\xC2\"=>\"&#172;\",  # NOT SIGN
\"\\xC3\"=>\"&#8730;\",  # SQUARE ROOT
\"\\xC4\"=>\"&#402;\",  # LATIN SMALL LETTER F WITH HOOK
\"\\xC5\"=>\"&#8776;\",  # ALMOST EQUAL TO
\"\\xC6\"=>\"&#8710;\",  # INCREMENT
\"\\xC7\"=>\"&#171;\",  # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
\"\\xC8\"=>\"&#187;\",  # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
\"\\xC9\"=>\"&#8230;\",  # HORIZONTAL ELLIPSIS
\"\\xCA\"=>\"&#160;\",  # NO-BREAK SPACE
\"\\xCB\"=>\"&#192;\",  # LATIN CAPITAL LETTER A WITH GRAVE
\"\\xCC\"=>\"&#195;\",  # LATIN CAPITAL LETTER A WITH TILDE
\"\\xCD\"=>\"&#213;\",  # LATIN CAPITAL LETTER O WITH TILDE
\"\\xCE\"=>\"&#338;\",  # LATIN CAPITAL LIGATURE OE
\"\\xCF\"=>\"&#339;\",  # LATIN SMALL LIGATURE OE
\"\\xD0\"=>\"&#8211;\",  # EN DASH
\"\\xD1\"=>\"&#8212;\",  # EM DASH
\"\\xD2\"=>\"&#8220;\",  # LEFT DOUBLE QUOTATION MARK
\"\\xD3\"=>\"&#8221;\",  # RIGHT DOUBLE QUOTATION MARK
\"\\xD4\"=>\"&#8216;\",  # LEFT SINGLE QUOTATION MARK
\"\\xD5\"=>\"&#8217;\",  # RIGHT SINGLE QUOTATION MARK
\"\\xD6\"=>\"&#247;\",  # DIVISION SIGN
\"\\xD7\"=>\"&#9674;\",  # LOZENGE
\"\\xD8\"=>\"&#255;\",  # LATIN SMALL LETTER Y WITH DIAERESIS
\"\\xD9\"=>\"&#376;\",  # LATIN CAPITAL LETTER Y WITH DIAERESIS
\"\\xDA\"=>\"&#8260;\",  # FRACTION SLASH
\"\\xDB\"=>\"&#8364;\",  # EURO SIGN
\"\\xDC\"=>\"&#8249;\",  # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
\"\\xDD\"=>\"&#8250;\",  # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
\"\\xDE\"=>\"&#64257;\",  # LATIN SMALL LIGATURE FI
\"\\xDF\"=>\"&#64258;\",  # LATIN SMALL LIGATURE FL
\"\\xE0\"=>\"&#8225;\",  # DOUBLE DAGGER
\"\\xE1\"=>\"&#183;\",  # MIDDLE DOT
\"\\xE2\"=>\"&#8218;\",  # SINGLE LOW-9 QUOTATION MARK
\"\\xE3\"=>\"&#8222;\",  # DOUBLE LOW-9 QUOTATION MARK
\"\\xE4\"=>\"&#8240;\",  # PER MILLE SIGN
\"\\xE5\"=>\"&#194;\",  # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
\"\\xE6\"=>\"&#202;\",  # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
\"\\xE7\"=>\"&#193;\",  # LATIN CAPITAL LETTER A WITH ACUTE
\"\\xE8\"=>\"&#203;\",  # LATIN CAPITAL LETTER E WITH DIAERESIS
\"\\xE9\"=>\"&#200;\",  # LATIN CAPITAL LETTER E WITH GRAVE
\"\\xEA\"=>\"&#205;\",  # LATIN CAPITAL LETTER I WITH ACUTE
\"\\xEB\"=>\"&#206;\",  # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
\"\\xEC\"=>\"&#207;\",  # LATIN CAPITAL LETTER I WITH DIAERESIS
\"\\xED\"=>\"&#204;\",  # LATIN CAPITAL LETTER I WITH GRAVE
\"\\xEE\"=>\"&#211;\",  # LATIN CAPITAL LETTER O WITH ACUTE
\"\\xEF\"=>\"&#212;\",  # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
\"\\xF0\"=>\"&#63743;\",  # Apple logo (U+F8FF, Apple private-use code point)
\"\\xF1\"=>\"&#210;\",  # LATIN CAPITAL LETTER O WITH GRAVE
\"\\xF2\"=>\"&#218;\",  # LATIN CAPITAL LETTER U WITH ACUTE
\"\\xF3\"=>\"&#219;\",  # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
\"\\xF4\"=>\"&#217;\",  # LATIN CAPITAL LETTER U WITH GRAVE
\"\\xF5\"=>\"&#305;\",  # LATIN SMALL LETTER DOTLESS I
\"\\xF6\"=>\"&#710;\",  # MODIFIER LETTER CIRCUMFLEX ACCENT
\"\\xF7\"=>\"&#732;\",  # SMALL TILDE
\"\\xF8\"=>\"&#175;\",  # MACRON
\"\\xF9\"=>\"&#728;\",  # BREVE
\"\\xFA\"=>\"&#729;\",  # DOT ABOVE
\"\\xFB\"=>\"&#730;\",  # RING ABOVE
\"\\xFC\"=>\"&#184;\",  # CEDILLA
\"\\xFD\"=>\"&#733;\",  # DOUBLE ACUTE ACCENT
\"\\xFE\"=>\"&#731;\",  # OGONEK
\"\\xFF\"=>\"&#711;\"  # CARON
);
  # Both paths are supplied by the AppleScript caller, so the file written
  # here is exactly the file the Finder is asked to open afterwards.
  $fin = $ARGV[0];
  $fout = $ARGV[1];
  open FOUT, \">$fout\" or die $!;
  open FIN, $fin or die $!;
  # 2**15 = 32768-byte chunks (2^15 would be bitwise XOR, i.e. 13).
  # Each replaced character is a single byte, so chunk boundaries are safe.
  while (read FIN, $_, 2**15) {
    s~([\\x80-\\xFF])~$macToUni{$1}~g;
    print FOUT;
  }
  close FIN;
  close FOUT;"
  set end of cmd to fin
  set end of cmd to fout
  tell application "MacPerl"
    Do Script cmd
  end tell
  tell application "Finder"
    open file fout using application file id editor
  end tell
end open
------------------------------------------------------------------------


Reply via email to