Le 8 févr. 09 à 16:50, وفا خلیقی a écrit :
Is there a sample code that I can study?
Here is an OTP file I wrote in 1994. It works as follows: it takes
Unicode-encoded input and shifts each code point by a hexadecimal offset
that depends on the contextual form of the letter:
initial form: offset DC00
medial form: offset DE00
final form: offset E000
isolated form: offset DA00
short vowels (called ACCENT in the file): offset DA00
short vowels + shadda: offset DA90
The lam-alif and alif-lam-lam-heh ligatures are handled in a separate
OTP.
%%% ====================================================================
%%% @OmegaTranslationProcess-file{
%%% author = "Yannis Haralambous",
%%% version = "1.0",
%%% date = "16/07/94",
%%% time = "9:53:26 MET",
%%% filename = "UnicodeToContUnicode.otp",
%%% address = "187, rue Nationale,
%%% 59800 Lille, France"
%%% telephone = "",
%%% FAX = "+33 20402864",
%%% checksum = "",
%%% email = "[email protected]",
%%% codetable = "ISO/ASCII",
%%% keywords = "",
%%% supported = "yes",
%%% docstring = "This file is part of the public domain
%%% Arabic-script Omega system.
%%% Omega is a TeX extension (C) John Plaice
%%% and Yannis Haralambous, 1994"
%%% }
%%% ====================================================================
% Size, in bytes, of one character on the input and output side
% (2 = 16-bit code values such as the @"DA00-@"E0FF results produced below).
input: 2;
output: 2;
% Named states used by the expressions in addition to the default one:
%   MEDIAL  - pushed after an initial form has been emitted, i.e. while
%             inside a connected run of letters;
%   NUMERAL - pushed after "\beginL{}" has been emitted for a number.
states: MEDIAL,NUMERAL;
aliases:
% Character classes referenced by the expressions section.
%
% SHADDA is parked on @"FFFF (a Unicode noncharacter), so the {SHADDA}
% patterns are presumably dormant for ordinary input; the real shadda
% (@"0651) lies inside the ACCENT range and is treated as a plain accent.
SHADDA = (@"FFFF); % was 0651
% UNIFORM: letters that the rules only ever emit with the DA00 offset.
UNIFORM = (@"0621 | @"0674 | @"066E | @"066F | @"06EF | @"063F);
% SPECIAL: @"FDF2-@"FDF4, copied to the output unchanged by the rules.
SPECIAL = (@"FDF2-@"FDF4) ;
% BIFORM: letters emitted in two forms only (DA00 or E000 offset); they
% never start or continue a MEDIAL run.
% The @"0688 range stops at @"0699: @"069A is a four-form letter and is
% listed in QUADRIFORM, so it must not appear in both classes.
BIFORM = (@"0605 | @"0606 | @"0613 | @"0622-@"0625 | @"0627 |
@"0629 | @"062F-@"0632 | @"0648 |
@"065D | @"065E |
@"0671-@"0673 | @"0675-@"0677 | @"0688-@"0699 |
@"06BA | @"06C0-@"06CB | @"06CD | @"06D2 | @"06D3 |
@"06FF | @"0710 | @"0715-@"0719 | @"071E | @"0724 |
@"0728 | @"072A | @"072C | @"072F | @"0750);
% QUADRIFORM: letters emitted in four forms (DA00, DC00, DE00 or E000
% offset, depending on context).
QUADRIFORM = (@"0600-@"0604 | @"0607-@"060B | @"060C-@"0612 | @"0616 |
@"0626 | @"0628 | @"062A-@"062E | @"0633-@"063A |
@"0640-@"0647 |
@"0649 | @"064A |
%%% @"0655-@"0657 | @"065B | @"065C |
@"0659 |
@"0678-@"0687 | @"069A-@"06B7 |
@"06BB-@"06BF | @"06CC | @"06CE | @"06D0 | @"06D1 |
@"06FE | @"0712-@"0714 | @"071A-@"071D | @"071F-@"0723 |
@"0725-@"0727 | @"0729 | @"072B | @"0751-@"0757);
% ACCENT: short vowels and other combining marks (offset DA00, or DA90
% when combined with SHADDA).
ACCENT = (@"064B-@"0658 | @"0670 | @"0711 | @"0730-@"074F);
% Union of the two joining classes.
% NOTE(review): not referenced by any expression visible in this file.
ARABIC_LETTER = ({BIFORM} | {QUADRIFORM});
% Complement (^) of the listed Arabic/Syriac ranges: anything outside them
% ends a connected run of letters.
NOT_ARABIC_LETTER = ^(@"0600-@"060B | @"060D-@"0613 | @"0616 |
@"0621-@"065F | @"0670-@"06D3 | @"0710-@"075F);
% Characters after which a preceding letter does not connect forward.
NOT_ARABIC_OR_UNI = ({NOT_ARABIC_LETTER}|{UNIFORM});
% ASCII, Arabic-Indic and Extended Arabic-Indic digits, and the complement.
ARABIC_NUMBER = (@"0030-@"0039 | @"0660-@"0669 | @"06F0-@"06F9);
NOT_ARABIC_NUMBER = ^(@"0030-@"0039 | @"0660-@"0669 | @"06F0-@"06F9);
% Lam-like and alif-like letters.
% NOTE(review): not referenced by any expression visible in this file;
% presumably kept for the separate ligature OTP - confirm before removing.
LAM_LIKE = (@"0644 | @"06B5-@"06B7 | @"06FE);
ALIF_LIKE = (@"0622|@"0623|@"0625|@"0627|@"0671-@"0673);
expressions:
% Rules without a <STATE> prefix. Longer, more specific patterns are
% listed ahead of their shorter prefixes, so the order looks significant
% and should be preserved.
%
% UNIFORM letter + SHADDA + accent: letter gets the DA00 offset, the accent
% gets the DA90 (accent-with-shadda) offset; the SHADDA itself (\2) is not
% emitted.
{UNIFORM}{SHADDA}{ACCENT}
=> #(\1 + @"DA00) #(\3 + @"DA90)
;
% UNIFORM letter + accent: both get the DA00 offset.
{UNIFORM}{ACCENT}
=> #(\1 + @"DA00) #(\2 + @"DA00)
;
% Bare UNIFORM letter: DA00 offset.
{UNIFORM}
=> #(\1 + @"DA00)
;
% SPECIAL characters are copied unchanged; only a following accent is
% offset (DA90 after SHADDA, DA00 otherwise).
{SPECIAL}{SHADDA}{ACCENT}
=> \1 #(\3 + @"DA90)
;
{SPECIAL}{ACCENT}
=> \1 #(\2 + @"DA00)
;
{SPECIAL}
=> \1
;
% ---- Numbers ---------------------------------------------------------
% A number is bracketed by the strings "\beginL{}" and "\endL{}"
% (presumably a left-to-right segment - confirm against the macro
% package). The NUMERAL state is active between the two.
%
% In NUMERAL: a digit that is the last character of the input closes the
% bracket and leaves the state.
<NUMERAL>{ARABIC_NUMBER} end:
=> #(\1) "\endL{}"
<pop:>
;
% In NUMERAL: any other digit is copied and the state is kept.
<NUMERAL>{ARABIC_NUMBER}
=> #(\1)
;
% In NUMERAL: '+', '-', '.', @"066B or @"066C followed by a digit belongs
% to the number. Same end-of-input handling as for a plain digit.
<NUMERAL>(@"002B|@"002D|@"002E|@"066B|@"066C){ARABIC_NUMBER} end:
=> #(\1) #(\2) "\endL{}"
<pop:>
;
<NUMERAL>(@"002B|@"002D|@"002E|@"066B|@"066C){ARABIC_NUMBER}
=> #(\1) #(\2)
;
% In NUMERAL: anything that is not a digit ends the number. "\endL{}" is
% emitted, the character is pushed back onto the input (<=) so that it is
% processed again after the state has been popped.
<NUMERAL>{NOT_ARABIC_NUMBER}
=> "\endL{}"
<= #(\1)
<pop:>
;
% Outside NUMERAL: '+', '-' or '.' followed by a digit starts a number.
% At end of input the bracket is opened and closed at once, with no state
% change.
(@"002B|@"002D|@"002E){ARABIC_NUMBER} end:
=> "\beginL{}" #(\1) #(\2) "\endL{}"
;
(@"002B|@"002D|@"002E){ARABIC_NUMBER}
=> "\beginL{}" #(\1) #(\2)
<push: NUMERAL>
;
% Outside NUMERAL: a single digit at end of input is copied without any
% "\beginL{}"/"\endL{}" bracket.
{ARABIC_NUMBER} end:
=> #(\1)
;
% Outside NUMERAL: any other digit opens the bracket and enters NUMERAL.
{ARABIC_NUMBER}
=> "\beginL{}" #(\1)
<push: NUMERAL>
;
% An accent that was not consumed together with a preceding letter:
% DA00 offset.
{ACCENT}
=> #(\1 + @"DA00)
;
% Anything outside the Arabic/Syriac ranges is copied unchanged.
% NOTE(review): a few code points listed in QUADRIFORM/BIFORM (e.g. @"060C,
% @"06FE, @"06FF) also fall in NOT_ARABIC_LETTER; if rules are tried in
% order, this rule takes them first - confirm that this is intended.
{NOT_ARABIC_LETTER}
=> #(\1)
;
% ---- QUADRIFORM letter that starts a run but has nothing to join to ----
% The letter (with up to two accents, or SHADDA + accent) is followed by a
% non-joining character or by the end of input, so it is emitted with the
% DA00 (isolated) offset. Accents get DA00, or DA90 after SHADDA (the
% SHADDA itself is not emitted). The look-ahead character is pushed back
% onto the input (<=) to be processed by a later rule.
{QUADRIFORM}{NOT_ARABIC_OR_UNI}
=> #(\1 + @"DA00) <= \2
;
{QUADRIFORM} end:
=> #(\1 + @"DA00)
;
{QUADRIFORM}{SHADDA}{ACCENT}{NOT_ARABIC_OR_UNI}
=> #(\1 + @"DA00) #(\3 + @"DA90)
<= #(\4)
;
{QUADRIFORM}{ACCENT}{ACCENT}{NOT_ARABIC_OR_UNI}
=> #(\1 + @"DA00) #(\2 + @"DA00) #(\3 + @"DA00)
<= #(\4)
;
{QUADRIFORM}{ACCENT}{NOT_ARABIC_OR_UNI}
=> #(\1 + @"DA00) #(\2 + @"DA00)
<= #(\3)
;
% Same combinations at end of input: nothing to push back.
{QUADRIFORM}{SHADDA}{ACCENT} end:
=> #(\1 + @"DA00) #(\3 + @"DA90)
;
{QUADRIFORM}{ACCENT}{ACCENT} end:
=> #(\1 + @"DA00) #(\2 + @"DA00) #(\3 + @"DA00)
;
{QUADRIFORM}{ACCENT} end:
=> #(\1 + @"DA00) #(\2 + @"DA00)
;
% ---- QUADRIFORM letter that starts a connected run -------------------
% Reached only when none of the isolated-form rules matched, i.e. the
% letter is followed by something it joins to. The letter gets the DC00
% (initial) offset, accents get DA00 (DA90 after SHADDA), a connecting
% stroke @"0620 is appended, and the MEDIAL state is pushed.
% @"0620 is our internal keshideh (not Unicode keshideh which is @"0640)
{QUADRIFORM}{SHADDA}{ACCENT}
=> #(\1 + @"DC00) #(\3 + @"DA90) @"0620
<push: MEDIAL>
;
{QUADRIFORM}{ACCENT}{ACCENT}
=> #(\1 + @"DC00) #(\2 + @"DA00) #(\3 + @"DA00) @"0620
<push: MEDIAL>
;
{QUADRIFORM}{ACCENT}
=> #(\1 + @"DC00) #(\2 + @"DA00) @"0620
<push: MEDIAL>
;
{QUADRIFORM}
=> #(\1 + @"DC00) @"0620
<push: MEDIAL>
;
% ---- BIFORM letter outside a connected run ---------------------------
% A BIFORM letter never starts a run: it is emitted with the DA00
% (isolated) offset and no state is pushed. Accents get DA00, or DA90
% after SHADDA (the SHADDA itself is not emitted).
{BIFORM}{SHADDA}{ACCENT}
=> #(\1 + @"DA00) #(\3 + @"DA90)
;
{BIFORM}{ACCENT}{ACCENT}
=> #(\1 + @"DA00) #(\2 + @"DA00) #(\3 + @"DA00)
;
{BIFORM}{ACCENT}
=> #(\1 + @"DA00) #(\2 + @"DA00)
;
{BIFORM}
=> #(\1 + @"DA00)
;
% ---- MEDIAL state: QUADRIFORM letter that ends the run ----------------
% Inside a connected run, a QUADRIFORM letter (with up to two accents, or
% SHADDA + accent) followed by a non-joining character or by the end of
% input is the last letter of the run: it gets the E000 (final) offset,
% accents get DA00 (DA90 after SHADDA), the look-ahead character is pushed
% back onto the input (<=), and the MEDIAL state is popped.
<MEDIAL>{QUADRIFORM}{NOT_ARABIC_OR_UNI}
=> #(\1 + @"E000)
<= #(\2)
<pop:>
;
<MEDIAL>{QUADRIFORM} end:
=> #(\1 + @"E000)
<pop:>
;
<MEDIAL>{QUADRIFORM}{SHADDA}{ACCENT}{NOT_ARABIC_OR_UNI}
=> #(\1 + @"E000) #(\3 + @"DA90)
<= #(\4)
<pop:>
;
<MEDIAL>{QUADRIFORM}{ACCENT}{ACCENT}{NOT_ARABIC_OR_UNI}
=> #(\1 + @"E000) #(\2 + @"DA00) #(\3 + @"DA00)
<= #(\4)
<pop:>
;
<MEDIAL>{QUADRIFORM}{ACCENT}{NOT_ARABIC_OR_UNI}
=> #(\1 + @"E000) #(\2 + @"DA00)
<= #(\3)
<pop:>
;
% Same combinations at end of input: nothing to push back.
<MEDIAL>{QUADRIFORM}{SHADDA}{ACCENT} end:
=> #(\1 + @"E000) #(\3 + @"DA90)
<pop:>
;
<MEDIAL>{QUADRIFORM}{ACCENT}{ACCENT} end:
=> #(\1 + @"E000) #(\2 + @"DA00) #(\3 + @"DA00)
<pop:>
;
<MEDIAL>{QUADRIFORM}{ACCENT} end:
=> #(\1 + @"E000) #(\2 + @"DA00)
<pop:>
;
% ---- MEDIAL state: QUADRIFORM letter in the middle of the run ---------
% Reached only when none of the final-form rules matched, i.e. the run
% continues. The letter gets the DE00 (medial) offset, accents get DA00
% (DA90 after SHADDA), the internal connecting stroke @"0620 is appended,
% and the state stays MEDIAL (no push, no pop).
<MEDIAL>{QUADRIFORM}{SHADDA}{ACCENT}
=> #(\1 + @"DE00) #(\3 + @"DA90) @"0620
;
<MEDIAL>{QUADRIFORM}{ACCENT}{ACCENT}
=> #(\1 + @"DE00) #(\2 + @"DA00) #(\3 + @"DA00) @"0620
;
<MEDIAL>{QUADRIFORM}{ACCENT}
=> #(\1 + @"DE00) #(\2 + @"DA00) @"0620
;
<MEDIAL>{QUADRIFORM}
=> #(\1 + @"DE00) @"0620
;
% ---- MEDIAL state: BIFORM letter ---------------------------------------
% A BIFORM letter inside a run always ends it: it gets the E000 (final)
% offset, accents get DA00 (DA90 after SHADDA), and the MEDIAL state is
% popped regardless of what follows.
<MEDIAL>{BIFORM}{SHADDA}{ACCENT}
=> #(\1 + @"E000) #(\3 + @"DA90)
<pop:>
;
<MEDIAL>{BIFORM}{ACCENT}{ACCENT}
=> #(\1 + @"E000) #(\2 + @"DA00) #(\3 + @"DA00)
<pop:>
;
<MEDIAL>{BIFORM}{ACCENT}
=> #(\1 + @"E000) #(\2 + @"DA00)
<pop:>
;
<MEDIAL>{BIFORM}
=> #(\1 + @"E000)
<pop:>
;
% Code points @"F000-@"F07F are copied to the output unchanged.
% NOTE(review): this range also lies inside NOT_ARABIC_LETTER, whose rule
% appears earlier; if rules are tried in order this one may never fire -
% confirm.
@"F000-@"F07F => \1 ;