Le 8 févr. 09 à 16:50, وفا خلیقی a écrit :

Is there a sample code that I can study?

Here is an OTP file I wrote in 1994. It works in the following way: it takes input coded in Unicode and translates codepoints depending on the form:

initial form: DC00 is added to the code point
medial form: DE00 is added to the code point
final form: E000 is added to the code point
isolated form: DA00 is added to the code point

short vowels (called ACCENT in the file): DA00 is added to the code point
short vowels preceded by a shadda: DA90 is added to the code point of the vowel

The lam-alif and alif-lam-lam-heh ligatures are handled in a separate OTP.


%%% ====================================================================
%%%  @OmegaTranslationProcess-file{
%%%     author          = "Yannis Haralambous",
%%%     version         = "1.0",
%%%     date            = "16/07/94",
%%%     time            = "9:53:26 MET",
%%%     filename        = "UnicodeToContUnicode.otp",
%%%     address         = "187, rue Nationale,
%%%                        59800 Lille, France",
%%%     telephone       = "",
%%%     FAX             = "+33 20402864",
%%%     checksum        = "",
%%%     email           = "[email protected]",
%%%     codetable       = "ISO/ASCII",
%%%     keywords        = "",
%%%     supported       = "yes",
%%%     docstring       = "This file is part of the public domain
%%%                        Arabic-script Omega system.
%%%                        Omega is a TeX extension (C) John Plaice
%%%                        and Yannis Haralambous, 1994"
%%%  }
%%% ====================================================================
% Input and output character sizes for this translation process.
% NOTE(review): in Omega OTP files these values are normally the number of
% bytes per character (2 = 16-bit code points) -- confirm against the Omega
% documentation.
input: 2;
output: 2;

% Named states referenced by the <MEDIAL>/<NUMERAL> rule prefixes and by the
% <push: ...> / <pop:> actions in the expressions section.
states: MEDIAL,NUMERAL;

aliases:

% Character classes used by the expressions section.  The class names reflect
% how many contextual forms the expressions generate for a member:
%   UNIFORM    - only ever shifted by @"DA00 (isolated form);
%   BIFORM     - shifted by @"DA00 (isolated) or @"E000 (final);
%   QUADRIFORM - shifted by @"DA00, @"DC00 (initial), @"DE00 (medial)
%                or @"E000 (final);
%   SPECIAL    - copied to the output unchanged.

% Code point that triggers the "shadda + accent" rules.  It is currently set to
% @"FFFF instead of @"0651; since @"0651 lies inside the ACCENT range below, a
% real shadda is handled by the ordinary ACCENT rules.
SHADDA          = (@"FFFF); % was 0651
UNIFORM           = (@"0621 | @"0674 | @"066E | @"066F | @"06EF | @"063F);
SPECIAL           = (@"FDF2-@"FDF4) ;
% The @"0688 range stops at @"0699: @"069A belongs to QUADRIFORM (see the
% @"069A-@"06B7 range there) and must not be listed in both classes.
BIFORM = (@"0605 | @"0606 | @"0613 | @"0622-@"0625 | @"0627 | @"0629 | @"062F-@"0632 | @"0648 |
                     @"065D | @"065E |
                     @"0671-@"0673 | @"0675-@"0677 | @"0688-@"0699 |
                     @"06BA | @"06C0-@"06CB | @"06CD | @"06D2 | @"06D3 |
                     @"06FF | @"0710 | @"0715-@"0719 | @"071E | @"0724 |
                     @"0728 | @"072A | @"072C | @"072F | @"0750);
QUADRIFORM        = (@"0600-@"0604 | @"0607-@"060B | @"060C-@"0612 | @"0616 |
                         @"0626 | @"0628 | @"062A-@"062E | @"0633-@"063A |
                     @"0640-@"0647 |
                     @"0649 | @"064A |
%%%                  @"0655-@"0657 | @"065B | @"065C |
             @"0659 |
                     @"0678-@"0687 | @"069A-@"06B7 |
                     @"06BB-@"06BF | @"06CC | @"06CE | @"06D0 | @"06D1 |
                     @"06FE | @"0712-@"0714 | @"071A-@"071D | @"071F-@"0723 |
                     @"0725-@"0727 | @"0729 | @"072B | @"0751-@"0757);
% Combining marks (short vowels and similar) that may follow a letter.
ACCENT            = (@"064B-@"0658 | @"0670 | @"0711 | @"0730-@"074F);
% Union of the two joining classes; not referenced by the expressions below.
ARABIC_LETTER     = ({BIFORM} | {QUADRIFORM});
% Everything outside the listed Arabic/Syriac ranges.
% NOTE(review): @"06FE (QUADRIFORM) and @"06FF (BIFORM) are not excluded here,
% so they also match NOT_ARABIC_LETTER -- confirm whether that is intended.
NOT_ARABIC_LETTER = ^(@"0600-@"060B | @"060D-@"0613 | @"0616 | @"0621-@"065F | @"0670-@"06D3 | @"0710-@"075F);
% Lookahead class: a following character that does not join with the letter
% before it, i.e. a non-Arabic character or a UNIFORM letter.
NOT_ARABIC_OR_UNI = ({NOT_ARABIC_LETTER}|{UNIFORM});
% ASCII, Arabic-Indic and Extended Arabic-Indic digits, and their complement.
ARABIC_NUMBER     = (@"0030-@"0039 | @"0660-@"0669 | @"06F0-@"06F9);
NOT_ARABIC_NUMBER = ^(@"0030-@"0039 | @"0660-@"0669 | @"06F0-@"06F9);
% Lam-like and alif-like letters; not referenced by the expressions below.
LAM_LIKE = (@"0644 | @"06B5-@"06B7 | @"06FE);
ALIF_LIKE = (@"0622|@"0623|@"0625|@"0627|@"0671-@"0673);

expressions:

% --- UNIFORM letters --------------------------------------------------------
% A UNIFORM letter is always shifted by @"DA00 (isolated form).  An accent
% that follows it is shifted by @"DA00 as well, or by @"DA90 when a SHADDA
% sits between the letter and the accent (the SHADDA itself is not output).
{UNIFORM}{SHADDA}{ACCENT}
        => #(\1 + @"DA00) #(\3 + @"DA90)
        ;
{UNIFORM}{ACCENT}
        => #(\1 + @"DA00) #(\2 + @"DA00)
        ;
{UNIFORM}
        => #(\1 + @"DA00)
        ;
% --- SPECIAL code points (@"FDF2-@"FDF4) -------------------------------------
% The SPECIAL character is copied unchanged; only a following accent is
% shifted, using the same @"DA00 / @"DA90 offsets as above.
{SPECIAL}{SHADDA}{ACCENT}
        => \1 #(\3 + @"DA90)
        ;
{SPECIAL}{ACCENT}
        => \1 #(\2 + @"DA00)
        ;
{SPECIAL}
        => \1
        ;
% --- Numbers ----------------------------------------------------------------
% A run of digits is wrapped in "\beginL{}" ... "\endL{}".  The NUMERAL state
% is pushed when the run starts and popped when it ends.

% Inside a run: a digit that is the last character of the input closes the run.
<NUMERAL>{ARABIC_NUMBER} end:
        => #(\1) "\endL{}"
        <pop:>
        ;
% Inside a run: any other digit is copied and the run continues.
<NUMERAL>{ARABIC_NUMBER}
        => #(\1)
        ;
% Inside a run: '+', '-', '.', @"066B or @"066C followed by a digit stays part
% of the run; the first variant closes the run at end of input.
<NUMERAL>(@"002B|@"002D|@"002E|@"066B|@"066C){ARABIC_NUMBER} end:
        => #(\1) #(\2) "\endL{}"
        <pop:>
        ;
<NUMERAL>(@"002B|@"002D|@"002E|@"066B|@"066C){ARABIC_NUMBER}
        => #(\1) #(\2)
        ;
% Inside a run: a non-digit closes the run; the character is pushed back onto
% the input (<=) so that it is processed again after the state is popped.
<NUMERAL>{NOT_ARABIC_NUMBER}
        => "\endL{}"
        <= #(\1)
        <pop:>
        ;
% Outside a run: '+', '-' or '.' followed by a digit at end of input is a
% complete two-character number, so it is opened and closed at once.
(@"002B|@"002D|@"002E){ARABIC_NUMBER} end:
        => "\beginL{}" #(\1) #(\2) "\endL{}"
        ;
% Outside a run: '+', '-' or '.' followed by a digit opens a run.
(@"002B|@"002D|@"002E){ARABIC_NUMBER}
        => "\beginL{}" #(\1) #(\2)
        <push: NUMERAL>
        ;
% Outside a run: a single digit at end of input is copied without any
% "\beginL{}" / "\endL{}" wrapper.
{ARABIC_NUMBER} end:
        => #(\1)
        ;
% Outside a run: any other digit opens a run.
{ARABIC_NUMBER}
        => "\beginL{}" #(\1)
        <push: NUMERAL>
        ;
% An accent that is not attached to a letter by one of the other rules is
% shifted by @"DA00.
{ACCENT}
        => #(\1 + @"DA00)
        ;
% Characters outside the Arabic ranges are copied unchanged.
{NOT_ARABIC_LETTER}
        => #(\1)
        ;
% --- QUADRIFORM letter, isolated form (@"DA00) --------------------------------
% Used when the letter (with up to two accents, or SHADDA + accent) is followed
% by a NOT_ARABIC_OR_UNI character or by the end of the input.  The lookahead
% character is pushed back onto the input (<=) and is not consumed here.
% Accents are shifted by @"DA00, or by @"DA90 when preceded by SHADDA.
{QUADRIFORM}{NOT_ARABIC_OR_UNI}
        => #(\1 + @"DA00) <= \2
        ;
{QUADRIFORM} end:
        => #(\1 + @"DA00)
        ;
{QUADRIFORM}{SHADDA}{ACCENT}{NOT_ARABIC_OR_UNI}
        => #(\1 + @"DA00) #(\3 + @"DA90)
        <= #(\4)
        ;
{QUADRIFORM}{ACCENT}{ACCENT}{NOT_ARABIC_OR_UNI}
        => #(\1 + @"DA00) #(\2 + @"DA00) #(\3 + @"DA00)
        <= #(\4)
        ;
{QUADRIFORM}{ACCENT}{NOT_ARABIC_OR_UNI}
        => #(\1 + @"DA00) #(\2 + @"DA00)
        <= #(\3)
        ;
% Same combinations when the letter group ends the input.
{QUADRIFORM}{SHADDA}{ACCENT} end:
        => #(\1 + @"DA00) #(\3 + @"DA90)
        ;
{QUADRIFORM}{ACCENT}{ACCENT} end:
        => #(\1 + @"DA00) #(\2 + @"DA00) #(\3 + @"DA00)
        ;
{QUADRIFORM}{ACCENT} end:
        => #(\1 + @"DA00) #(\2 + @"DA00)
        ;
        
% @"0620 is our internal keshideh (not Unicode keshideh which is @"0640)

% --- QUADRIFORM letter, initial form (@"DC00) ---------------------------------
% Used when none of the isolated-form rules matched.  The letter is shifted by
% @"DC00, its accents by @"DA00 (or @"DA90 after SHADDA), the internal
% keshideh @"0620 is appended, and the MEDIAL state is pushed so that the
% following letter is given a medial or final form.
{QUADRIFORM}{SHADDA}{ACCENT}
        => #(\1 + @"DC00) #(\3 + @"DA90) @"0620
        <push: MEDIAL>
        ;
{QUADRIFORM}{ACCENT}{ACCENT}
        => #(\1 + @"DC00) #(\2 + @"DA00) #(\3 + @"DA00) @"0620
        <push: MEDIAL>
        ;
{QUADRIFORM}{ACCENT}
        => #(\1 + @"DC00) #(\2 + @"DA00) @"0620
        <push: MEDIAL>
        ;
{QUADRIFORM}
        => #(\1 + @"DC00) @"0620
        <push: MEDIAL>
        ;
% --- BIFORM letter, isolated form (@"DA00) ------------------------------------
% The letter is shifted by @"DA00 and no state is pushed, so the next letter
% is not treated as joined to this one.  Accents are shifted by @"DA00, or by
% @"DA90 after SHADDA.
{BIFORM}{SHADDA}{ACCENT}
        => #(\1 + @"DA00) #(\3 + @"DA90)
        ;
{BIFORM}{ACCENT}{ACCENT}
        => #(\1 + @"DA00) #(\2 + @"DA00) #(\3 + @"DA00)
        ;
{BIFORM}{ACCENT}
        => #(\1 + @"DA00) #(\2 + @"DA00)
        ;
{BIFORM}
        => #(\1 + @"DA00)
        ;
% --- MEDIAL state: QUADRIFORM letter, final form (@"E000) ---------------------
% Used when the letter (with up to two accents, or SHADDA + accent) is followed
% by a NOT_ARABIC_OR_UNI character or by the end of the input.  The letter is
% shifted by @"E000, the lookahead character is pushed back onto the input
% (<=), and the MEDIAL state is popped because the joined sequence ends here.
<MEDIAL>{QUADRIFORM}{NOT_ARABIC_OR_UNI}
        => #(\1 + @"E000)
        <= #(\2)
        <pop:>
        ;
<MEDIAL>{QUADRIFORM} end:
        => #(\1 + @"E000)
        <pop:>
        ;
<MEDIAL>{QUADRIFORM}{SHADDA}{ACCENT}{NOT_ARABIC_OR_UNI}
        => #(\1 + @"E000) #(\3 + @"DA90)
        <= #(\4)
        <pop:>
        ;
<MEDIAL>{QUADRIFORM}{ACCENT}{ACCENT}{NOT_ARABIC_OR_UNI}
        => #(\1 + @"E000) #(\2 + @"DA00) #(\3 + @"DA00)
        <= #(\4)
        <pop:>
        ;
<MEDIAL>{QUADRIFORM}{ACCENT}{NOT_ARABIC_OR_UNI}
        => #(\1 + @"E000) #(\2 + @"DA00)
        <= #(\3)
        <pop:>
        ;
% Same combinations when the letter group ends the input.
<MEDIAL>{QUADRIFORM}{SHADDA}{ACCENT} end:
        => #(\1 + @"E000) #(\3 + @"DA90)
        <pop:>
        ;
<MEDIAL>{QUADRIFORM}{ACCENT}{ACCENT} end:
        => #(\1 + @"E000) #(\2 + @"DA00) #(\3 + @"DA00)
        <pop:>
        ;
<MEDIAL>{QUADRIFORM}{ACCENT} end:
        => #(\1 + @"E000) #(\2 + @"DA00)
        <pop:>
        ;
% --- MEDIAL state: QUADRIFORM letter, medial form (@"DE00) --------------------
% Used when none of the final-form rules matched.  The letter is shifted by
% @"DE00, the internal keshideh @"0620 is appended, and the state is left
% unchanged (no push or pop), so the next letter is also handled in MEDIAL.
<MEDIAL>{QUADRIFORM}{SHADDA}{ACCENT}
        => #(\1 + @"DE00) #(\3 + @"DA90) @"0620
        ;
<MEDIAL>{QUADRIFORM}{ACCENT}{ACCENT}
        => #(\1 + @"DE00) #(\2 + @"DA00) #(\3 + @"DA00) @"0620
        ;
<MEDIAL>{QUADRIFORM}{ACCENT}
        => #(\1 + @"DE00) #(\2 + @"DA00) @"0620
        ;
<MEDIAL>{QUADRIFORM}
        => #(\1 + @"DE00) @"0620
        ;
% --- MEDIAL state: BIFORM letter, final form (@"E000) -------------------------
% A BIFORM letter in MEDIAL state always takes the final form and pops the
% state, whatever follows it.  Accents are shifted by @"DA00, or by @"DA90
% after SHADDA.
<MEDIAL>{BIFORM}{SHADDA}{ACCENT}
        => #(\1 + @"E000) #(\3 + @"DA90)
        <pop:>
        ;
<MEDIAL>{BIFORM}{ACCENT}{ACCENT}
        => #(\1 + @"E000) #(\2 + @"DA00) #(\3 + @"DA00)
        <pop:>
        ;
<MEDIAL>{BIFORM}{ACCENT}
        => #(\1 + @"E000) #(\2 + @"DA00)
        <pop:>
        ;
<MEDIAL>{BIFORM}
        => #(\1 + @"E000)
        <pop:>
        ;
        
% Code points @"F000-@"F07F are copied to the output unchanged.
@"F000-@"F07F => \1 ;

Reply via email to