Hi,

This patch replaces the switch statements in arabic.c with a static array
of struct achar.  This makes adding new letters a lot easier: it usually
involves only adding an entry to this array (I've already added the
Farsi letters).

This is also a clean-up patch and removes more than 1k lines.  After
applying this patch arabic.h holds only a macro definition, so it is
probably a good idea to remove arabic.h completely and move the trivial
ARABIC_CHAR macro to another header.

        Ali
---
 src/arabic.c | 1127 ++++++++++------------------------------------------------
 src/arabic.h |  243 +-------------
 src/mbyte.c  |   30 --
 3 files changed, 189 insertions(+), 1211 deletions(-)

diff --git a/src/arabic.c b/src/arabic.c
--- a/src/arabic.c
+++ b/src/arabic.c
@@ -15,917 +15,156 @@
  * --
  *
  * Author: Nadim Shaikli & Isam Bayazidi
+ * Farsi support and restructuring to make adding new
+ *   letters easier by Ali Gholami Rudi
  *
  */
 
-static int  A_is_a __ARGS((int cur_c));
-static int  A_is_s __ARGS((int cur_c));
-static int  A_is_f __ARGS((int cur_c));
-static int  chg_c_a2s __ARGS((int cur_c));
-static int  chg_c_a2i __ARGS((int cur_c));
-static int  chg_c_a2m __ARGS((int cur_c));
-static int  chg_c_a2f __ARGS((int cur_c));
-static int  chg_c_i2m __ARGS((int cur_c));
-static int  chg_c_f2m __ARGS((int cur_c));
+/*
+ * Sorted list of unicode Arabic characters.  Each entry holds the
+ * presentation forms of a letter.
+ *
+ * Arabic characters are categorized into following types:
+ *
+ * Isolated    - iso-8859-6 form
+ * Initial     - unicode form-B start
+ * Medial      - unicode form-B middle
+ * Final       - unicode form-B final
+ * Stand-Alone - unicode form-B isolated
+ *
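+ * The fields in the struct represent (0 means the letter has no such form):
+ *
+ *  c -> isolated (iso-8859-6 code point; the array is sorted on this)
+ *  s -> stand-alone
+ *  i -> initial
+ *  m -> medial
+ *  f -> final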
+ */
+static struct achar {
+       unsigned c;     /* base code point (iso-8859-6 form); sort key */
+       unsigned s;     /* form-B stand-alone, 0 if none */
+       unsigned i;     /* form-B initial, 0 if none */
+       unsigned m;     /* form-B medial, 0 if none */
+       unsigned f;     /* form-B final, 0 if none */
+} achars[] = {
+       {0x0621, 0xfe80},                               /* a_HAMZA */
+       {0x0622, 0xfe81, 0, 0, 0xfe82},                 /* a_ALEF_MADDA */
+       {0x0623, 0xfe83, 0, 0, 0xfe84},                 /* a_ALEF_HAMZA_ABOVE */
+       {0x0624, 0xfe85, 0, 0, 0xfe86},                 /* a_WAW_HAMZA */
+       {0x0625, 0xfe87, 0, 0, 0xfe88},                 /* a_ALEF_HAMZA_BELOW */
+       {0x0626, 0xfe89, 0xfe8b, 0xfe8c, 0xfe8a},       /* a_YEH_HAMZA */
+       {0x0627, 0xfe8d, 0, 0, 0xfe8e},                 /* a_ALEF */
+       {0x0628, 0xfe8f, 0xfe91, 0xfe92, 0xfe90},       /* a_BEH */
+       {0x0629, 0xfe93, 0, 0, 0xfe94},                 /* a_TEH_MARBUTA */
+       {0x062a, 0xfe95, 0xfe97, 0xfe98, 0xfe96},       /* a_TEH */
+       {0x062b, 0xfe99, 0xfe9b, 0xfe9c, 0xfe9a},       /* a_THEH */
+       {0x062c, 0xfe9d, 0xfe9f, 0xfea0, 0xfe9e},       /* a_JEEM */
+       {0x062d, 0xfea1, 0xfea3, 0xfea4, 0xfea2},       /* a_HAH */
+       {0x062e, 0xfea5, 0xfea7, 0xfea8, 0xfea6},       /* a_KHAH */
+       {0x062f, 0xfea9, 0, 0, 0xfeaa},                 /* a_DAL */
+       {0x0630, 0xfeab, 0, 0, 0xfeac},                 /* a_THAL */
+       {0x0631, 0xfead, 0, 0, 0xfeae},                 /* a_REH */
+       {0x0632, 0xfeaf, 0, 0, 0xfeb0},                 /* a_ZAIN */
+       {0x0633, 0xfeb1, 0xfeb3, 0xfeb4, 0xfeb2},       /* a_SEEN */
+       {0x0634, 0xfeb5, 0xfeb7, 0xfeb8, 0xfeb6},       /* a_SHEEN */
+       {0x0635, 0xfeb9, 0xfebb, 0xfebc, 0xfeba},       /* a_SAD */
+       {0x0636, 0xfebd, 0xfebf, 0xfec0, 0xfebe},       /* a_DAD */
+       {0x0637, 0xfec1, 0xfec3, 0xfec4, 0xfec2},       /* a_TAH */
+       {0x0638, 0xfec5, 0xfec7, 0xfec8, 0xfec6},       /* a_ZAH */
+       {0x0639, 0xfec9, 0xfecb, 0xfecc, 0xfeca},       /* a_AIN */
+       {0x063a, 0xfecd, 0xfecf, 0xfed0, 0xfece},       /* a_GHAIN */
+       {0x0640},                                       /* a_TATWEEL */
+       {0x0641, 0xfed1, 0xfed3, 0xfed4, 0xfed2},       /* a_FEH */
+       {0x0642, 0xfed5, 0xfed7, 0xfed8, 0xfed6},       /* a_QAF */
+       {0x0643, 0xfed9, 0xfedb, 0xfedc, 0xfeda},       /* a_KAF */
+       {0x0644, 0xfedd, 0xfedf, 0xfee0, 0xfede},       /* a_LAM */
+       {0x0645, 0xfee1, 0xfee3, 0xfee4, 0xfee2},       /* a_MEEM */
+       {0x0646, 0xfee5, 0xfee7, 0xfee8, 0xfee6},       /* a_NOON */
+       {0x0647, 0xfee9, 0xfeeb, 0xfeec, 0xfeea},       /* a_HEH */
+       {0x0648, 0xfeed, 0, 0, 0xfeee},                 /* a_WAW */
+       {0x0649, 0xfeef, 0, 0, 0xfef0},                 /* a_ALEF_MAKSURA */
+       {0x064a, 0xfef1, 0xfef3, 0xfef4, 0xfef2},       /* a_YEH */
+       {0x064b, 0xfe70},                               /* a_FATHATAN */
+       {0x064c, 0xfe72},                               /* a_DAMMATAN */
+       {0x064d, 0xfe74},                               /* a_KASRATAN */
+       {0x064e, 0xfe76, 0, 0xfe77, 0},                 /* a_FATHA */
+       {0x064f, 0xfe78, 0, 0xfe79, 0},                 /* a_DAMMA */
+       {0x0650, 0xfe7a, 0, 0xfe7b, 0},                 /* a_KASRA */
+       {0x0651, 0xfe7c, 0, 0xfe7d, 0},                 /* a_SHADDA */
+       {0x0652, 0xfe7e, 0, 0xfe7f, 0},                 /* a_SUKUN */
+       {0x0653},                                       /* a_MADDA_ABOVE */
+       {0x0654},                                       /* a_HAMZA_ABOVE */
+       {0x0655},                                       /* a_HAMZA_BELOW */
+       {0x067e, 0xfb56, 0xfb58, 0xfb59, 0xfb57},       /* a_PEH */
+       {0x0686, 0xfb7a, 0xfb7c, 0xfb7d, 0xfb7b},       /* a_TCHEH */
+       {0x0698, 0xfb8a, 0, 0, 0xfb8b},                 /* a_JEH */
+       {0x06a9, 0xfb8e, 0xfb90, 0xfb91, 0xfb8f},       /* a_FKAF */
+       {0x06af, 0xfb92, 0xfb94, 0xfb95, 0xfb93},       /* a_GAF */
+       {0x06cc, 0xfbfc, 0xfbfe, 0xfbff, 0xfbfd},       /* a_FYEH */
+};
+
+/* these values are hardcoded in functions (a_s_* are stand-alone forms) */
+#define a_HAMZA                                0x0621
+#define a_ALEF_MADDA                   0x0622
+#define a_ALEF_HAMZA_ABOVE             0x0623
+#define a_ALEF_HAMZA_BELOW             0x0625
+#define a_ALEF                         0x0627
+#define a_LAM                          0x0644
+#define a_FATHATAN                     0x064b
+#define a_s_FATHATAN                   0xfe70
+#define a_s_DAMMATAN                   0xfe72
+#define a_s_KASRATAN                   0xfe74
+#define a_FATHA                                0x064e
+#define a_s_FATHA                      0xfe76
+#define a_s_HAMZA                      0xfe80
+#define a_SUKUN                                0x0652
+#define a_MINI_ALEF                    0x0670
+
+#define a_s_LAM_ALEF_MADDA_ABOVE       0xfef5
+#define a_f_LAM_ALEF_MADDA_ABOVE       0xfef6
+#define a_s_LAM_ALEF_HAMZA_ABOVE       0xfef7
+#define a_f_LAM_ALEF_HAMZA_ABOVE       0xfef8
+#define a_s_LAM_ALEF_HAMZA_BELOW       0xfef9
+#define a_f_LAM_ALEF_HAMZA_BELOW       0xfefa
+#define a_s_LAM_ALEF                   0xfefb
+#define a_f_LAM_ALEF                   0xfefc
+
+#define a_BYTE_ORDER_MARK              0xfeff
+
+#define ARRAY_SIZE(a)          (sizeof(a) / sizeof((a)[0]))
+
 static int  chg_c_laa2i __ARGS((int hid_c));
 static int  chg_c_laa2f __ARGS((int hid_c));
-static int  half_shape __ARGS((int c));
 static int  A_firstc_laa __ARGS((int c1, int c));
 static int  A_is_harakat __ARGS((int c));
 static int  A_is_iso __ARGS((int c));
-static int  A_is_formb __ARGS((int c));
 static int  A_is_ok __ARGS((int c));
 static int  A_is_valid __ARGS((int c));
 static int  A_is_special __ARGS((int c));
 
-
-/*
- * Returns True if c is an ISO-8859-6 shaped ARABIC letter (user entered)
- */
-    static int
-A_is_a(cur_c)
-    int cur_c;
-{
-    switch (cur_c)
-    {
-       case a_HAMZA:
-       case a_ALEF_MADDA:
-       case a_ALEF_HAMZA_ABOVE:
-       case a_WAW_HAMZA:
-       case a_ALEF_HAMZA_BELOW:
-       case a_YEH_HAMZA:
-       case a_ALEF:
-       case a_BEH:
-       case a_TEH_MARBUTA:
-       case a_TEH:
-       case a_THEH:
-       case a_JEEM:
-       case a_HAH:
-       case a_KHAH:
-       case a_DAL:
-       case a_THAL:
-       case a_REH:
-       case a_ZAIN:
-       case a_SEEN:
-       case a_SHEEN:
-       case a_SAD:
-       case a_DAD:
-       case a_TAH:
-       case a_ZAH:
-       case a_AIN:
-       case a_GHAIN:
-       case a_TATWEEL:
-       case a_FEH:
-       case a_QAF:
-       case a_KAF:
-       case a_LAM:
-       case a_MEEM:
-       case a_NOON:
-       case a_HEH:
-       case a_WAW:
-       case a_ALEF_MAKSURA:
-       case a_YEH:
-           return TRUE;
-    }
-
-    return FALSE;
-}
-
-
-/*
- * Returns True if c is an Isolated Form-B ARABIC letter
- */
-    static int
-A_is_s(cur_c)
-    int cur_c;
-{
-    switch (cur_c)
-    {
-       case a_s_HAMZA:
-       case a_s_ALEF_MADDA:
-       case a_s_ALEF_HAMZA_ABOVE:
-       case a_s_WAW_HAMZA:
-       case a_s_ALEF_HAMZA_BELOW:
-       case a_s_YEH_HAMZA:
-       case a_s_ALEF:
-       case a_s_BEH:
-       case a_s_TEH_MARBUTA:
-       case a_s_TEH:
-       case a_s_THEH:
-       case a_s_JEEM:
-       case a_s_HAH:
-       case a_s_KHAH:
-       case a_s_DAL:
-       case a_s_THAL:
-       case a_s_REH:
-       case a_s_ZAIN:
-       case a_s_SEEN:
-       case a_s_SHEEN:
-       case a_s_SAD:
-       case a_s_DAD:
-       case a_s_TAH:
-       case a_s_ZAH:
-       case a_s_AIN:
-       case a_s_GHAIN:
-       case a_s_FEH:
-       case a_s_QAF:
-       case a_s_KAF:
-       case a_s_LAM:
-       case a_s_MEEM:
-       case a_s_NOON:
-       case a_s_HEH:
-       case a_s_WAW:
-       case a_s_ALEF_MAKSURA:
-       case a_s_YEH:
-           return TRUE;
-    }
-
-    return FALSE;
-}
-
-
 /*
- * Returns True if c is a Final shape of an ARABIC letter
+ * Find the struct achar pointer to the given arabic char
  */
-    static int
-A_is_f(cur_c)
-    int cur_c;
-{
-    switch (cur_c)
-    {
-       case a_f_ALEF_MADDA:
-       case a_f_ALEF_HAMZA_ABOVE:
-       case a_f_WAW_HAMZA:
-       case a_f_ALEF_HAMZA_BELOW:
-       case a_f_YEH_HAMZA:
-       case a_f_ALEF:
-       case a_f_BEH:
-       case a_f_TEH_MARBUTA:
-       case a_f_TEH:
-       case a_f_THEH:
-       case a_f_JEEM:
-       case a_f_HAH:
-       case a_f_KHAH:
-       case a_f_DAL:
-       case a_f_THAL:
-       case a_f_REH:
-       case a_f_ZAIN:
-       case a_f_SEEN:
-       case a_f_SHEEN:
-       case a_f_SAD:
-       case a_f_DAD:
-       case a_f_TAH:
-       case a_f_ZAH:
-       case a_f_AIN:
-       case a_f_GHAIN:
-       case a_f_FEH:
-       case a_f_QAF:
-       case a_f_KAF:
-       case a_f_LAM:
-       case a_f_MEEM:
-       case a_f_NOON:
-       case a_f_HEH:
-       case a_f_WAW:
-       case a_f_ALEF_MAKSURA:
-       case a_f_YEH:
-       case a_f_LAM_ALEF_MADDA_ABOVE:
-       case a_f_LAM_ALEF_HAMZA_ABOVE:
-       case a_f_LAM_ALEF_HAMZA_BELOW:
-       case a_f_LAM_ALEF:
-           return TRUE;
-    }
-    return FALSE;
-}
-
-
-/*
- * Change shape - from ISO-8859-6/Isolated to Form-B Isolated
- */
-    static int
-chg_c_a2s(cur_c)
-    int cur_c;
-{
-    int tempc;
-
-    switch (cur_c)
-    {
-       case a_HAMZA:
-           tempc = a_s_HAMZA;
-           break;
-       case a_ALEF_MADDA:
-           tempc = a_s_ALEF_MADDA;
-           break;
-       case a_ALEF_HAMZA_ABOVE:
-           tempc = a_s_ALEF_HAMZA_ABOVE;
-           break;
-       case a_WAW_HAMZA:
-           tempc = a_s_WAW_HAMZA;
-           break;
-       case a_ALEF_HAMZA_BELOW:
-           tempc = a_s_ALEF_HAMZA_BELOW;
-           break;
-       case a_YEH_HAMZA:
-           tempc = a_s_YEH_HAMZA;
-           break;
-       case a_ALEF:
-           tempc = a_s_ALEF;
-           break;
-       case a_TEH_MARBUTA:
-           tempc = a_s_TEH_MARBUTA;
-           break;
-       case a_DAL:
-           tempc = a_s_DAL;
-           break;
-       case a_THAL:
-           tempc = a_s_THAL;
-           break;
-       case a_REH:
-           tempc = a_s_REH;
-           break;
-       case a_ZAIN:
-           tempc = a_s_ZAIN;
-           break;
-       case a_TATWEEL:                 /* exceptions */
-           tempc = cur_c;
-           break;
-       case a_WAW:
-           tempc = a_s_WAW;
-           break;
-       case a_ALEF_MAKSURA:
-           tempc = a_s_ALEF_MAKSURA;
-           break;
-       case a_BEH:
-           tempc = a_s_BEH;
-           break;
-       case a_TEH:
-           tempc = a_s_TEH;
-           break;
-       case a_THEH:
-           tempc = a_s_THEH;
-           break;
-       case a_JEEM:
-           tempc = a_s_JEEM;
-           break;
-       case a_HAH:
-           tempc = a_s_HAH;
-           break;
-       case a_KHAH:
-           tempc = a_s_KHAH;
-           break;
-       case a_SEEN:
-           tempc = a_s_SEEN;
-           break;
-       case a_SHEEN:
-           tempc = a_s_SHEEN;
-           break;
-       case a_SAD:
-           tempc = a_s_SAD;
-           break;
-       case a_DAD:
-           tempc = a_s_DAD;
-           break;
-       case a_TAH:
-           tempc = a_s_TAH;
-           break;
-       case a_ZAH:
-           tempc = a_s_ZAH;
-           break;
-       case a_AIN:
-           tempc = a_s_AIN;
-           break;
-       case a_GHAIN:
-           tempc = a_s_GHAIN;
-           break;
-       case a_FEH:
-           tempc = a_s_FEH;
-           break;
-       case a_QAF:
-           tempc = a_s_QAF;
-           break;
-       case a_KAF:
-           tempc = a_s_KAF;
-           break;
-       case a_LAM:
-           tempc = a_s_LAM;
-           break;
-       case a_MEEM:
-           tempc = a_s_MEEM;
-           break;
-       case a_NOON:
-           tempc = a_s_NOON;
-           break;
-       case a_HEH:
-           tempc = a_s_HEH;
-           break;
-       case a_YEH:
-           tempc = a_s_YEH;
-           break;
-       default:
-           tempc = 0;
-    }
-
-    return tempc;
-}
-
-
-/*
- * Change shape - from ISO-8859-6/Isolated to Initial
- */
-    static int
-chg_c_a2i(cur_c)
-    int cur_c;
-{
-    int tempc;
-
-    switch (cur_c)
-    {
-       case a_YEH_HAMZA:
-           tempc = a_i_YEH_HAMZA;
-           break;
-       case a_HAMZA:                   /* exceptions */
-           tempc = a_s_HAMZA;
-           break;
-       case a_ALEF_MADDA:              /* exceptions */
-           tempc = a_s_ALEF_MADDA;
-           break;
-       case a_ALEF_HAMZA_ABOVE:        /* exceptions */
-           tempc = a_s_ALEF_HAMZA_ABOVE;
-           break;
-       case a_WAW_HAMZA:               /* exceptions */
-           tempc = a_s_WAW_HAMZA;
-           break;
-       case a_ALEF_HAMZA_BELOW:        /* exceptions */
-           tempc = a_s_ALEF_HAMZA_BELOW;
-           break;
-       case a_ALEF:                    /* exceptions */
-           tempc = a_s_ALEF;
-           break;
-       case a_TEH_MARBUTA:             /* exceptions */
-           tempc = a_s_TEH_MARBUTA;
-           break;
-       case a_DAL:                     /* exceptions */
-           tempc = a_s_DAL;
-           break;
-       case a_THAL:                    /* exceptions */
-           tempc = a_s_THAL;
-           break;
-       case a_REH:                     /* exceptions */
-           tempc = a_s_REH;
-           break;
-       case a_ZAIN:                    /* exceptions */
-           tempc = a_s_ZAIN;
-           break;
-       case a_TATWEEL:                 /* exceptions */
-           tempc = cur_c;
-           break;
-       case a_WAW:                     /* exceptions */
-           tempc = a_s_WAW;
-           break;
-       case a_ALEF_MAKSURA:            /* exceptions */
-           tempc = a_s_ALEF_MAKSURA;
-           break;
-       case a_BEH:
-           tempc = a_i_BEH;
-           break;
-       case a_TEH:
-           tempc = a_i_TEH;
-           break;
-       case a_THEH:
-           tempc = a_i_THEH;
-           break;
-       case a_JEEM:
-           tempc = a_i_JEEM;
-           break;
-       case a_HAH:
-           tempc = a_i_HAH;
-           break;
-       case a_KHAH:
-           tempc = a_i_KHAH;
-           break;
-       case a_SEEN:
-           tempc = a_i_SEEN;
-           break;
-       case a_SHEEN:
-           tempc = a_i_SHEEN;
-           break;
-       case a_SAD:
-           tempc = a_i_SAD;
-           break;
-       case a_DAD:
-           tempc = a_i_DAD;
-           break;
-       case a_TAH:
-           tempc = a_i_TAH;
-           break;
-       case a_ZAH:
-           tempc = a_i_ZAH;
-           break;
-       case a_AIN:
-           tempc = a_i_AIN;
-           break;
-       case a_GHAIN:
-           tempc = a_i_GHAIN;
-           break;
-       case a_FEH:
-           tempc = a_i_FEH;
-           break;
-       case a_QAF:
-           tempc = a_i_QAF;
-           break;
-       case a_KAF:
-           tempc = a_i_KAF;
-           break;
-       case a_LAM:
-           tempc = a_i_LAM;
-           break;
-       case a_MEEM:
-           tempc = a_i_MEEM;
-           break;
-       case a_NOON:
-           tempc = a_i_NOON;
-           break;
-       case a_HEH:
-           tempc = a_i_HEH;
-           break;
-       case a_YEH:
-           tempc = a_i_YEH;
-           break;
-       default:
-           tempc = 0;
-    }
-
-    return tempc;
-}
-
-
-/*
- * Change shape - from ISO-8859-6/Isolated to Medial
- */
-    static int
-chg_c_a2m(cur_c)
-    int cur_c;
-{
-    int tempc;
-
-    switch (cur_c)
-    {
-       case a_HAMZA:                   /* exception */
-           tempc = a_s_HAMZA;
-           break;
-       case a_ALEF_MADDA:              /* exception */
-           tempc = a_f_ALEF_MADDA;
-           break;
-       case a_ALEF_HAMZA_ABOVE:        /* exception */
-           tempc = a_f_ALEF_HAMZA_ABOVE;
-           break;
-       case a_WAW_HAMZA:               /* exception */
-           tempc = a_f_WAW_HAMZA;
-           break;
-       case a_ALEF_HAMZA_BELOW:        /* exception */
-           tempc = a_f_ALEF_HAMZA_BELOW;
-           break;
-       case a_YEH_HAMZA:
-           tempc = a_m_YEH_HAMZA;
-           break;
-       case a_ALEF:                    /* exception */
-           tempc = a_f_ALEF;
-           break;
-       case a_BEH:
-           tempc = a_m_BEH;
-           break;
-       case a_TEH_MARBUTA:             /* exception */
-           tempc = a_f_TEH_MARBUTA;
-           break;
-       case a_TEH:
-           tempc = a_m_TEH;
-           break;
-       case a_THEH:
-           tempc = a_m_THEH;
-           break;
-       case a_JEEM:
-           tempc = a_m_JEEM;
-           break;
-       case a_HAH:
-           tempc = a_m_HAH;
-           break;
-       case a_KHAH:
-           tempc = a_m_KHAH;
-           break;
-       case a_DAL:                     /* exception */
-           tempc = a_f_DAL;
-           break;
-       case a_THAL:                    /* exception */
-           tempc = a_f_THAL;
-           break;
-       case a_REH:                     /* exception */
-           tempc = a_f_REH;
-           break;
-       case a_ZAIN:                    /* exception */
-           tempc = a_f_ZAIN;
-           break;
-       case a_SEEN:
-           tempc = a_m_SEEN;
-           break;
-       case a_SHEEN:
-           tempc = a_m_SHEEN;
-           break;
-       case a_SAD:
-           tempc = a_m_SAD;
-           break;
-       case a_DAD:
-           tempc = a_m_DAD;
-           break;
-       case a_TAH:
-           tempc = a_m_TAH;
-           break;
-       case a_ZAH:
-           tempc = a_m_ZAH;
-           break;
-       case a_AIN:
-           tempc = a_m_AIN;
-           break;
-       case a_GHAIN:
-           tempc = a_m_GHAIN;
-           break;
-       case a_TATWEEL:                 /* exception */
-           tempc = cur_c;
-           break;
-       case a_FEH:
-           tempc = a_m_FEH;
-           break;
-       case a_QAF:
-           tempc = a_m_QAF;
-           break;
-       case a_KAF:
-           tempc = a_m_KAF;
-           break;
-       case a_LAM:
-           tempc = a_m_LAM;
-           break;
-       case a_MEEM:
-           tempc = a_m_MEEM;
-           break;
-       case a_NOON:
-           tempc = a_m_NOON;
-           break;
-       case a_HEH:
-           tempc = a_m_HEH;
-           break;
-       case a_WAW:                     /* exception */
-           tempc = a_f_WAW;
-           break;
-       case a_ALEF_MAKSURA:            /* exception */
-           tempc = a_f_ALEF_MAKSURA;
-           break;
-       case a_YEH:
-           tempc = a_m_YEH;
-           break;
-       default:
-           tempc = 0;
-    }
-
-    return tempc;
-}
-
-
-/*
- * Change shape - from ISO-8859-6/Isolated to final
- */
-    static int
-chg_c_a2f(cur_c)
-    int cur_c;
-{
-    int tempc;
-
-    /* NOTE: these encodings need to be accounted for
-
-       a_f_ALEF_MADDA;
-       a_f_ALEF_HAMZA_ABOVE;
-       a_f_ALEF_HAMZA_BELOW;
-       a_f_LAM_ALEF_MADDA_ABOVE;
-       a_f_LAM_ALEF_HAMZA_ABOVE;
-       a_f_LAM_ALEF_HAMZA_BELOW;
-       */
-
-    switch (cur_c)
-    {
-       case a_HAMZA:                   /* exception */
-           tempc = a_s_HAMZA;
-           break;
-       case a_ALEF_MADDA:
-           tempc = a_f_ALEF_MADDA;
-           break;
-       case a_ALEF_HAMZA_ABOVE:
-           tempc = a_f_ALEF_HAMZA_ABOVE;
-           break;
-       case a_WAW_HAMZA:
-           tempc = a_f_WAW_HAMZA;
-           break;
-       case a_ALEF_HAMZA_BELOW:
-           tempc = a_f_ALEF_HAMZA_BELOW;
-           break;
-       case a_YEH_HAMZA:
-           tempc = a_f_YEH_HAMZA;
-           break;
-       case a_ALEF:
-           tempc = a_f_ALEF;
-           break;
-       case a_BEH:
-           tempc = a_f_BEH;
-           break;
-       case a_TEH_MARBUTA:
-           tempc = a_f_TEH_MARBUTA;
-           break;
-       case a_TEH:
-           tempc = a_f_TEH;
-           break;
-       case a_THEH:
-           tempc = a_f_THEH;
-           break;
-       case a_JEEM:
-           tempc = a_f_JEEM;
-           break;
-       case a_HAH:
-           tempc = a_f_HAH;
-           break;
-       case a_KHAH:
-           tempc = a_f_KHAH;
-           break;
-       case a_DAL:
-           tempc = a_f_DAL;
-           break;
-       case a_THAL:
-           tempc = a_f_THAL;
-           break;
-       case a_REH:
-           tempc = a_f_REH;
-           break;
-       case a_ZAIN:
-           tempc = a_f_ZAIN;
-           break;
-       case a_SEEN:
-           tempc = a_f_SEEN;
-           break;
-       case a_SHEEN:
-           tempc = a_f_SHEEN;
-           break;
-       case a_SAD:
-           tempc = a_f_SAD;
-           break;
-       case a_DAD:
-           tempc = a_f_DAD;
-           break;
-       case a_TAH:
-           tempc = a_f_TAH;
-           break;
-       case a_ZAH:
-           tempc = a_f_ZAH;
-           break;
-       case a_AIN:
-           tempc = a_f_AIN;
-           break;
-       case a_GHAIN:
-           tempc = a_f_GHAIN;
-           break;
-       case a_TATWEEL:                 /* exception */
-           tempc = cur_c;
-           break;
-       case a_FEH:
-           tempc = a_f_FEH;
-           break;
-       case a_QAF:
-           tempc = a_f_QAF;
-           break;
-       case a_KAF:
-           tempc = a_f_KAF;
-           break;
-       case a_LAM:
-           tempc = a_f_LAM;
-           break;
-       case a_MEEM:
-           tempc = a_f_MEEM;
-           break;
-       case a_NOON:
-           tempc = a_f_NOON;
-           break;
-       case a_HEH:
-           tempc = a_f_HEH;
-           break;
-       case a_WAW:
-           tempc = a_f_WAW;
-           break;
-       case a_ALEF_MAKSURA:
-           tempc = a_f_ALEF_MAKSURA;
-           break;
-       case a_YEH:
-           tempc = a_f_YEH;
-           break;
-       default:
-           tempc = 0;
-    }
-
-    return tempc;
-}
-
-
-/*
- * Change shape - from Initial to Medial
- */
-    static int
-chg_c_i2m(cur_c)
-    int cur_c;
-{
-    int tempc;
-
-    switch (cur_c)
-    {
-       case a_i_YEH_HAMZA:
-           tempc = a_m_YEH_HAMZA;
-           break;
-       case a_i_BEH:
-           tempc = a_m_BEH;
-           break;
-       case a_i_TEH:
-           tempc = a_m_TEH;
-           break;
-       case a_i_THEH:
-           tempc = a_m_THEH;
-           break;
-       case a_i_JEEM:
-           tempc = a_m_JEEM;
-           break;
-       case a_i_HAH:
-           tempc = a_m_HAH;
-           break;
-       case a_i_KHAH:
-           tempc = a_m_KHAH;
-           break;
-       case a_i_SEEN:
-           tempc = a_m_SEEN;
-           break;
-       case a_i_SHEEN:
-           tempc = a_m_SHEEN;
-           break;
-       case a_i_SAD:
-           tempc = a_m_SAD;
-           break;
-       case a_i_DAD:
-           tempc = a_m_DAD;
-           break;
-       case a_i_TAH:
-           tempc = a_m_TAH;
-           break;
-       case a_i_ZAH:
-           tempc = a_m_ZAH;
-           break;
-       case a_i_AIN:
-           tempc = a_m_AIN;
-           break;
-       case a_i_GHAIN:
-           tempc = a_m_GHAIN;
-           break;
-       case a_i_FEH:
-           tempc = a_m_FEH;
-           break;
-       case a_i_QAF:
-           tempc = a_m_QAF;
-           break;
-       case a_i_KAF:
-           tempc = a_m_KAF;
-           break;
-       case a_i_LAM:
-           tempc = a_m_LAM;
-           break;
-       case a_i_MEEM:
-           tempc = a_m_MEEM;
-           break;
-       case a_i_NOON:
-           tempc = a_m_NOON;
-           break;
-       case a_i_HEH:
-           tempc = a_m_HEH;
-           break;
-       case a_i_YEH:
-           tempc = a_m_YEH;
-           break;
-       default:
-           tempc = 0;
-    }
-
-    return tempc;
-}
-
-
-/*
- * Change shape - from Final to Medial
- */
-    static int
-chg_c_f2m(cur_c)
-    int cur_c;
+    static struct achar *
+find_achar(c)
+    int c;
 {
-    int tempc;
-
-    switch (cur_c)
-    {
-       /* NOTE: these encodings are multi-positional, no ?
-          case a_f_ALEF_MADDA:
-          case a_f_ALEF_HAMZA_ABOVE:
-          case a_f_ALEF_HAMZA_BELOW:
-          */
-       case a_f_YEH_HAMZA:
-           tempc = a_m_YEH_HAMZA;
-           break;
-       case a_f_WAW_HAMZA:             /* exceptions */
-       case a_f_ALEF:
-       case a_f_TEH_MARBUTA:
-       case a_f_DAL:
-       case a_f_THAL:
-       case a_f_REH:
-       case a_f_ZAIN:
-       case a_f_WAW:
-       case a_f_ALEF_MAKSURA:
-           tempc = cur_c;
-           break;
-       case a_f_BEH:
-           tempc = a_m_BEH;
-           break;
-       case a_f_TEH:
-           tempc = a_m_TEH;
-           break;
-       case a_f_THEH:
-           tempc = a_m_THEH;
-           break;
-       case a_f_JEEM:
-           tempc = a_m_JEEM;
-           break;
-       case a_f_HAH:
-           tempc = a_m_HAH;
-           break;
-       case a_f_KHAH:
-           tempc = a_m_KHAH;
-           break;
-       case a_f_SEEN:
-           tempc = a_m_SEEN;
-           break;
-       case a_f_SHEEN:
-           tempc = a_m_SHEEN;
-           break;
-       case a_f_SAD:
-           tempc = a_m_SAD;
-           break;
-       case a_f_DAD:
-           tempc = a_m_DAD;
-           break;
-       case a_f_TAH:
-           tempc = a_m_TAH;
-           break;
-       case a_f_ZAH:
-           tempc = a_m_ZAH;
-           break;
-       case a_f_AIN:
-           tempc = a_m_AIN;
-           break;
-       case a_f_GHAIN:
-           tempc = a_m_GHAIN;
-           break;
-       case a_f_FEH:
-           tempc = a_m_FEH;
-           break;
-       case a_f_QAF:
-           tempc = a_m_QAF;
-           break;
-       case a_f_KAF:
-           tempc = a_m_KAF;
-           break;
-       case a_f_LAM:
-           tempc = a_m_LAM;
-           break;
-       case a_f_MEEM:
-           tempc = a_m_MEEM;
-           break;
-       case a_f_NOON:
-           tempc = a_m_NOON;
-           break;
-       case a_f_HEH:
-           tempc = a_m_HEH;
-           break;
-       case a_f_YEH:
-           tempc = a_m_YEH;
-           break;
-           /* NOTE: these encodings are multi-positional, no ?
-               case a_f_LAM_ALEF_MADDA_ABOVE:
-               case a_f_LAM_ALEF_HAMZA_ABOVE:
-               case a_f_LAM_ALEF_HAMZA_BELOW:
-               case a_f_LAM_ALEF:
-               */
-       default:
-           tempc = 0;
+    int h, m, l;
+    h = ARRAY_SIZE(achars);
+    l = 0;
+    /* binary search over [l, h); relies on achars[] being sorted on "c" */
+    while (l < h) {
+       m = (h + l) / 2;
+       if (achars[m].c == c)
+           return &achars[m];
+       if (c < achars[m].c)
+           h = m;
+       else
+           l = m + 1;
+    }
-
-    return tempc;
+    return NULL;
 }
 
-
 /*
  * Change shape - from Combination (2 char) to an Isolated
  */
@@ -988,17 +227,16 @@ chg_c_laa2f(hid_c)
 }
 
 /*
- * Do "half-shaping" on character "c".  Return zero if no shaping.
+ * Returns whether it is possible to join the given letters
  */
     static int
-half_shape(c)
-    int                c;
+can_join(c1, c2)
+    int                c1;
+    int                c2;
 {
-    if (A_is_a(c))
-       return chg_c_a2i(c);
-    if (A_is_valid(c) && A_is_f(c))
-       return chg_c_f2m(c);
-    return 0;
+    struct achar *lhs = find_achar(c1), *rhs = find_achar(c2);
+    int ok = lhs != NULL && rhs != NULL && (lhs->i || lhs->m);
+    return ok && (rhs->f || rhs->m);
 }
 
 /*
@@ -1019,18 +257,14 @@ arabic_shape(c, ccp, c1p, prev_c, prev_c1, next_c)
     int                prev_c1;
     int                next_c;
 {
-    int                curr_c;
-    int                shape_c;
-    int                curr_laa;
-    int                prev_laa;
+    int curr_c;
+    int curr_laa;
+    int prev_laa;
 
     /* Deal only with Arabic character, pass back all others */
     if (!A_is_ok(c))
        return c;
 
-    /* half-shape current and previous character */
-    shape_c = half_shape(prev_c);
-
     /* Save away current character */
     curr_c = c;
 
@@ -1039,8 +273,7 @@ arabic_shape(c, ccp, c1p, prev_c, prev_c1, next_c)
 
     if (curr_laa)
     {
-       if (A_is_valid(prev_c) && !A_is_f(shape_c)
-                                        && !A_is_s(shape_c) && !prev_laa)
+       if (A_is_valid(prev_c) && can_join(prev_c, a_LAM) && !prev_laa)
            curr_c = chg_c_laa2f(curr_laa);
        else
            curr_c = chg_c_laa2i(curr_laa);
@@ -1048,17 +281,20 @@ arabic_shape(c, ccp, c1p, prev_c, prev_c1, next_c)
        /* Remove the composing character */
        *c1p = 0;
     }
-    else if (!A_is_valid(prev_c) && A_is_valid(next_c))
-       curr_c = chg_c_a2i(c);
-    else if (!shape_c || A_is_f(shape_c) || A_is_s(shape_c) || prev_laa)
-       curr_c = A_is_valid(next_c) ? chg_c_a2i(c) : chg_c_a2s(c);
-    else if (A_is_valid(next_c))
-       curr_c = A_is_iso(c) ? chg_c_a2m(c) : chg_c_i2m(c);
-    else if (A_is_valid(prev_c))
-       curr_c = chg_c_a2f(c);
     else
-       curr_c = chg_c_a2s(c);
-
+    {
+       struct achar *curr_a = find_achar(c);
+       int backward_combine = !prev_laa && can_join(prev_c, curr_c);
+       int forward_combine = can_join(curr_c, next_c);
+       /* A_is_ok() also lets a_BYTE_ORDER_MARK through, which has no
+        * achars[] entry: keep the character instead of dereferencing NULL */
+       if (curr_a == NULL)
+           curr_c = c;
+       else if (backward_combine)
+           curr_c = forward_combine ? curr_a->m : curr_a->f;
+       else
+           curr_c = forward_combine ? curr_a->i : curr_a->s;
+    }
     /* Sanity check -- curr_c should, in the future, never be 0.
      * We should, in the future, insert a fatal error here. */
     if (curr_c == NUL)
@@ -1112,24 +348,7 @@ A_is_harakat(c)
 A_is_iso(c)
     int c;
 {
-    return ((c >= a_HAMZA && c <= a_GHAIN)
-           || (c >= a_TATWEEL && c <= a_HAMZA_BELOW)
-           || c == a_MINI_ALEF);
-}
-
-
-/*
- * A_is_formb returns TRUE if 'c' is an Arabic 10646-1 FormB character
- *             (alphabet/number/punctuation)
- */
-    static int
-A_is_formb(c)
-    int c;
-{
-    return ((c >= a_s_FATHATAN && c <= a_s_DAMMATAN)
-           || c == a_s_KASRATAN
-           || (c >= a_s_FATHA && c <= a_f_LAM_ALEF)
-           || c == a_BYTE_ORDER_MARK);
+    return find_achar(c) != NULL;
 }
 
 
@@ -1140,7 +359,7 @@ A_is_formb(c)
 A_is_ok(c)
     int c;
 {
-    return (A_is_iso(c) || A_is_formb(c));
+    return (A_is_iso(c) || c == a_BYTE_ORDER_MARK);
 }
 
 
@@ -1166,3 +385,33 @@ A_is_special(c)
 {
     return (c == a_HAMZA || c == a_s_HAMZA);
 }
+
+/*
+ * Return TRUE when "one" is a_LAM and arabic_maycombine() accepts "two".
+ * Note: these are NOT really composing characters!
+ */
+    int
+arabic_combine(one, two)
+    int                one;        /* first character */
+    int                two;        /* character just after "one" */
+{
+    if (one == a_LAM)
+       return arabic_maycombine(two);
+    return FALSE;
+}
+
+/*
+ * Return TRUE when "two" is one of the four ALEF forms tested below and
+ * p_arshape && !p_tbidi.  The caller must check the preceding character.
+ */
+    int
+arabic_maycombine(two)
+    int                two;        /* character to test */
+{
+    if (p_arshape && !p_tbidi)
+       return (two == a_ALEF_MADDA
+                   || two == a_ALEF_HAMZA_ABOVE
+                   || two == a_ALEF_HAMZA_BELOW
+                   || two == a_ALEF);
+    return FALSE;
+}
diff --git a/src/arabic.h b/src/arabic.h
--- a/src/arabic.h
+++ b/src/arabic.h
@@ -7,252 +7,11 @@
  */
 
 /*
- * Arabic characters are categorized into following types:
- *
- * Isolated    - iso-8859-6 form         char denoted with     a_*
- * Initial     - unicode form-B start    char denoted with     a_i_*
- * Medial      - unicode form-B middle   char denoted with     a_m_*
- * Final       - unicode form-B final    char denoted with     a_f_*
- * Stand-Alone - unicode form-B isolated char denoted with     a_s_* (NOT USED)
- *
- * --
  *
  * Author: Nadim Shaikli & Isam Bayazidi
  * - (based on Unicode)
  *
  */
 
-/*
- * Arabic ISO-10646-1 character set definition
- */
-
-/*
- * Arabic ISO-8859-6 (subset of 10646; 0600 - 06FF)
- */
-#define a_COMMA                                0x060C
-#define a_SEMICOLON                    0x061B
-#define a_QUESTION                     0x061F
-#define a_HAMZA                                0x0621
-#define a_ALEF_MADDA                   0x0622
-#define a_ALEF_HAMZA_ABOVE             0x0623
-#define a_WAW_HAMZA                    0x0624
-#define a_ALEF_HAMZA_BELOW             0x0625
-#define a_YEH_HAMZA                    0x0626
-#define a_ALEF                         0x0627
-#define a_BEH                          0x0628
-#define a_TEH_MARBUTA                  0x0629
-#define a_TEH                          0x062a
-#define a_THEH                         0x062b
-#define a_JEEM                         0x062c
-#define a_HAH                          0x062d
-#define a_KHAH                         0x062e
-#define a_DAL                          0x062f
-#define a_THAL                         0x0630
-#define a_REH                          0x0631
-#define a_ZAIN                         0x0632
-#define a_SEEN                         0x0633
-#define a_SHEEN                                0x0634
-#define a_SAD                          0x0635
-#define a_DAD                          0x0636
-#define a_TAH                          0x0637
-#define a_ZAH                          0x0638
-#define a_AIN                          0x0639
-#define a_GHAIN                                0x063a
-#define a_TATWEEL                      0x0640
-#define a_FEH                          0x0641
-#define a_QAF                          0x0642
-#define a_KAF                          0x0643
-#define a_LAM                          0x0644
-#define a_MEEM                         0x0645
-#define a_NOON                         0x0646
-#define a_HEH                          0x0647
-#define a_WAW                          0x0648
-#define a_ALEF_MAKSURA                 0x0649
-#define a_YEH                          0x064a
-
-#define a_FATHATAN                     0x064b
-#define a_DAMMATAN                     0x064c
-#define a_KASRATAN                     0x064d
-#define a_FATHA                                0x064e
-#define a_DAMMA                                0x064f
-#define a_KASRA                                0x0650
-#define a_SHADDA                       0x0651
-#define a_SUKUN                                0x0652
-
-#define a_MADDA_ABOVE                  0x0653
-#define a_HAMZA_ABOVE                  0x0654
-#define a_HAMZA_BELOW                  0x0655
-
-#define a_ZERO                         0x0660
-#define a_ONE                          0x0661
-#define a_TWO                          0x0662
-#define a_THREE                                0x0663
-#define a_FOUR                         0x0664
-#define a_FIVE                         0x0665
-#define a_SIX                          0x0666
-#define a_SEVEN                                0x0667
-#define a_EIGHT                                0x0668
-#define a_NINE                         0x0669
-#define a_PERCENT                      0x066a
-#define a_DECIMAL                      0x066b
-#define a_THOUSANDS                    0x066c
-#define a_STAR                         0x066d
-#define a_MINI_ALEF                    0x0670
-/* Rest of 8859-6 does not relate to Arabic */
-
-/*
- * Arabic Presentation Form-B (subset of 10646; FE70 - FEFF)
- *
- *  s -> isolated
- *  i -> initial
- *  m -> medial
- *  f -> final
- *
- */
-#define a_s_FATHATAN                   0xfe70
-#define a_m_TATWEEL_FATHATAN           0xfe71
-#define a_s_DAMMATAN                   0xfe72
-
-#define a_s_KASRATAN                   0xfe74
-
-#define a_s_FATHA                      0xfe76
-#define a_m_FATHA                      0xfe77
-#define a_s_DAMMA                      0xfe78
-#define a_m_DAMMA                      0xfe79
-#define a_s_KASRA                      0xfe7a
-#define a_m_KASRA                      0xfe7b
-#define a_s_SHADDA                     0xfe7c
-#define a_m_SHADDA                     0xfe7d
-#define a_s_SUKUN                      0xfe7e
-#define a_m_SUKUN                      0xfe7f
-
-#define a_s_HAMZA                      0xfe80
-#define a_s_ALEF_MADDA                 0xfe81
-#define a_f_ALEF_MADDA                 0xfe82
-#define a_s_ALEF_HAMZA_ABOVE           0xfe83
-#define a_f_ALEF_HAMZA_ABOVE           0xfe84
-#define a_s_WAW_HAMZA                  0xfe85
-#define a_f_WAW_HAMZA                  0xfe86
-#define a_s_ALEF_HAMZA_BELOW           0xfe87
-#define a_f_ALEF_HAMZA_BELOW           0xfe88
-#define a_s_YEH_HAMZA                  0xfe89
-#define a_f_YEH_HAMZA                  0xfe8a
-#define a_i_YEH_HAMZA                  0xfe8b
-#define a_m_YEH_HAMZA                  0xfe8c
-#define a_s_ALEF                       0xfe8d
-#define a_f_ALEF                       0xfe8e
-#define a_s_BEH                                0xfe8f
-#define a_f_BEH                                0xfe90
-#define a_i_BEH                                0xfe91
-#define a_m_BEH                                0xfe92
-#define a_s_TEH_MARBUTA                        0xfe93
-#define a_f_TEH_MARBUTA                        0xfe94
-#define a_s_TEH                                0xfe95
-#define a_f_TEH                                0xfe96
-#define a_i_TEH                                0xfe97
-#define a_m_TEH                                0xfe98
-#define a_s_THEH                       0xfe99
-#define a_f_THEH                       0xfe9a
-#define a_i_THEH                       0xfe9b
-#define a_m_THEH                       0xfe9c
-#define a_s_JEEM                       0xfe9d
-#define a_f_JEEM                       0xfe9e
-#define a_i_JEEM                       0xfe9f
-#define a_m_JEEM                       0xfea0
-#define a_s_HAH                                0xfea1
-#define a_f_HAH                                0xfea2
-#define a_i_HAH                                0xfea3
-#define a_m_HAH                                0xfea4
-#define a_s_KHAH                       0xfea5
-#define a_f_KHAH                       0xfea6
-#define a_i_KHAH                       0xfea7
-#define a_m_KHAH                       0xfea8
-#define a_s_DAL                                0xfea9
-#define a_f_DAL                                0xfeaa
-#define a_s_THAL                       0xfeab
-#define a_f_THAL                       0xfeac
-#define a_s_REH                                0xfead
-#define a_f_REH                                0xfeae
-#define a_s_ZAIN                       0xfeaf
-#define a_f_ZAIN                       0xfeb0
-#define a_s_SEEN                       0xfeb1
-#define a_f_SEEN                       0xfeb2
-#define a_i_SEEN                       0xfeb3
-#define a_m_SEEN                       0xfeb4
-#define a_s_SHEEN                      0xfeb5
-#define a_f_SHEEN                      0xfeb6
-#define a_i_SHEEN                      0xfeb7
-#define a_m_SHEEN                      0xfeb8
-#define a_s_SAD                                0xfeb9
-#define a_f_SAD                                0xfeba
-#define a_i_SAD                                0xfebb
-#define a_m_SAD                                0xfebc
-#define a_s_DAD                                0xfebd
-#define a_f_DAD                                0xfebe
-#define a_i_DAD                                0xfebf
-#define a_m_DAD                                0xfec0
-#define a_s_TAH                                0xfec1
-#define a_f_TAH                                0xfec2
-#define a_i_TAH                                0xfec3
-#define a_m_TAH                                0xfec4
-#define a_s_ZAH                                0xfec5
-#define a_f_ZAH                                0xfec6
-#define a_i_ZAH                                0xfec7
-#define a_m_ZAH                                0xfec8
-#define a_s_AIN                                0xfec9
-#define a_f_AIN                                0xfeca
-#define a_i_AIN                                0xfecb
-#define a_m_AIN                                0xfecc
-#define a_s_GHAIN                      0xfecd
-#define a_f_GHAIN                      0xfece
-#define a_i_GHAIN                      0xfecf
-#define a_m_GHAIN                      0xfed0
-#define a_s_FEH                                0xfed1
-#define a_f_FEH                                0xfed2
-#define a_i_FEH                                0xfed3
-#define a_m_FEH                                0xfed4
-#define a_s_QAF                                0xfed5
-#define a_f_QAF                                0xfed6
-#define a_i_QAF                                0xfed7
-#define a_m_QAF                                0xfed8
-#define a_s_KAF                                0xfed9
-#define a_f_KAF                                0xfeda
-#define a_i_KAF                                0xfedb
-#define a_m_KAF                                0xfedc
-#define a_s_LAM                                0xfedd
-#define a_f_LAM                                0xfede
-#define a_i_LAM                                0xfedf
-#define a_m_LAM                                0xfee0
-#define a_s_MEEM                       0xfee1
-#define a_f_MEEM                       0xfee2
-#define a_i_MEEM                       0xfee3
-#define a_m_MEEM                       0xfee4
-#define a_s_NOON                       0xfee5
-#define a_f_NOON                       0xfee6
-#define a_i_NOON                       0xfee7
-#define a_m_NOON                       0xfee8
-#define a_s_HEH                                0xfee9
-#define a_f_HEH                                0xfeea
-#define a_i_HEH                                0xfeeb
-#define a_m_HEH                                0xfeec
-#define a_s_WAW                                0xfeed
-#define a_f_WAW                                0xfeee
-#define a_s_ALEF_MAKSURA               0xfeef
-#define a_f_ALEF_MAKSURA               0xfef0
-#define a_s_YEH                                0xfef1
-#define a_f_YEH                                0xfef2
-#define a_i_YEH                                0xfef3
-#define a_m_YEH                                0xfef4
-#define a_s_LAM_ALEF_MADDA_ABOVE       0xfef5
-#define a_f_LAM_ALEF_MADDA_ABOVE       0xfef6
-#define a_s_LAM_ALEF_HAMZA_ABOVE       0xfef7
-#define a_f_LAM_ALEF_HAMZA_ABOVE       0xfef8
-#define a_s_LAM_ALEF_HAMZA_BELOW       0xfef9
-#define a_f_LAM_ALEF_HAMZA_BELOW       0xfefa
-#define a_s_LAM_ALEF                   0xfefb
-#define a_f_LAM_ALEF                   0xfefc
-
-#define a_BYTE_ORDER_MARK              0xfeff
-
 /* Range of Arabic characters that might be shaped. */
-#define ARABIC_CHAR(c)         ((c) >= a_HAMZA && (c) <= a_MINI_ALEF)
+#define ARABIC_CHAR(c)         (((c) & 0xFF00) == 0x0600)
diff --git a/src/mbyte.c b/src/mbyte.c
--- a/src/mbyte.c
+++ b/src/mbyte.c
@@ -1450,36 +1450,6 @@ mb_cptr2char_adv(pp)
 
 #if defined(FEAT_ARABIC) || defined(PROTO)
 /*
- * Check whether we are dealing with Arabic combining characters.
- * Note: these are NOT really composing characters!
- */
-    int
-arabic_combine(one, two)
-    int                one;        /* first character */
-    int                two;        /* character just after "one" */
-{
-    if (one == a_LAM)
-       return arabic_maycombine(two);
-    return FALSE;
-}
-
-/*
- * Check whether we are dealing with a character that could be regarded as an
- * Arabic combining character, need to check the character before this.
- */
-    int
-arabic_maycombine(two)
-    int                two;
-{
-    if (p_arshape && !p_tbidi)
-       return (two == a_ALEF_MADDA
-                   || two == a_ALEF_HAMZA_ABOVE
-                   || two == a_ALEF_HAMZA_BELOW
-                   || two == a_ALEF);
-    return FALSE;
-}
-
-/*
  * Check if the character pointed to by "p2" is a composing character when it
  * comes after "p1".  For Arabic sometimes "ab" is replaced with "c", which
  * behaves like a composing character.

--~--~---------~--~----~------------~-------~--~----~
You received this message from the "vim_dev" maillist.
For more information, visit http://www.vim.org/maillist.php
-~----------~----~----~----~------~----~------~--~---

Raspunde prin e-mail lui