/* vi:set ts=8 sts=4 sw=4:
 *
 * VIM - Vi IMproved    by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */

/*
 * arabic.c: functions for Arabic language
 *
 * Included by main.c, when FEAT_ARABIC & FEAT_GUI are defined.
 *
 * --
 *
 * Author: Nadim Shaikli & Isam Bayazidi
 * Farsi support and restructuring to make adding new
 *   letters easier by Ali Gholami Rudi
 *
 */

/*
 * Sorted list of unicode Arabic characters.  Each entry holds the
 * presentation forms of a letter.
 *
 * The list MUST stay sorted on the "c" field: find_achar() does a binary
 * search on it.
 *
 * Arabic characters are categorized into following types:
 *
 * Isolated	- iso-8859-6 form
 * Initial	- unicode form-B start
 * Medial	- unicode form-B middle
 * Final	- unicode form-B final
 * Stand-Alone	- unicode form-B isolated
 *
 * The fields in the struct represent:
 *
 *  c -> the unshaped character, used as the lookup key
 *  s -> stand-alone
 *  i -> initial
 *  m -> medial
 *  f -> final
 *
 * A form that is left out of an initializer is zero, which means the
 * character has no such presentation form.
 */
static struct achar {
	unsigned c;	/* unshaped character (lookup key) */
	unsigned s;	/* stand-alone form, 0 if none */
	unsigned i;	/* initial form, 0 if none */
	unsigned m;	/* medial form, 0 if none */
	unsigned f;	/* final form, 0 if none */
} achars[] = {
	{0x0621, 0xfe80},				/* a_HAMZA */
	{0x0622, 0xfe81, 0, 0, 0xfe82},			/* a_ALEF_MADDA */
	{0x0623, 0xfe83, 0, 0, 0xfe84},			/* a_ALEF_HAMZA_ABOVE */
	{0x0624, 0xfe85, 0, 0, 0xfe86},			/* a_WAW_HAMZA */
	{0x0625, 0xfe87, 0, 0, 0xfe88},			/* a_ALEF_HAMZA_BELOW */
	{0x0626, 0xfe89, 0xfe8b, 0xfe8c, 0xfe8a},	/* a_YEH_HAMZA */
	{0x0627, 0xfe8d, 0, 0, 0xfe8e},			/* a_ALEF */
	{0x0628, 0xfe8f, 0xfe91, 0xfe92, 0xfe90},	/* a_BEH */
	{0x0629, 0xfe93, 0, 0, 0xfe94},			/* a_TEH_MARBUTA */
	{0x062a, 0xfe95, 0xfe97, 0xfe98, 0xfe96},	/* a_TEH */
	{0x062b, 0xfe99, 0xfe9b, 0xfe9c, 0xfe9a},	/* a_THEH */
	{0x062c, 0xfe9d, 0xfe9f, 0xfea0, 0xfe9e},	/* a_JEEM */
	{0x062d, 0xfea1, 0xfea3, 0xfea4, 0xfea2},	/* a_HAH */
	{0x062e, 0xfea5, 0xfea7, 0xfea8, 0xfea6},	/* a_KHAH */
	{0x062f, 0xfea9, 0, 0, 0xfeaa},			/* a_DAL */
	{0x0630, 0xfeab, 0, 0, 0xfeac},			/* a_THAL */
	{0x0631, 0xfead, 0, 0, 0xfeae},			/* a_REH */
	{0x0632, 0xfeaf, 0, 0, 0xfeb0},			/* a_ZAIN */
	{0x0633, 0xfeb1, 0xfeb3, 0xfeb4, 0xfeb2},	/* a_SEEN */
	{0x0634, 0xfeb5, 0xfeb7, 0xfeb8, 0xfeb6},	/* a_SHEEN */
	{0x0635, 0xfeb9, 0xfebb, 0xfebc, 0xfeba},	/* a_SAD */
	{0x0636, 0xfebd, 0xfebf, 0xfec0, 0xfebe},	/* a_DAD */
	{0x0637, 0xfec1, 0xfec3, 0xfec4, 0xfec2},	/* a_TAH */
	{0x0638, 0xfec5, 0xfec7, 0xfec8, 0xfec6},	/* a_ZAH */
	{0x0639, 0xfec9, 0xfecb, 0xfecc, 0xfeca},	/* a_AIN */
	{0x063a, 0xfecd, 0xfecf, 0xfed0, 0xfece},	/* a_GHAIN */
	{0x0640},					/* a_TATWEEL */
	{0x0641, 0xfed1, 0xfed3, 0xfed4, 0xfed2},	/* a_FEH */
	{0x0642, 0xfed5, 0xfed7, 0xfed8, 0xfed6},	/* a_QAF */
	{0x0643, 0xfed9, 0xfedb, 0xfedc, 0xfeda},	/* a_KAF */
	{0x0644, 0xfedd, 0xfedf, 0xfee0, 0xfede},	/* a_LAM */
	{0x0645, 0xfee1, 0xfee3, 0xfee4, 0xfee2},	/* a_MEEM */
	{0x0646, 0xfee5, 0xfee7, 0xfee8, 0xfee6},	/* a_NOON */
	{0x0647, 0xfee9, 0xfeeb, 0xfeec, 0xfeea},	/* a_HEH */
	{0x0648, 0xfeed, 0, 0, 0xfeee},			/* a_WAW */
	{0x0649, 0xfeef, 0, 0, 0xfef0},			/* a_ALEF_MAKSURA */
	{0x064a, 0xfef1, 0xfef3, 0xfef4, 0xfef2},	/* a_YEH */
	{0x064b, 0xfe70},				/* a_FATHATAN */
	{0x064c, 0xfe72},				/* a_DAMMATAN */
	{0x064d, 0xfe74},				/* a_KASRATAN */
	{0x064e, 0xfe76, 0, 0xfe77, 0},			/* a_FATHA */
	{0x064f, 0xfe78, 0, 0xfe79, 0},			/* a_DAMMA */
	{0x0650, 0xfe7a, 0, 0xfe7b, 0},			/* a_KASRA */
	{0x0651, 0xfe7c, 0, 0xfe7d, 0},			/* a_SHADDA */
	{0x0652, 0xfe7e, 0, 0xfe7f, 0},			/* a_SUKUN */
	{0x0653},					/* a_MADDA_ABOVE */
	{0x0654},					/* a_HAMZA_ABOVE */
	{0x0655},					/* a_HAMZA_BELOW */
	{0x067e, 0xfb56, 0xfb58, 0xfb59, 0xfb57},	/* a_PEH */
	{0x0686, 0xfb7a, 0xfb7c, 0xfb7d, 0xfb7b},	/* a_TCHEH */
	{0x0698, 0xfb8a, 0, 0, 0xfb8b},			/* a_JEH */
	{0x06a9, 0xfb8e, 0xfb90, 0xfb91, 0xfb8f},	/* a_FKAF */
	{0x06af, 0xfb92, 0xfb94, 0xfb95, 0xfb93},	/* a_GAF */
	{0x06cc, 0xfbfc, 0xfbfe, 0xfbff, 0xfbfd},	/* a_FYEH */
};

/*
 * These values are hardcoded in functions.
 * Each one equals the "c" field of the achars[] entry with the same name.
 */
#define a_HAMZA				0x0621
#define a_ALEF_MADDA			0x0622
#define a_ALEF_HAMZA_ABOVE		0x0623
#define a_ALEF_HAMZA_BELOW		0x0625
#define a_ALEF				0x0627
#define a_LAM				0x0644

/*
 * LAM + ALEF ligatures.  The "a_s_" values are returned by chg_c_laa2i(),
 * the "a_f_" values by chg_c_laa2f().
 */
#define a_s_LAM_ALEF_MADDA_ABOVE	0xfef5
#define a_f_LAM_ALEF_MADDA_ABOVE	0xfef6
#define a_s_LAM_ALEF_HAMZA_ABOVE	0xfef7
#define a_f_LAM_ALEF_HAMZA_ABOVE	0xfef8
#define a_s_LAM_ALEF_HAMZA_BELOW	0xfef9
#define a_f_LAM_ALEF_HAMZA_BELOW	0xfefa
#define a_s_LAM_ALEF			0xfefb
#define a_f_LAM_ALEF			0xfefc

/* Accepted by A_is_ok() although it has no entry in achars[]. */
#define a_BYTE_ORDER_MARK		0xfeff

/*
 * Number of elements in array "a".  "a" must be a real array, not a pointer,
 * otherwise sizeof(a) is the size of the pointer.
 */
#define ARRAY_SIZE(a)		(sizeof(a) / sizeof((a)[0]))

/* Prototypes for the local (static) functions. */
static int  chg_c_laa2i __ARGS((int hid_c));
static int  chg_c_laa2f __ARGS((int hid_c));
static int  A_is_iso __ARGS((int c));
static int  A_is_ok __ARGS((int c));
static int  A_is_valid __ARGS((int c));

/*
 * Look up the achars[] entry whose "c" field equals "c".
 * achars[] is sorted on that field, thus a binary search is used.
 * Returns NULL when "c" is not in the table.
 */
    static struct achar *
find_achar(c)
    int c;
{
    int lo = 0;
    int hi = ARRAY_SIZE(achars);
    int mid;

    /* A match, if there is one, is always in the range [lo, hi). */
    while (lo < hi)
    {
	mid = (lo + hi) / 2;
	if (c < achars[mid].c)
	    hi = mid;
	else if (achars[mid].c == c)
	    return &achars[mid];
	else
	    lo = mid + 1;
    }
    return NULL;
}

/*
 * Return the stand-alone LAM-ALEF ligature for the ALEF variant "hid_c",
 * thus going from a combination of two characters to a single shape.
 * Returns 0 when "hid_c" is not one of the four ALEF variants.
 */
    static int
chg_c_laa2i(hid_c)
    int hid_c;
{
    if (hid_c == a_ALEF_MADDA)
	return a_s_LAM_ALEF_MADDA_ABOVE;
    if (hid_c == a_ALEF_HAMZA_ABOVE)
	return a_s_LAM_ALEF_HAMZA_ABOVE;
    if (hid_c == a_ALEF_HAMZA_BELOW)
	return a_s_LAM_ALEF_HAMZA_BELOW;
    if (hid_c == a_ALEF)
	return a_s_LAM_ALEF;

    return 0;
}


/*
 * Return the final LAM-ALEF ligature for the ALEF variant "hid_c".
 * Returns 0 when "hid_c" is not one of the four ALEF variants.
 */
    static int
chg_c_laa2f(hid_c)
    int hid_c;
{
    if (hid_c == a_ALEF_MADDA)
	return a_f_LAM_ALEF_MADDA_ABOVE;
    if (hid_c == a_ALEF_HAMZA_ABOVE)
	return a_f_LAM_ALEF_HAMZA_ABOVE;
    if (hid_c == a_ALEF_HAMZA_BELOW)
	return a_f_LAM_ALEF_HAMZA_BELOW;
    if (hid_c == a_ALEF)
	return a_f_LAM_ALEF;

    return 0;
}

/*
 * Return non-zero when "c1" can be joined with a following "c2":
 * both must be in achars[], "c1" needs an initial or medial form and "c2"
 * needs a final or medial form.
 */
    static int
can_join(c1, c2)
    int		c1;
    int		c2;
{
    struct achar *lead = find_achar(c1);
    struct achar *trail = find_achar(c2);

    if (lead == NULL || trail == NULL)
	return 0;

    /* "c1" must have a shape that connects to the next character. */
    if (lead->i == 0 && lead->m == 0)
	return 0;

    /* "c2" must have a shape that connects to the previous character. */
    return trail->f != 0 || trail->m != 0;
}

/*
 * Return whether "one" followed by "two" is an Arabic combination: "one"
 * must be a LAM and "two" must be accepted by arabic_maycombine().
 * Note: these are NOT really composing characters!
 */
    int
arabic_combine(one, two)
    int		one;	    /* first character */
    int		two;	    /* character just after "one" */
{
    /* Only a LAM can start a combination. */
    if (one != a_LAM)
	return FALSE;

    return arabic_maycombine(two);
}

/*
 * Return whether "two" could be regarded as an Arabic combining character,
 * which is the case for the four ALEF variants.  The caller still needs to
 * check the character before this one.
 * Always FALSE unless p_arshape is set and p_tbidi is not set.
 */
    int
arabic_maycombine(two)
    int		two;
{
    if (!p_arshape || p_tbidi)
	return FALSE;

    return (two == a_ALEF
		|| two == a_ALEF_MADDA
		|| two == a_ALEF_HAMZA_ABOVE
		|| two == a_ALEF_HAMZA_BELOW);
}

/*
 * Do Arabic shaping on character "c".  Returns the shaped character.
 * Characters not accepted by A_is_ok() are returned unchanged.
 * out:    "ccp" when not NULL and the shape changed, "*ccp" is set to the
 *		     first byte of the shaped character.
 * in/out: "c1p" points to the first composing char for "c".  It is set to
 *		     zero when it was merged with "c" into a LAM-ALEF
 *		     ligature.
 * in:     "prev_c"  is the previous character (not shaped)
 * in:     "prev_c1" is the first composing char for the previous char
 *		     (not shaped)
 * in:     "next_c"  is the next character (not shaped).
 */
    int
arabic_shape(c, ccp, c1p, prev_c, prev_c1, next_c)
    int		c;
    int		*ccp;
    int		*c1p;
    int		prev_c;
    int		prev_c1;
    int		next_c;
{
    int curr_c = c;
    int curr_laa;
    int prev_laa;

    /* Deal only with Arabic character, pass back all others */
    if (!A_is_ok(c))
	return c;

    curr_laa = arabic_combine(c, *c1p);
    prev_laa = arabic_combine(prev_c, prev_c1);

    if (curr_laa)
    {
	/* arabic_combine() only tells that "c" and "*c1p" combine.  The
	 * ligature to use depends on the ALEF variant, which is in "*c1p". */
	if (A_is_valid(prev_c) && can_join(prev_c, a_LAM) && !prev_laa)
	    curr_c = chg_c_laa2f(*c1p);
	else
	    curr_c = chg_c_laa2i(*c1p);

	/* Remove the composing character */
	*c1p = 0;
    }
    else
    {
	struct achar *curr_a = find_achar(c);

	/* A_is_ok() also accepts a_BYTE_ORDER_MARK, which has no entry in
	 * achars[].  Leave such a character unshaped. */
	if (curr_a != NULL)
	{
	    int backward_combine = !prev_laa && can_join(prev_c, c);
	    int forward_combine = can_join(c, next_c);

	    if (backward_combine)
		curr_c = forward_combine ? curr_a->m : curr_a->f;
	    else
		curr_c = forward_combine ? curr_a->i : curr_a->s;
	}
    }

    /* Sanity check -- curr_c should, in the future, never be 0.
     * We should, in the future, insert a fatal error here.
     * For now fall back to the unshaped character, e.g. when the table has
     * no entry for the wanted form. */
    if (curr_c == NUL)
	curr_c = c;

    if (curr_c != c && ccp != NULL)
    {
	char_u buf[MB_MAXBYTES];

	/* Update the first byte of the character. */
	(*mb_char2bytes)(curr_c, buf);
	*ccp = buf[0];
    }

    /* Return the shaped character */
    return curr_c;
}


/*
 * A_is_iso returns TRUE if 'c' is a character that has an entry in achars[],
 * thus one that the shaping code knows about.
 */
    static int
A_is_iso(c)
    int c;
{
    struct achar *entry = find_achar(c);

    return entry != NULL;
}


/*
 * A_is_ok returns TRUE if 'c' is accepted by A_is_iso() or is the byte
 * order mark.
 */
    static int
A_is_ok(c)
    int c;
{
    if (c == a_BYTE_ORDER_MARK)
	return 1;

    return A_is_iso(c) != 0;
}


/*
 * A_is_valid returns TRUE if 'c' is accepted by A_is_ok(), with the
 * exception of a_HAMZA, which is excluded.
 */
    static int
A_is_valid(c)
    int c;
{
    if (c == a_HAMZA)
	return 0;

    return A_is_ok(c) != 0;
}
