On Wed, 08 Oct 2014 08:11:34 +0330
Ali Gholami Rudi <[email protected]> wrote:
> I tested it and it is working wonderfully. The only issue is
> ZWNJ (unicode 0x200c) and ZWJ (unicode 0x200d); I wonder if
> the letters before and after these characters can be shaped
> properly...
For ZWNJ, I made a small modification that tells `arabic_shape`, in one of
its invocations, to separate letters when the previously entered character
was a ZWNJ. ZWJ could be treated similarly, but I doubt that, with the
currently provided letters (which are limited to Arabic and Farsi), there
is a use case for ZWJ.
You can apply `zwnj.0.patch` on a recent vim tarball or `zwnj.1.patch`
on top of your patch.
--
--
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php
---
You received this message because you are subscribed to the Google Groups
"vim_dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/d/optout.
diff --git a/src/arabic.c b/src/arabic.c
index 72c64b5..98e34fa 100644
--- a/src/arabic.c
+++ b/src/arabic.c
@@ -1009,15 +1009,17 @@ half_shape(c)
* in: "prev_c1" is the first composing char for the previous char
* (not shaped)
* in: "next_c" is the next character (not shaped).
+ * in: "zwc" previous zero-width character.
*/
int
-arabic_shape(c, ccp, c1p, prev_c, prev_c1, next_c)
+arabic_shape(c, ccp, c1p, prev_c, prev_c1, next_c, zwc)
int c;
int *ccp;
int *c1p;
int prev_c;
int prev_c1;
int next_c;
+ enum ZW_CONTEXT zwc;
{
int curr_c;
int shape_c;
@@ -1039,7 +1041,7 @@ arabic_shape(c, ccp, c1p, prev_c, prev_c1, next_c)
if (curr_laa)
{
- if (A_is_valid(prev_c) && !A_is_f(shape_c)
+ if (A_is_valid(prev_c) && zwc != ZWNJ && !A_is_f(shape_c)
&& !A_is_s(shape_c) && !prev_laa)
curr_c = chg_c_laa2f(curr_laa);
else
@@ -1048,9 +1050,9 @@ arabic_shape(c, ccp, c1p, prev_c, prev_c1, next_c)
/* Remove the composing character */
*c1p = 0;
}
- else if (!A_is_valid(prev_c) && A_is_valid(next_c))
+ else if ((!A_is_valid(prev_c) || zwc == ZWNJ) && A_is_valid(next_c))
curr_c = chg_c_a2i(c);
- else if (!shape_c || A_is_f(shape_c) || A_is_s(shape_c) || prev_laa)
+ else if (!shape_c || A_is_f(shape_c) || A_is_s(shape_c) || prev_laa || zwc == ZWNJ)
curr_c = A_is_valid(next_c) ? chg_c_a2i(c) : chg_c_a2s(c);
else if (A_is_valid(next_c))
curr_c = A_is_iso(c) ? chg_c_a2m(c) : chg_c_i2m(c);
diff --git a/src/ex_getln.c b/src/ex_getln.c
index cdd59de..f19eb54 100644
--- a/src/ex_getln.c
+++ b/src/ex_getln.c
@@ -2721,7 +2721,7 @@ draw_cmdline(start, len)
}
prev_c = u8c;
- u8c = arabic_shape(u8c, NULL, &u8cc[0], pc, pc1, nc);
+ u8c = arabic_shape(u8c, NULL, &u8cc[0], pc, pc1, nc, ZW_NONE);
newlen += (*mb_char2bytes)(u8c, arshape_buf + newlen);
if (u8cc[0] != 0)
diff --git a/src/proto/main.pro b/src/proto/main.pro
index 62e80b8..462872f 100644
--- a/src/proto/main.pro
+++ b/src/proto/main.pro
@@ -22,5 +22,6 @@ int F_isalpha __ARGS((int c));
int F_isdigit __ARGS((int c));
int F_ischar __ARGS((int c));
void farsi_fkey __ARGS((cmdarg_T *cap));
-int arabic_shape __ARGS((int c, int *ccp, int *c1p, int prev_c, int prev_c1, int next_c));
+enum ZW_CONTEXT { ZW_NONE, ZWNJ, ZWJ };
+int arabic_shape __ARGS((int c, int *ccp, int *c1p, int prev_c, int prev_c1, int next_c, enum ZW_CONTEXT zwc));
/* vim: set ft=c : */
diff --git a/src/screen.c b/src/screen.c
index a72ecaf..cdb5be8 100644
--- a/src/screen.c
+++ b/src/screen.c
@@ -2585,7 +2585,7 @@ fold_line(wp, fold_count, foldinfo, lnum, row)
prev_c = u8c;
u8c = arabic_shape(u8c, &firstbyte, &u8cc[0],
- pc, pc1, nc);
+ pc, pc1, nc, ZW_NONE);
ScreenLines[idx] = firstbyte;
}
else
@@ -3009,6 +3009,10 @@ win_line(wp, lnum, startrow, endrow, nochange)
# define VCOL_HLC (vcol)
#endif
+#ifdef FEAT_ARABIC
+ enum ZW_CONTEXT zwc = ZW_NONE;
+#endif
+
if (startrow > endrow) /* past the end already! */
return startrow;
@@ -4097,6 +4101,15 @@ win_line(wp, lnum, startrow, endrow, nochange)
STRCPY(extra, "\357\274\237");
# endif
+#ifdef FEAT_ARABIC
+ /*
+ * Remember zero-width for processing
+ */
+ if (mb_c == 0x200c)
+ zwc = ZWNJ;
+ else if (mb_c == 0x200d)
+ zwc = ZWJ;
+#endif
p_extra = extra;
c = *p_extra;
mb_c = mb_ptr2char_adv(&p_extra);
@@ -4136,10 +4149,15 @@ win_line(wp, lnum, startrow, endrow, nochange)
}
prev_c = mb_c;
- mb_c = arabic_shape(mb_c, &c, &u8cc[0], pc, pc1, nc);
+ mb_c = arabic_shape(mb_c, &c, &u8cc[0], pc, pc1, nc, zwc);
+ zwc = ZW_NONE;
}
else
+ {
prev_c = mb_c;
+ if (prev_c != 0x200c && prev_c != 0x200d) /* keep zwc when this char is ZWNJ/ZWJ itself */
+ zwc = ZW_NONE;
+ }
#endif
}
else /* enc_dbcs */
@@ -7050,7 +7068,7 @@ screen_puts_len(text, len, row, col, attr)
}
pc = prev_c;
prev_c = u8c;
- u8c = arabic_shape(u8c, &c, &u8cc[0], nc, nc1, pc);
+ u8c = arabic_shape(u8c, &c, &u8cc[0], nc, nc1, pc, ZW_NONE);
}
else
prev_c = u8c;
diff --git a/src/arabic.c b/src/arabic.c
index 654555f..b89151d 100644
--- a/src/arabic.c
+++ b/src/arabic.c
@@ -265,15 +265,17 @@ arabic_maycombine(two)
* in: "prev_c1" is the first composing char for the previous char
* (not shaped)
* in: "next_c" is the next character (not shaped).
+ * in: "zwc" previous zero-width character.
*/
int
-arabic_shape(c, ccp, c1p, prev_c, prev_c1, next_c)
+arabic_shape(c, ccp, c1p, prev_c, prev_c1, next_c, zwc)
int c;
int *ccp;
int *c1p;
int prev_c;
int prev_c1;
int next_c;
+ enum ZW_CONTEXT zwc;
{
int curr_c;
int curr_laa;
@@ -291,7 +293,7 @@ arabic_shape(c, ccp, c1p, prev_c, prev_c1, next_c)
if (curr_laa)
{
- if (A_is_valid(prev_c) && can_join(prev_c, a_LAM) && !prev_laa)
+ if (A_is_valid(prev_c) && can_join(prev_c, a_LAM) && !prev_laa && zwc != ZWNJ)
curr_c = chg_c_laa2f(*c1p);
else
curr_c = chg_c_laa2i(*c1p);
@@ -302,7 +304,7 @@ arabic_shape(c, ccp, c1p, prev_c, prev_c1, next_c)
else
{
struct achar *curr_a = find_achar(c);
- int backward_combine = !prev_laa && can_join(prev_c, curr_c);
+ int backward_combine = !prev_laa && can_join(prev_c, curr_c) && zwc != ZWNJ;
int forward_combine = can_join(curr_c, next_c);
if (backward_combine && forward_combine)
curr_c = curr_a->m;
diff --git a/src/ex_getln.c b/src/ex_getln.c
index cdd59de..f19eb54 100644
--- a/src/ex_getln.c
+++ b/src/ex_getln.c
@@ -2721,7 +2721,7 @@ draw_cmdline(start, len)
}
prev_c = u8c;
- u8c = arabic_shape(u8c, NULL, &u8cc[0], pc, pc1, nc);
+ u8c = arabic_shape(u8c, NULL, &u8cc[0], pc, pc1, nc, ZW_NONE);
newlen += (*mb_char2bytes)(u8c, arshape_buf + newlen);
if (u8cc[0] != 0)
diff --git a/src/proto/main.pro b/src/proto/main.pro
index 62e80b8..462872f 100644
--- a/src/proto/main.pro
+++ b/src/proto/main.pro
@@ -22,5 +22,6 @@ int F_isalpha __ARGS((int c));
int F_isdigit __ARGS((int c));
int F_ischar __ARGS((int c));
void farsi_fkey __ARGS((cmdarg_T *cap));
-int arabic_shape __ARGS((int c, int *ccp, int *c1p, int prev_c, int prev_c1, int next_c));
+enum ZW_CONTEXT { ZW_NONE, ZWNJ, ZWJ };
+int arabic_shape __ARGS((int c, int *ccp, int *c1p, int prev_c, int prev_c1, int next_c, enum ZW_CONTEXT zwc));
/* vim: set ft=c : */
diff --git a/src/screen.c b/src/screen.c
index a72ecaf..cdb5be8 100644
--- a/src/screen.c
+++ b/src/screen.c
@@ -2585,7 +2585,7 @@ fold_line(wp, fold_count, foldinfo, lnum, row)
prev_c = u8c;
u8c = arabic_shape(u8c, &firstbyte, &u8cc[0],
- pc, pc1, nc);
+ pc, pc1, nc, ZW_NONE);
ScreenLines[idx] = firstbyte;
}
else
@@ -3009,6 +3009,10 @@ win_line(wp, lnum, startrow, endrow, nochange)
# define VCOL_HLC (vcol)
#endif
+#ifdef FEAT_ARABIC
+ enum ZW_CONTEXT zwc = ZW_NONE;
+#endif
+
if (startrow > endrow) /* past the end already! */
return startrow;
@@ -4097,6 +4101,15 @@ win_line(wp, lnum, startrow, endrow, nochange)
STRCPY(extra, "\357\274\237");
# endif
+#ifdef FEAT_ARABIC
+ /*
+ * Remember zero-width for processing
+ */
+ if (mb_c == 0x200c)
+ zwc = ZWNJ;
+ else if (mb_c == 0x200d)
+ zwc = ZWJ;
+#endif
p_extra = extra;
c = *p_extra;
mb_c = mb_ptr2char_adv(&p_extra);
@@ -4136,10 +4149,15 @@ win_line(wp, lnum, startrow, endrow, nochange)
}
prev_c = mb_c;
- mb_c = arabic_shape(mb_c, &c, &u8cc[0], pc, pc1, nc);
+ mb_c = arabic_shape(mb_c, &c, &u8cc[0], pc, pc1, nc, zwc);
+ zwc = ZW_NONE;
}
else
+ {
prev_c = mb_c;
+ if (prev_c != 0x200c && prev_c != 0x200d) /* keep zwc when this char is ZWNJ/ZWJ itself */
+ zwc = ZW_NONE;
+ }
#endif
}
else /* enc_dbcs */
@@ -7050,7 +7068,7 @@ screen_puts_len(text, len, row, col, attr)
}
pc = prev_c;
prev_c = u8c;
- u8c = arabic_shape(u8c, &c, &u8cc[0], nc, nc1, pc);
+ u8c = arabic_shape(u8c, &c, &u8cc[0], nc, nc1, pc, ZW_NONE);
}
else
prev_c = u8c;