Hi Bram,
2013/10/09 Wed 6:05:24 UTC+9 Bram Moolenaar wrote:
> Thanks. "nocjk" is a bit strange, the other entries in 'spelllang'
> specify languages for which words will be recognized and not marked as
> errors. I suggested "cjk" as it would see all CJK letters as OK.
> Perhaps "ignore-cjk" would be clearer, but it's a bit long.
>
> I don't think there will ever be a "cjk" language, thus there should be
> no reason to avoid that in case we do get a "cjk" spell checker.
Ah, I understand.
I have updated the patch.
Regards,
Ken Takata
--
--
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php
---
You received this message because you are subscribed to the Google Groups
"vim_dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.
# HG changeset patch
# Parent 6bd6b1784b04ccab2f30d0e3b1d7ff1613788911
diff --git a/runtime/doc/options.txt b/runtime/doc/options.txt
--- a/runtime/doc/options.txt
+++ b/runtime/doc/options.txt
@@ -6672,6 +6672,8 @@
region by listing them: "en_us,en_ca" supports both US and Canadian
English, but not words specific for Australia, New Zealand or Great
Britain.
+ If this option includes "cjk", East Asian characters are excluded from
+ spell checking. This is useful when editing mixed Asian and Latin text.
*E757*
As a special case the name of a .spl file can be given as-is. The
first "_xx" in the name is removed and used as the region name
diff --git a/runtime/doc/spell.txt b/runtime/doc/spell.txt
--- a/runtime/doc/spell.txt
+++ b/runtime/doc/spell.txt
@@ -269,6 +269,12 @@
latin1 yi transliterated Yiddish
utf-8 yi-tr transliterated Yiddish
+ *spell-cjk*
+Chinese, Japanese and other East Asian characters are normally marked as
+errors, because spell checking of these characters is not supported. If
+'spelllang' includes "cjk", these characters are not marked as errors.
+This is useful when editing text that mixes Asian and Latin characters.
+
SPELL FILES *spell-load*
diff --git a/src/mbyte.c b/src/mbyte.c
--- a/src/mbyte.c
+++ b/src/mbyte.c
@@ -947,8 +947,8 @@
{
case 0x2121: /* ZENKAKU space */
return 0;
- case 0x2122: /* KU-TEN (Japanese comma) */
- case 0x2123: /* TOU-TEN (Japanese period) */
+ case 0x2122: /* TOU-TEN (Japanese comma) */
+ case 0x2123: /* KU-TEN (Japanese period) */
case 0x2124: /* ZENKAKU comma */
case 0x2125: /* ZENKAKU period */
return 1;
@@ -2490,9 +2490,9 @@
/* sorted list of non-overlapping intervals */
static struct clinterval
{
- unsigned short first;
- unsigned short last;
- unsigned short class;
+ unsigned int first;
+ unsigned int last;
+ unsigned int class;
} classes[] =
{
{0x037e, 0x037e, 1}, /* Greek question mark */
@@ -2557,6 +2557,10 @@
{0xff1a, 0xff20, 1}, /* half/fullwidth ASCII */
{0xff3b, 0xff40, 1}, /* half/fullwidth ASCII */
{0xff5b, 0xff65, 1}, /* half/fullwidth ASCII */
+ {0x20000, 0x2a6df, 0x4e00}, /* CJK Ideographs */
+ {0x2a700, 0x2b73f, 0x4e00}, /* CJK Ideographs */
+ {0x2b740, 0x2b81f, 0x4e00}, /* CJK Ideographs */
+ {0x2f800, 0x2fa1f, 0x4e00}, /* CJK Ideographs */
};
int bot = 0;
int top = sizeof(classes) / sizeof(struct clinterval) - 1;
@@ -2576,9 +2580,9 @@
while (top >= bot)
{
mid = (bot + top) / 2;
- if (classes[mid].last < c)
+ if (classes[mid].last < (unsigned int)c)
bot = mid + 1;
- else if (classes[mid].first > c)
+ else if (classes[mid].first > (unsigned int)c)
top = mid - 1;
else
return (int)classes[mid].class;
diff --git a/src/option.c b/src/option.c
--- a/src/option.c
+++ b/src/option.c
@@ -7151,6 +7151,11 @@
if (varp == &(curwin->w_s->b_p_spl))
{
char_u fname[200];
+ char_u *q = curwin->w_s->b_p_spl;
+
+ /* Skip if the first name is "cjk". */
+ if (STRNCMP(q, "cjk,", 4) == 0)
+ q += 4;
/*
* Source the spell/LANG.vim in 'runtimepath'.
@@ -7158,11 +7163,11 @@
* Use the first name in 'spelllang' up to '_region' or
* '.encoding'.
*/
- for (p = curwin->w_s->b_p_spl; *p != NUL; ++p)
+ for (p = q; *p != NUL; ++p)
if (vim_strchr((char_u *)"_.,", *p) != NULL)
break;
vim_snprintf((char *)fname, 200, "spell/%.*s.vim",
- (int)(p - curwin->w_s->b_p_spl), curwin->w_s->b_p_spl);
+ (int)(p - q), q);
source_runtime(fname, TRUE);
}
#endif
diff --git a/src/spell.c b/src/spell.c
--- a/src/spell.c
+++ b/src/spell.c
@@ -754,9 +754,9 @@
static void clear_spell_chartab __ARGS((spelltab_T *sp));
static int set_spell_finish __ARGS((spelltab_T *new_st));
static int spell_iswordp __ARGS((char_u *p, win_T *wp));
-static int spell_iswordp_nmw __ARGS((char_u *p));
-#ifdef FEAT_MBYTE
-static int spell_mb_isword_class __ARGS((int cl));
+static int spell_iswordp_nmw __ARGS((char_u *p, win_T *wp));
+#ifdef FEAT_MBYTE
+static int spell_mb_isword_class __ARGS((int cl, win_T *wp));
static int spell_iswordp_w __ARGS((int *p, win_T *wp));
#endif
static int write_spell_prefcond __ARGS((FILE *fd, garray_T *gap));
@@ -1149,7 +1149,7 @@
/* When we are at a non-word character there is no error, just
* skip over the character (try looking for a word after it). */
- else if (!spell_iswordp_nmw(ptr))
+ else if (!spell_iswordp_nmw(ptr, wp))
{
if (capcol != NULL && wp->w_s->b_cap_prog != NULL)
{
@@ -1561,7 +1561,7 @@
* accept a no-caps word, even when the dictionary
* word specifies ONECAP. */
mb_ptr_back(mip->mi_word, p);
- if (spell_iswordp_nmw(p)
+ if (spell_iswordp_nmw(p, mip->mi_win)
? capflags == WF_ONECAP
: (flags & WF_ONECAP) != 0
&& capflags != WF_ONECAP)
@@ -4234,6 +4234,8 @@
if (spl_copy == NULL)
goto theend;
+ wp->w_s->b_cjk = 0;
+
/* loop over comma separated language names. */
for (splp = spl_copy; *splp != NUL; )
{
@@ -4242,6 +4244,12 @@
region = NULL;
len = (int)STRLEN(lang);
+ if (STRCMP(lang, "cjk") == 0)
+ {
+ wp->w_s->b_cjk = 1;
+ continue;
+ }
+
/* If the name ends in ".spl" use it as the name of the spell file.
* If there is a region name let "region" point to it and remove it
* from the name. */
@@ -4601,7 +4609,7 @@
int past_second = FALSE; /* past second word char */
/* find first letter */
- for (p = word; !spell_iswordp_nmw(p); mb_ptr_adv(p))
+ for (p = word; !spell_iswordp_nmw(p, curwin); mb_ptr_adv(p))
if (end == NULL ? *p == NUL : p >= end)
return 0; /* only non-word characters, illegal word */
#ifdef FEAT_MBYTE
@@ -4617,7 +4625,7 @@
* But a word with an upper char only at start is a ONECAP.
*/
for ( ; end == NULL ? *p != NUL : p < end; mb_ptr_adv(p))
- if (spell_iswordp_nmw(p))
+ if (spell_iswordp_nmw(p, curwin))
{
c = PTR2CHAR(p);
if (!SPELL_ISUPPER(c))
@@ -9907,7 +9915,7 @@
c = mb_ptr2char(s);
if (c > 255)
- return spell_mb_isword_class(mb_get_class(s));
+ return spell_mb_isword_class(mb_get_class(s), wp);
return spelltab.st_isw[c];
}
#endif
@@ -9920,8 +9928,9 @@
* Unlike spell_iswordp() this doesn't check for "midword" characters.
*/
static int
-spell_iswordp_nmw(p)
+spell_iswordp_nmw(p, wp)
char_u *p;
+ win_T *wp;
{
#ifdef FEAT_MBYTE
int c;
@@ -9930,7 +9939,7 @@
{
c = mb_ptr2char(p);
if (c > 255)
- return spell_mb_isword_class(mb_get_class(p));
+ return spell_mb_isword_class(mb_get_class(p), wp);
return spelltab.st_isw[c];
}
#endif
@@ -9942,12 +9951,18 @@
* Return TRUE if word class indicates a word character.
* Only for characters above 255.
* Unicode subscript and superscript are not considered word characters.
- */
- static int
-spell_mb_isword_class(cl)
- int cl;
-{
- return cl >= 2 && cl != 0x2070 && cl != 0x2080;
+ * See also dbcs_class() and utf_class() in mbyte.c.
+ */
+ static int
+spell_mb_isword_class(cl, wp)
+ int cl;
+ win_T *wp;
+{
+ if (wp->w_s->b_cjk)
+ /* East Asian characters are not considered word characters. */
+ return cl == 2 || cl == 0x2800;
+ else
+ return cl >= 2 && cl != 0x2070 && cl != 0x2080;
}
/*
@@ -9971,9 +9986,10 @@
if (*s > 255)
{
if (enc_utf8)
- return spell_mb_isword_class(utf_class(*s));
+ return spell_mb_isword_class(utf_class(*s), wp);
if (enc_dbcs)
- return dbcs_class((unsigned)*s >> 8, *s & 0xff) >= 2;
+ return spell_mb_isword_class(
+ dbcs_class((unsigned)*s >> 8, *s & 0xff), wp);
return 0;
}
return spelltab.st_isw[*s];
@@ -10193,13 +10209,13 @@
line = ml_get_curline();
p = line + curwin->w_cursor.col;
/* Backup to before start of word. */
- while (p > line && spell_iswordp_nmw(p))
+ while (p > line && spell_iswordp_nmw(p, curwin))
mb_ptr_back(line, p);
/* Forward to start of word. */
- while (*p != NUL && !spell_iswordp_nmw(p))
+ while (*p != NUL && !spell_iswordp_nmw(p, curwin))
mb_ptr_adv(p);
- if (!spell_iswordp_nmw(p)) /* No word found. */
+ if (!spell_iswordp_nmw(p, curwin)) /* No word found. */
{
beep_flush();
return;
@@ -10436,7 +10452,7 @@
for (;;)
{
mb_ptr_back(line, p);
- if (p == line || spell_iswordp_nmw(p))
+ if (p == line || spell_iswordp_nmw(p, curwin))
break;
if (vim_regexec(&regmatch, p, 0)
&& regmatch.endp[0] == line + endcol)
@@ -11645,7 +11661,7 @@
/* When appending a compound word after a word character don't
* use Onecap. */
- if (p != NULL && spell_iswordp_nmw(p))
+ if (p != NULL && spell_iswordp_nmw(p, curwin))
c &= ~WF_ONECAP;
make_case_word(tword + sp->ts_splitoff,
preword + sp->ts_prewordlen, c);
@@ -11895,7 +11911,8 @@
* character when the word ends. But only when the
* good word can end. */
if (((!try_compound && !spell_iswordp_nmw(fword
- + sp->ts_fidx))
+ + sp->ts_fidx,
+ curwin))
|| fword_ends)
&& fword[sp->ts_fidx] != NUL
&& goodword_ends)
@@ -14226,7 +14243,7 @@
}
else
{
- if (spell_iswordp_nmw(s))
+ if (spell_iswordp_nmw(s, curwin))
*t++ = *s;
++s;
}
@@ -14521,7 +14538,7 @@
else
{
did_white = FALSE;
- if (!spell_iswordp_nmw(t))
+ if (!spell_iswordp_nmw(t, curwin))
continue;
}
}
@@ -16045,7 +16062,7 @@
for (p = line + startcol; p > line; )
{
mb_ptr_back(line, p);
- if (spell_iswordp_nmw(p))
+ if (spell_iswordp_nmw(p, curwin))
break;
}
diff --git a/src/structs.h b/src/structs.h
--- a/src/structs.h
+++ b/src/structs.h
@@ -1317,6 +1317,9 @@
regprog_T *b_cap_prog; /* program for 'spellcapcheck' */
char_u *b_p_spf; /* 'spellfile' */
char_u *b_p_spl; /* 'spelllang' */
+# ifdef FEAT_MBYTE
+ int b_cjk; /* all CJK letters as OK */
+# endif
#endif
#if !defined(FEAT_SYN_HL) && !defined(FEAT_SPELL)
int dummy;