There are three problems.
1. Tag search gets stuck in an infinite loop.
$ cat tags
!_TAG_FILE_ENCODING utf-8 //
abcdefghijklmnopqrs foo.txt /foo.txt
$ vim -u NONE
:tag abcdefghijklmnopqrs
... infinite loop
This happens when the specified encoding is the same as &encoding, or is
not supported. Vim enters an infinite loop when it reads the
!_TAG_FILE_ENCODING line during the binary search.
This problem especially affects Japanese users who use the translated
help.
2. Tag search with a pattern doesn't work.
I hope you can see the multi-byte Japanese characters.
$ echo $LANG
ja_JP.UTF-8
$ cat tags
!_TAG_FILE_ENCODING cp932 //
あいうえお foo.txt /foo.txt
(Actually, this command displays garbled text because the terminal is
utf-8 and the tags file is cp932.)
$ vim -u NONE
:tag /.いうえお
E426: tag not found: .いうえお
The tags file contains "あいうえお".
But "/.いうえお" doesn't match.
Vim converts the specified pattern to the tags file's encoding and tries
to match the tag name against the converted pattern. But the regexp code
handles these strings as if they were encoded with the 'encoding' option.
Therefore, in the above example, "." doesn't match the multi-byte
character "あ".
3. !_TAG_FILE_ENCODING may not be read.
$ cat tags
!_TAG_FILE_SORTED 1 //
!_TAG_FILE_ENCODING cp932 //
...
tagname foo.txt /SOME MULTIBYTE TEXT
Vim starts the binary search immediately after reading the
!_TAG_FILE_SORTED line. If the binary search doesn't reach the
!_TAG_FILE_ENCODING line, the tagaddress is not converted using the
specified encoding.
Please check the attached patch. What I did is:
- Changed to read all !_TAG_XXX lines first.
- Changed to convert every line to 'encoding' first, and compare tag
names as internally encoded text.
- Changed to disable binary search when !_TAG_FILE_ENCODING is used and
conversion is needed (this means it has no effect when the specified
encoding is the same as the 'encoding' option).
By the way, on a separate topic: while testing tag search I noticed that
tag completion (i_CTRL-X_CTRL-]) shows the !_TAG_XXX tag names. I think
they should be hidden because they are not useful information.
--
Yukihiro Nakadaira - [email protected]
--
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php
diff -r 15b934a16641 src/tag.c
--- a/src/tag.c Wed Sep 14 19:04:40 2011 +0200
+++ b/src/tag.c Thu Oct 06 22:31:03 2011 +0900
@@ -1360,13 +1360,9 @@
char_u *saved_pat = NULL; /* copy of pat[] */
#endif
- /* Use two sets of variables for the pattern: "orgpat" holds the values
- * for the original pattern and "convpat" converted from 'encoding' to
- * encoding of the tags file. "pats" point to either one of these. */
pat_T *pats;
pat_T orgpat; /* holds unconverted pattern info */
#ifdef FEAT_MBYTE
- pat_T convpat; /* holds converted pattern info */
vimconv_T vimconv;
#endif
@@ -1376,6 +1372,7 @@
int sort_error = FALSE; /* tags file not sorted */
int linear; /* do a linear search */
int sortic = FALSE; /* tag file sorted in nocase */
+ int tag_file_sorted = NUL; /* !_TAG_FILE_SORTED value */
#endif
int line_error = FALSE; /* syntax error */
int has_re = (flags & TAG_REGEXP); /* regexp used */
@@ -1683,6 +1680,14 @@
if (eof)
{
+ /* While reading !_TAG_XXX information, retry with
+ * linear search to ensure these tags are matched. */
+ if (state == TS_START)
+ {
+ state = TS_LINEAR;
+ rewind(fp);
+ continue;
+ }
#ifdef FEAT_EMACS_TAGS
if (incstack_idx) /* this was an included file */
{
@@ -1701,6 +1706,29 @@
}
line_read_in:
+#ifdef FEAT_MBYTE
+ if (vimconv.vc_type != CONV_NONE)
+ {
+ char_u *conv_line;
+
+ conv_line = string_convert(&vimconv, lbuf, NULL);
+ if (conv_line != NULL)
+ {
+ /* Copy or swap lbuf and conv_line. */
+ if (STRLEN(conv_line) + 1 > LSIZE)
+ {
+ vim_free(lbuf);
+ lbuf = conv_line;
+ }
+ else
+ {
+ vim_strncpy(lbuf, conv_line, LSIZE - 1);
+ vim_free(conv_line);
+ }
+ }
+ }
+#endif
+
#ifdef FEAT_EMACS_TAGS
/*
* Emacs tags line with CTRL-L: New file name on next line.
@@ -1770,6 +1798,33 @@
*/
if (state == TS_START)
{
+ /* Read flags. */
+ if (STRNCMP(lbuf, "!_TAG_", 6) <= 0)
+ {
+#ifdef FEAT_TAG_BINS
+ if (STRNCMP(lbuf, "!_TAG_FILE_SORTED\t", 18) == 0)
+ {
+ tag_file_sorted = lbuf[18];
+ continue;
+ }
+#endif
+#ifdef FEAT_MBYTE
+ if (STRNCMP(lbuf, "!_TAG_FILE_ENCODING\t", 20) == 0)
+ {
+ for (p = lbuf + 20; *p > ' ' && *p < 127; ++p)
+ ;
+ *p = NUL;
+ /* Prepare for converting each line. */
+ convert_setup(&vimconv, lbuf + 20, p_enc);
+ continue;
+ }
+#endif
+ /* Ignore unknown flag. */
+ continue;
+ }
+
+ /* Headers ends. */
+
#ifdef FEAT_TAG_BINS
/*
* When there is no tag head, or ignoring case, need to do a
@@ -1779,29 +1834,28 @@
* When "!_TAG_FILE_SORTED" found: start binary search if
* flag set.
* For cscope, it's always linear.
+ * When encoding conversion is enabled, use linear.
*/
+ if (linear)
+ state = TS_LINEAR;
# ifdef FEAT_CSCOPE
- if (linear || use_cscope)
-# else
- if (linear)
+ else if (use_cscope)
+ state = TS_LINEAR;
# endif
+# ifdef FEAT_MBYTE
+ else if (vimconv.vc_type != CONV_NONE)
state = TS_LINEAR;
- else if (STRNCMP(lbuf, "!_TAG_", 6) > 0)
+# endif
+ else if (tag_file_sorted == '1')
state = TS_BINARY;
- else if (STRNCMP(lbuf, "!_TAG_FILE_SORTED\t", 18) == 0)
+ else if (tag_file_sorted == '2')
{
- /* Check sorted flag */
- if (lbuf[18] == '1')
- state = TS_BINARY;
- else if (lbuf[18] == '2')
- {
- state = TS_BINARY;
- sortic = TRUE;
- pats->regmatch.rm_ic = (p_ic || !noic);
- }
- else
- state = TS_LINEAR;
+ state = TS_BINARY;
+ sortic = TRUE;
+ pats->regmatch.rm_ic = (p_ic || !noic);
}
+ else
+ state = TS_LINEAR;
if (state == TS_BINARY && pats->regmatch.rm_ic && !sortic)
{
@@ -1841,36 +1895,12 @@
continue;
}
#endif
+
+ /* Start linear search in the start of the file. */
+ rewind(fp);
+ continue;
}
-#ifdef FEAT_MBYTE
- if (lbuf[0] == '!' && pats == &orgpat
- && STRNCMP(lbuf, "!_TAG_FILE_ENCODING\t", 20) == 0)
- {
- /* Convert the search pattern from 'encoding' to the
- * specified encoding. */
- for (p = lbuf + 20; *p > ' ' && *p < 127; ++p)
- ;
- *p = NUL;
- convert_setup(&vimconv, p_enc, lbuf + 20);
- if (vimconv.vc_type != CONV_NONE)
- {
- convpat.pat = string_convert(&vimconv, pats->pat, NULL);
- if (convpat.pat != NULL)
- {
- pats = &convpat;
- pats->len = (int)STRLEN(pats->pat);
- prepare_pats(pats, has_re);
- pats->regmatch.rm_ic = orgpat.regmatch.rm_ic;
- }
- }
-
- /* Prepare for converting a match the other way around. */
- convert_setup(&vimconv, lbuf + 20, p_enc);
- continue;
- }
-#endif
-
/*
* Figure out where the different strings are in this line.
* For "normal" tags: Do a quick check if the tag matches.
@@ -2187,35 +2217,6 @@
*/
if (ga_grow(&ga_match[mtt], 1) == OK)
{
-#ifdef FEAT_MBYTE
- char_u *conv_line = NULL;
- char_u *lbuf_line = lbuf;
-
- if (vimconv.vc_type != CONV_NONE)
- {
- /* Convert the tag line from the encoding of the tags
- * file to 'encoding'. Then parse the line again. */
- conv_line = string_convert(&vimconv, lbuf, NULL);
- if (conv_line != NULL)
- {
- if (parse_tag_line(conv_line,
-#ifdef FEAT_EMACS_TAGS
- is_etag,
-#endif
- &tagp) == OK)
- lbuf_line = conv_line;
- else
- /* doesn't work, go back to unconverted line. */
- (void)parse_tag_line(lbuf,
-#ifdef FEAT_EMACS_TAGS
- is_etag,
-#endif
- &tagp);
- }
- }
-#else
-# define lbuf_line lbuf
-#endif
if (help_only)
{
#ifdef FEAT_MULTI_LANG
@@ -2307,7 +2308,7 @@
* without Emacs tags: <mtt><tag_fname><NUL><lbuf>
*/
len = (int)STRLEN(tag_fname)
- + (int)STRLEN(lbuf_line) + 3;
+ + (int)STRLEN(lbuf) + 3;
#ifdef FEAT_EMACS_TAGS
if (is_etag)
len += (int)STRLEN(ebuf) + 1;
@@ -2337,7 +2338,7 @@
else
*s++ = NUL;
#endif
- STRCPY(s, lbuf_line);
+ STRCPY(s, lbuf);
}
}
@@ -2373,10 +2374,6 @@
else
vim_free(mfp);
}
-#ifdef FEAT_MBYTE
- /* Note: this makes the values in "tagp" invalid! */
- vim_free(conv_line);
-#endif
}
else /* Out of memory! Just forget about the rest. */
{
@@ -2415,19 +2412,15 @@
}
#endif
#ifdef FEAT_MBYTE
- if (pats == &convpat)
- {
- /* Go back from converted pattern to original pattern. */
- vim_free(pats->pat);
- vim_free(pats->regmatch.regprog);
- orgpat.regmatch.rm_ic = pats->regmatch.rm_ic;
- pats = &orgpat;
- }
if (vimconv.vc_type != CONV_NONE)
convert_setup(&vimconv, NULL, NULL);
#endif
#ifdef FEAT_TAG_BINS
+ tag_file_sorted = NUL;
+#endif
+
+#ifdef FEAT_TAG_BINS
if (sort_error)
{
EMSG2(_("E432: Tags file not sorted: %s"), tag_fname);
@@ -2461,7 +2454,8 @@
#ifdef FEAT_TAG_BINS
/* stop searching when already did a linear search, or when TAG_NOIC
* used, and 'ignorecase' not set or already did case-ignore search */
- if (stop_searching || linear || (!p_ic && noic) || pats->regmatch.rm_ic)
+ if (stop_searching || linear || (!p_ic && noic) || pats->regmatch.rm_ic
+ || state == TS_LINEAR)
break;
# ifdef FEAT_CSCOPE
if (use_cscope)