There are three problems.

1. Tag search gets stuck in an infinite loop.

  $ cat tags
  !_TAG_FILE_ENCODING   utf-8   //
  abcdefghijklmnopqrs   foo.txt /foo.txt

  $ vim -u NONE
  :tag abcdefghijklmnopqrs
  ... infinite loop

  This happens when the specified encoding is the same as 'encoding', or is
  not supported.  Vim enters an infinite loop when it reads the
  !_TAG_FILE_ENCODING line during the binary search.

  This problem especially troubles Japanese users who use the translated
  help.


2. Tag search with a pattern doesn't work.

  I hope you can see the multi-byte Japanese characters.

  $ echo $LANG
  ja_JP.UTF-8

  $ cat tags
  !_TAG_FILE_ENCODING   cp932   //
  あいうえお foo.txt /foo.txt

  (Actually, this command displays garbled text because the terminal is
  utf-8 and the tags file is cp932.)

  $ vim -u NONE
  :tag /.いうえお
  E426: tag not found: .いうえお

  The tags file has "あいうえお",
  but "/.いうえお" doesn't match it.

  Vim encodes the specified pattern to the tags file's encoding and tries to
  match the tag name against the encoded pattern.  But the regexp code handles
  these strings as if they were encoded with the 'encoding' option.  Therefore,
  in the above example, "." doesn't match the multi-byte character "あ".


3. !_TAG_FILE_ENCODING may not be read.

  $ cat tags
  !_TAG_FILE_SORTED     1       //
  !_TAG_FILE_ENCODING   cp932   //
  ...
  tagname       foo.txt /SOME MULTIBYTE TEXT

  Vim starts the binary search immediately after reading the !_TAG_FILE_SORTED
  line.  If the binary search doesn't reach the !_TAG_FILE_ENCODING line, the
  tagaddress is not converted using the specified encoding.


Please check the attached patch.  What I did is:

- Changed to read all !_TAG_XXX lines first.

- Changed to convert the encoding of every line to 'encoding' first, and
  compare tag names as internally encoded text.

- Changed to disable binary search when !_TAG_FILE_ENCODING is used and
  conversion is needed (this means nothing changes when the specified
  encoding is the same as the 'encoding' option).





By the way, on a different topic: while testing tag search I noticed that tag
completion (i_CTRL-X_CTRL-]) shows the !_TAG_XXX tag names.  I think they
should be hidden because they are not useful information.

-- 
Yukihiro Nakadaira - [email protected]

-- 
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php
diff -r 15b934a16641 src/tag.c
--- a/src/tag.c	Wed Sep 14 19:04:40 2011 +0200
+++ b/src/tag.c	Thu Oct 06 22:31:03 2011 +0900
@@ -1360,13 +1360,9 @@
     char_u	*saved_pat = NULL;		/* copy of pat[] */
 #endif
 
-    /* Use two sets of variables for the pattern: "orgpat" holds the values
-     * for the original pattern and "convpat" converted from 'encoding' to
-     * encoding of the tags file.  "pats" point to either one of these. */
     pat_T	*pats;
     pat_T	orgpat;			/* holds unconverted pattern info */
 #ifdef FEAT_MBYTE
-    pat_T	convpat;		/* holds converted pattern info */
     vimconv_T	vimconv;
 #endif
 
@@ -1376,6 +1372,7 @@
     int		sort_error = FALSE;		/* tags file not sorted */
     int		linear;				/* do a linear search */
     int		sortic = FALSE;			/* tag file sorted in nocase */
+    int		tag_file_sorted = NUL;		/* !_TAG_FILE_SORTED value */
 #endif
     int		line_error = FALSE;		/* syntax error */
     int		has_re = (flags & TAG_REGEXP);	/* regexp used */
@@ -1683,6 +1680,14 @@
 
 		if (eof)
 		{
+		    /* While reading !_TAG_XXX information, retry with
+		     * linear search to ensure these tags are matched. */
+		    if (state == TS_START)
+		    {
+			state = TS_LINEAR;
+			rewind(fp);
+			continue;
+		    }
 #ifdef FEAT_EMACS_TAGS
 		    if (incstack_idx)	/* this was an included file */
 		    {
@@ -1701,6 +1706,29 @@
 	    }
 line_read_in:
 
+#ifdef FEAT_MBYTE
+	    if (vimconv.vc_type != CONV_NONE)
+	    {
+		char_u *conv_line;
+
+		conv_line = string_convert(&vimconv, lbuf, NULL);
+		if (conv_line != NULL)
+		{
+		    /* Copy or swap lbuf and conv_line. */
+		    if (STRLEN(conv_line) + 1 > LSIZE)
+		    {
+			vim_free(lbuf);
+			lbuf = conv_line;
+		    }
+		    else
+		    {
+			vim_strncpy(lbuf, conv_line, LSIZE - 1);
+			vim_free(conv_line);
+		    }
+		}
+	    }
+#endif
+
 #ifdef FEAT_EMACS_TAGS
 	    /*
 	     * Emacs tags line with CTRL-L: New file name on next line.
@@ -1770,6 +1798,33 @@
 	     */
 	    if (state == TS_START)
 	    {
+		/* Read flags. */
+		if (STRNCMP(lbuf, "!_TAG_", 6) <= 0)
+		{
+#ifdef FEAT_TAG_BINS
+		    if (STRNCMP(lbuf, "!_TAG_FILE_SORTED\t", 18) == 0)
+		    {
+			tag_file_sorted = lbuf[18];
+			continue;
+		    }
+#endif
+#ifdef FEAT_MBYTE
+		    if (STRNCMP(lbuf, "!_TAG_FILE_ENCODING\t", 20) == 0)
+		    {
+			for (p = lbuf + 20; *p > ' ' && *p < 127; ++p)
+			    ;
+			*p = NUL;
+			/* Prepare for converting each line. */
+			convert_setup(&vimconv, lbuf + 20, p_enc);
+			continue;
+		    }
+#endif
+		    /* Ignore unknown flag. */
+		    continue;
+		}
+
+		/* Headers end. */
+
 #ifdef FEAT_TAG_BINS
 		/*
 		 * When there is no tag head, or ignoring case, need to do a
@@ -1779,29 +1834,28 @@
 		 * When "!_TAG_FILE_SORTED" found: start binary search if
 		 * flag set.
 		 * For cscope, it's always linear.
+		 * When encoding conversion is enabled, use linear.
 		 */
+		if (linear)
+		    state = TS_LINEAR;
 # ifdef FEAT_CSCOPE
-		if (linear || use_cscope)
-# else
-		if (linear)
+		else if (use_cscope)
+		    state = TS_LINEAR;
 # endif
+# ifdef FEAT_MBYTE
+		else if (vimconv.vc_type != CONV_NONE)
 		    state = TS_LINEAR;
-		else if (STRNCMP(lbuf, "!_TAG_", 6) > 0)
+# endif
+		else if (tag_file_sorted == '1')
 		    state = TS_BINARY;
-		else if (STRNCMP(lbuf, "!_TAG_FILE_SORTED\t", 18) == 0)
+		else if (tag_file_sorted == '2')
 		{
-		    /* Check sorted flag */
-		    if (lbuf[18] == '1')
-			state = TS_BINARY;
-		    else if (lbuf[18] == '2')
-		    {
-			state = TS_BINARY;
-			sortic = TRUE;
-			pats->regmatch.rm_ic = (p_ic || !noic);
-		    }
-		    else
-			state = TS_LINEAR;
+		    state = TS_BINARY;
+		    sortic = TRUE;
+		    pats->regmatch.rm_ic = (p_ic || !noic);
 		}
+		else
+		    state = TS_LINEAR;
 
 		if (state == TS_BINARY && pats->regmatch.rm_ic && !sortic)
 		{
@@ -1841,36 +1895,12 @@
 		    continue;
 		}
 #endif
+
+                /* Start linear search in the start of the file. */
+                rewind(fp);
+                continue;
 	    }
 
-#ifdef FEAT_MBYTE
-	    if (lbuf[0] == '!' && pats == &orgpat
-			   && STRNCMP(lbuf, "!_TAG_FILE_ENCODING\t", 20) == 0)
-	    {
-		/* Convert the search pattern from 'encoding' to the
-		 * specified encoding. */
-		for (p = lbuf + 20; *p > ' ' && *p < 127; ++p)
-		    ;
-		*p = NUL;
-		convert_setup(&vimconv, p_enc, lbuf + 20);
-		if (vimconv.vc_type != CONV_NONE)
-		{
-		    convpat.pat = string_convert(&vimconv, pats->pat, NULL);
-		    if (convpat.pat != NULL)
-		    {
-			pats = &convpat;
-			pats->len = (int)STRLEN(pats->pat);
-			prepare_pats(pats, has_re);
-			pats->regmatch.rm_ic = orgpat.regmatch.rm_ic;
-		    }
-		}
-
-		/* Prepare for converting a match the other way around. */
-		convert_setup(&vimconv, lbuf + 20, p_enc);
-		continue;
-	    }
-#endif
-
 	    /*
 	     * Figure out where the different strings are in this line.
 	     * For "normal" tags: Do a quick check if the tag matches.
@@ -2187,35 +2217,6 @@
 		 */
 		if (ga_grow(&ga_match[mtt], 1) == OK)
 		{
-#ifdef FEAT_MBYTE
-		    char_u	*conv_line = NULL;
-		    char_u	*lbuf_line = lbuf;
-
-		    if (vimconv.vc_type != CONV_NONE)
-		    {
-			/* Convert the tag line from the encoding of the tags
-			 * file to 'encoding'.  Then parse the line again. */
-			conv_line = string_convert(&vimconv, lbuf, NULL);
-			if (conv_line != NULL)
-			{
-			    if (parse_tag_line(conv_line,
-#ifdef FEAT_EMACS_TAGS
-					is_etag,
-#endif
-					&tagp) == OK)
-				lbuf_line = conv_line;
-			    else
-				/* doesn't work, go back to unconverted line. */
-				(void)parse_tag_line(lbuf,
-#ifdef FEAT_EMACS_TAGS
-						     is_etag,
-#endif
-						     &tagp);
-			}
-		    }
-#else
-# define lbuf_line lbuf
-#endif
 		    if (help_only)
 		    {
 #ifdef FEAT_MULTI_LANG
@@ -2307,7 +2308,7 @@
 			 * without Emacs tags: <mtt><tag_fname><NUL><lbuf>
 			 */
 			len = (int)STRLEN(tag_fname)
-						 + (int)STRLEN(lbuf_line) + 3;
+						 + (int)STRLEN(lbuf) + 3;
 #ifdef FEAT_EMACS_TAGS
 			if (is_etag)
 			    len += (int)STRLEN(ebuf) + 1;
@@ -2337,7 +2338,7 @@
 			    else
 				*s++ = NUL;
 #endif
-			    STRCPY(s, lbuf_line);
+			    STRCPY(s, lbuf);
 			}
 		    }
 
@@ -2373,10 +2374,6 @@
 			else
 			    vim_free(mfp);
 		    }
-#ifdef FEAT_MBYTE
-		    /* Note: this makes the values in "tagp" invalid! */
-		    vim_free(conv_line);
-#endif
 		}
 		else    /* Out of memory! Just forget about the rest. */
 		{
@@ -2415,19 +2412,15 @@
 	}
 #endif
 #ifdef FEAT_MBYTE
-	if (pats == &convpat)
-	{
-	    /* Go back from converted pattern to original pattern. */
-	    vim_free(pats->pat);
-	    vim_free(pats->regmatch.regprog);
-	    orgpat.regmatch.rm_ic = pats->regmatch.rm_ic;
-	    pats = &orgpat;
-	}
 	if (vimconv.vc_type != CONV_NONE)
 	    convert_setup(&vimconv, NULL, NULL);
 #endif
 
 #ifdef FEAT_TAG_BINS
+	tag_file_sorted = NUL;
+#endif
+
+#ifdef FEAT_TAG_BINS
 	if (sort_error)
 	{
 	    EMSG2(_("E432: Tags file not sorted: %s"), tag_fname);
@@ -2461,7 +2454,8 @@
 #ifdef FEAT_TAG_BINS
       /* stop searching when already did a linear search, or when TAG_NOIC
        * used, and 'ignorecase' not set or already did case-ignore search */
-      if (stop_searching || linear || (!p_ic && noic) || pats->regmatch.rm_ic)
+      if (stop_searching || linear || (!p_ic && noic) || pats->regmatch.rm_ic
+	      || state == TS_LINEAR)
 	  break;
 # ifdef FEAT_CSCOPE
       if (use_cscope)

Raspunde prin e-mail lui