Andrey Chernomyrdin wrote:
>
> Hi!
>
> Вот тут столкнулся с проблемой...
> - --
> $ echo -ne "\214\n" | iconv -t windows-1251 -f koi8-r
> iconv: illegal input sequence at position 0
> - --
>
> Что делать и как с этим бороться, то есть не обрабатывает все символы, которые
> не известны в windows-1251...
Маленький патч для glibc из MDK RE, автор Дмитрий Левин.
Добавляет опцию -r, которая позволяет определить символ, замещающий
неизвестные. Без нее все как обычно.
Не знаю, войдет ли в 2.2.2.
Rgrds, AEN
>
--- glibc-2.2-orig/iconv/iconv_prog.c Thu Sep 7 22:56:23 2000
+++ glibc-2.2/iconv/iconv_prog.c Wed Nov 15 04:48:21 2000
@@ -59,6 +59,7 @@
{ "list", 'l', NULL, 0, N_("list all known coded character sets") },
{ NULL, 0, NULL, 0, N_("Output control:") },
{ NULL, 'c', NULL, 0, N_("omit invalid characters from output") },
+ { "replace", 'r', "SYMBOL", OPTION_ARG_OPTIONAL, N_("replace invalid characters with specified symbol") },
{ "output", 'o', "FILE", 0, N_("output file") },
{ "silent", 's', NULL, 0, N_("suppress warnings") },
{ "verbose", OPT_VERBOSE, NULL, 0, N_("print progress information") },
@@ -99,6 +100,7 @@
/* If nonzero omit invalid character from output. */
static int omit_invalid;
+static char replace_invalid;
/* Prototypes for the functions doing the actual work. */
static int process_block (iconv_t cd, char *addr, size_t len, FILE *output);
@@ -314,6 +316,10 @@
/* Omit invalid characters from output. */
omit_invalid = 1;
break;
+ case 'r':
+ /* Replace invalid characters. */
+ replace_invalid = (arg && *arg) ? *arg : '?';
+ break;
case OPT_VERBOSE:
verbose = 1;
break;
@@ -356,6 +362,23 @@
fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
}
+static int
+write_invalid( iconv_t cd, char **addr, size_t *len, FILE *output )
+{
+ int errno_save = errno;
+ int needed_from = ((__gconv_t) cd)->__steps->__min_needed_from;
+ if ( fwrite( &replace_invalid, 1, 1, output ) < 1 || ferror( output ) )
+ {
+ /* Error occurred while printing replace symbol. */
+ error (0, 0, _("conversion stopped due to problem in writing the output"));
+ return -1;
+ }
+
+ errno = errno_save;
+ *addr += needed_from;
+ *len -= needed_from;
+ return 0;
+}
static int
process_block (iconv_t cd, char *addr, size_t len, FILE *output)
@@ -424,22 +447,34 @@
switch (errno)
{
case EILSEQ:
+ if ( replace_invalid )
+ {
+ if ( write_invalid( cd, &addr, &len, output ) )
+ return -1;
+ else
+ break;
+ }
error (0, 0, _("illegal input sequence at position %ld"),
(long) (addr - start));
- break;
+ return -1;
case EINVAL:
+ if ( replace_invalid )
+ {
+ if ( write_invalid( cd, &addr, &len, output ) )
+ return -1;
+ else
+ break;
+ }
error (0, 0, _("\
incomplete character or shift sequence at end of buffer"));
- break;
+ return -1;
case EBADF:
error (0, 0, _("internal error (illegal descriptor)"));
- break;
+ return -1;
default:
error (0, 0, _("unknown iconv() error %d"), errno);
- break;
+ return -1;
}
-
- return -1;
}
}