Hello, the first attachment is a test case for iconv. It is the same one I've sent before, but it additionally tests the toybox default of echoing characters in illegal sequences to stdout, and adds the "-s" option to suppress possible output to stderr.
Currently toybox passes none of the tests. The second attachment is a patch against the current toy adding the missing options (ignoring them) and making the toy at least pass the first test. (It should also pass the third test.) I'm not sure what would be the best way to make it pass the third test (testing illegal sequences). xwrite() should not repeatedly output from outstart (= toybuf+2048) when there are many illegal characters in a row. Anyway, the test cases should help with debugging. Finally, for your entertainment, I have attached an iconv.c with my strange error-checking loop, which passes all (three...) tests. Felix
#!/bin/bash

# Test cases for the iconv toy, driven by the testing() helper from testing.sh.

[ -f testing.sh ] && . testing.sh

#testing "name" "command" "result" "infile" "stdin"

# The same character in two encodings.
iso=$(printf '\357')
utf=$(printf '\303\257') # "ï"

# Build two fixture files: a single ASCII "a" followed by 4096 copies of the
# character, once as ISO-8859-1 bytes and once as UTF-8 sequences.
# NOTE(review): the leading "a" and the repeat count presumably make the
# two-byte sequences straddle the toy's 2048 byte read boundary -- confirm.
printf a > iso
printf a > utf
count=0
while [ "$count" -lt 4096 ]
do
  printf "$iso" >> iso
  printf "$utf" >> utf
  count=$((count + 1))
done

# Valid input: converting the ISO-8859-1 file must yield the UTF-8 file.
testing "iconv" "iconv -f iso-8859-1 iso" "$(cat utf)" "" ""

# The ISO-8859-1 file read as UTF-8: with -c only the "a" is expected back.
testing "iconv -c" "iconv -cs -f utf-8 iso" "a" "" ""

# toybox specific: assumes that bad characters are echoed by default
testing "iconv (echo)" "iconv -s -f utf-8 iso" "$(cat iso)" "" ""

rm iso utf
# HG changeset patch # User Felix Janda <[email protected]> # Date 1401527910 -7200 # Sat May 31 11:18:30 2014 +0200 # Node ID c4c4d6103f8668fae63ec80cd9d58cdbbe226314 # Parent 78a3eaf5555f7a480ba97cd5906f0a44c9956698 iconv: some fixes - fix problem with sequences at buffer boundaries - add (ignored) -c and -s options - don't try to continue with a file when read() fails diff -r 78a3eaf5555f -r c4c4d6103f86 toys/pending/iconv.c --- a/toys/pending/iconv.c Thu May 29 08:21:48 2014 -0500 +++ b/toys/pending/iconv.c Sat May 31 11:18:30 2014 +0200 @@ -4,7 +4,7 @@ * * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/iconv.html -USE_ICONV(NEWTOY(iconv, "t:f:", TOYFLAG_USR|TOYFLAG_BIN)) +USE_ICONV(NEWTOY(iconv, "cst:f:", TOYFLAG_USR|TOYFLAG_BIN)) config ICONV bool "iconv" @@ -38,16 +38,19 @@ do { size_t outleft = 2048; - char *in = toybuf, *out = outstart; + char *in = toybuf+inleft, *out = outstart; - len = read(fd, toybuf+inleft, 2048-inleft); + len = read(fd, in, 2048-inleft); - if (len < 0) perror_msg("read '%s'"); + if (len < 0) { + perror_msg("read '%s'"); + return; + } inleft += len; do { if (iconv(TT.ic, &in, &inleft, &out, &outleft) == -1 - && (errno == EILSEQ || (in == toybuf && errno == EINVAL))) + && (errno == EILSEQ || (in == toybuf+inleft-len && errno == EINVAL))) { if (outleft) { // Skip first byte of illegal sequence to avoid endless loops
/* iconv.c - Convert character encoding * * Copyright 2014 Felix Janda <[email protected]> * * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/ USE_ICONV(NEWTOY(iconv, "cst:f:", TOYFLAG_USR|TOYFLAG_BIN)) config ICONV bool "iconv" default n help usage: iconv [-cs] [-f FROM] [-t TO] [FILE...] Convert character encoding of files. -f convert from (default utf8) -t convert to (default utf8) -c omit bad characters -s no error messages */ #define FOR_iconv #include "toys.h" #include <iconv.h> GLOBALS( char *from; char *to; void *ic; ) static void do_iconv(int fd, char *name) { size_t inleft = 0, outleft = 2048; char *in = toybuf, *out = toybuf + 2048; ssize_t len, total = 0; while (1) { if (!inleft || (errno == EINVAL)) { memmove(toybuf, in, inleft); len = read(fd, toybuf + inleft, 2048 - inleft); if (len < 0) { perror_msg("read '%s'"); return; } inleft += len; total += len; if (!len) { xwrite(1, toybuf + 2048, 2048 - outleft); if (!(toys.optflags & FLAG_c)) xwrite(1, toybuf, inleft); break; } in = toybuf; } else if (!outleft || (errno == E2BIG)) { xwrite(1, toybuf + 2048, 2048 - outleft); out = toybuf + 2048; outleft = 2048; } else if (errno == EILSEQ) { if (!(toys.optflags & FLAG_s)) error_msg("bad character at %ld", total - inleft); if (!(toys.optflags & FLAG_c)) { *(out++) = *in; outleft--; } in++; inleft--; } if (iconv(TT.ic, &in, &inleft, &out, &outleft) != (size_t)-1) errno = 0; } } void iconv_main(void) { TT.ic = iconv_open(TT.to ? TT.to : "utf8", TT.from ? TT.from : "utf8"); if (TT.ic == (iconv_t)-1) error_exit("bad encoding"); loopfiles(toys.optargs, do_iconv); if (CFG_TOYBOX_FREE) iconv_close(TT.ic); }
_______________________________________________ Toybox mailing list [email protected] http://lists.landley.net/listinfo.cgi/toybox-landley.net
