Hello,

the first attachment is a test case for iconv. It is the same one I sent
before, but it additionally tests the toybox default of echoing characters
in illegal sequences to stdout, and adds the "-s" option to suppress
possible output to stderr.

Currently toybox passes none of the tests. The second attachment is a
patch against the current toy adding the missing options (ignoring them)
and making the toy at least pass the first test. (It should also pass
the third test.)

I'm not sure what would be the best way to make it pass the third test
(testing illegal sequences). xwrite() should not repeatedly output from
outstart(=toybuf+2048) when there are many illegal characters in a row.
Anyway, the test cases should help with debugging.

Then, for your entertainment, I have attached an iconv.c with my strange
error checking loop, which passes all (three...) tests.

Felix
#!/bin/bash

# Test cases for iconv.
#
# Two input files are generated: "iso" holds an ASCII "a" followed by 4096
# copies of the single byte 0xEF (i-diaeresis in ISO-8859-1), and "utf" holds
# an "a" followed by 4096 copies of the two-byte UTF-8 encoding of the same
# character. The leading "a" shifts the two-byte sequences by one byte.

[ -f testing.sh ] && . testing.sh

#testing "name" "command" "result" "infile" "stdin"

iso=$(printf '\357')
utf=$(printf '\303\257') # "ï"

# Write each file through a single redirection, and pass the raw bytes as
# data ('%s') rather than as the printf format string.
{
  printf a
  for i in $(seq 4096)
  do
    printf '%s' "$iso"
  done
} > iso

{
  printf a
  for i in $(seq 4096)
  do
    printf '%s' "$utf"
  done
} > utf

# Valid ISO-8859-1 input must convert to the UTF-8 reference file.
testing "iconv" "iconv -f iso-8859-1 iso" "$(cat utf)" "" ""
# Read as UTF-8, every 0xEF byte is invalid; -c drops them, leaving "a".
testing "iconv -c" "iconv -cs -f utf-8 iso" "a" "" ""

# toybox specific: assumes that bad characters are echoed by default
testing "iconv (echo)" "iconv -s -f utf-8 iso" "$(cat iso)" "" ""

rm iso utf
# HG changeset patch
# User Felix Janda <[email protected]>
# Date 1401527910 -7200
#      Sat May 31 11:18:30 2014 +0200
# Node ID c4c4d6103f8668fae63ec80cd9d58cdbbe226314
# Parent  78a3eaf5555f7a480ba97cd5906f0a44c9956698
iconv: some fixes

- fix problem with sequences at buffer boundaries
- add (ignored) -c and -s options
- don't try to continue with a file when read() fails

diff -r 78a3eaf5555f -r c4c4d6103f86 toys/pending/iconv.c
--- a/toys/pending/iconv.c	Thu May 29 08:21:48 2014 -0500
+++ b/toys/pending/iconv.c	Sat May 31 11:18:30 2014 +0200
@@ -4,7 +4,7 @@
  *
  * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/iconv.html
 
-USE_ICONV(NEWTOY(iconv, "t:f:", TOYFLAG_USR|TOYFLAG_BIN))
+USE_ICONV(NEWTOY(iconv, "cst:f:", TOYFLAG_USR|TOYFLAG_BIN))
 
 config ICONV
   bool "iconv"
@@ -38,16 +38,19 @@
 
   do {
     size_t outleft = 2048;
-    char *in = toybuf, *out = outstart;
+    char *in = toybuf+inleft, *out = outstart;
 
-    len = read(fd, toybuf+inleft, 2048-inleft);
+    len = read(fd, in, 2048-inleft);
 
-    if (len < 0) perror_msg("read '%s'");
+    if (len < 0) {
+      perror_msg("read '%s'");
+      return;
+    }
     inleft += len;
 
     do {
       if (iconv(TT.ic, &in, &inleft, &out, &outleft) == -1
-          && (errno == EILSEQ || (in == toybuf && errno == EINVAL)))
+          && (errno == EILSEQ || (in == toybuf+inleft-len && errno == EINVAL)))
       {
         if (outleft) {
           // Skip first byte of illegal sequence to avoid endless loops
/* iconv.c - Convert character encoding
 *
 * Copyright 2014 Felix Janda <[email protected]>
 *
 * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/iconv.html

USE_ICONV(NEWTOY(iconv, "cst:f:", TOYFLAG_USR|TOYFLAG_BIN))

config ICONV
  bool "iconv"
  default n
  help
    usage: iconv [-cs] [-f FROM] [-t TO] [FILE...]

    Convert character encoding of files.

    -f  convert from (default utf8)
    -t  convert to   (default utf8)
    -c  omit bad characters
    -s  no error messages

*/

#define FOR_iconv
#include "toys.h"
#include <iconv.h>

// Per-command state, accessed through TT in the functions below.
// NOTE(review): from/to are presumably filled in from -f/-t by the option
// parser, in which case field order matters — confirm before reordering.
GLOBALS(
  char *from; // source encoding name; iconv_main uses "utf8" when this is NULL
  char *to;   // target encoding name; iconv_main uses "utf8" when this is NULL

  void *ic;   // conversion descriptor returned by iconv_open() in iconv_main
)

// Convert the contents of one input file and write the result to stdout.
//
// fd:   descriptor to read the input from
// name: file name, used only in the read error message
//
// toybuf is used as two 2048-byte halves: the first holds pending input, the
// second collects converted output. Each pass of the loop looks at the outcome
// of the previous iconv() call (errno is cleared when it succeeds) and either
// refills the input, flushes the output, or steps over one bad byte, then
// calls iconv() again.
static void do_iconv(int fd, char *name)
{
  size_t inleft = 0, outleft = 2048;
  char *in = toybuf, *out = toybuf + 2048;
  ssize_t len, total = 0;

  while (1) {
    if (!inleft || (errno == EINVAL)) {
      // Input used up, or it ends in an incomplete sequence: move the
      // unconsumed tail to the start of the buffer and read more behind it.
      memmove(toybuf, in, inleft);
      len = read(fd, toybuf + inleft, 2048 - inleft);

      if (len < 0) {
        // The format has a %s, so the file name must be passed along.
        perror_msg("read '%s'", name);
        return;
      }
      inleft += len;
      total += len;
      if (!len) {
        // End of input: flush converted output, then (unless -c) emit any
        // leftover unconverted bytes as they are.
        xwrite(1, toybuf + 2048, 2048 - outleft);
        if (!(toys.optflags & FLAG_c)) xwrite(1, toybuf, inleft);
        break;
      }
      in = toybuf;
    } else if (!outleft || (errno == E2BIG)) {
      // Output half is full: write it out and start filling it again.
      xwrite(1, toybuf + 2048, 2048 - outleft);
      out = toybuf + 2048;
      outleft = 2048;
    } else if (errno == EILSEQ) {
      // total counts every byte read so far and inleft the bytes not yet
      // consumed, so their difference is the offset of the offending byte.
      // The cast makes the argument match the %ld conversion.
      if (!(toys.optflags & FLAG_s))
        error_msg("bad character at %ld", (long)(total - inleft));
      // Without -c the bad byte is copied through unchanged; outleft is
      // known to be nonzero here because the previous branch was not taken.
      if (!(toys.optflags & FLAG_c)) {
        *(out++) = *in;
        outleft--;
      }
      // Always skip the byte so the next iconv() call makes progress.
      in++;
      inleft--;
    }
    // Clear errno on success so the checks above only see fresh failures.
    if (iconv(TT.ic, &in, &inleft, &out, &outleft) != (size_t)-1) errno = 0;
  }
}

// Command entry point: open the conversion descriptor for the requested
// encodings, run do_iconv over toys.optargs via loopfiles, then clean up.
void iconv_main(void)
{
  char *target = TT.to, *source = TT.from;

  // Either encoding falls back to "utf8" when it was not given.
  if (!target) target = "utf8";
  if (!source) source = "utf8";

  TT.ic = iconv_open(target, source);
  if ((iconv_t)-1 == TT.ic) error_exit("bad encoding");

  loopfiles(toys.optargs, do_iconv);

  // The descriptor is only released when CFG_TOYBOX_FREE is enabled.
  if (CFG_TOYBOX_FREE) iconv_close(TT.ic);
}
_______________________________________________
Toybox mailing list
[email protected]
http://lists.landley.net/listinfo.cgi/toybox-landley.net

Reply via email to