Hello Hrvoje and Dan,

I have been using wget for many years now, and finally got around to
porting a patch I made long ago (an EBCDIC patch against wget-1.5.3) to
the current wget-1.8.2. This patch makes wget compile and run on a
mainframe computer using the EBCDIC character set.

Also, when compiling wget on Solaris (using the SUNWspro "Forte"
compiler), I stumbled over a portability problem (C++-style comments
in C source), for which I include a patch as well.

About the EBCDIC patch:
* The goal was to create a patch which works for our EBCDIC system
  (Fujitsu-Siemens' mainframe OS is called BS2000; it runs on /390
  hardware, but is not compatible with OS/390 per se) but is
  easily adaptable to OS/390 (to which I have no access, but whose
  behaviour I know from similar ports). The code to actually make
  it work on OS/390 is not in place, but I added a tool (called
  safe-ctype-mk.c -- delete it if you don't like it) to create the
  additions to safe-ctype.c which are necessary because IBM's
  EBCDIC differs from "our" EBCDIC.

* Because code conversion is necessary for text files, a distinction
  between "text" and "binary" downloads was added, based on the
  downloaded MIME type (see the routines http_set_convert_flag() and
  http_get_convert_flag()). A future patch may add a new
  --conversion=text/binary/auto switch, which is not implemented
  yet. Currently, the same heuristics are used as in the Apache
  HTTP server to determine whether conversion is required (for
  several kinds of text files) or not required (for images,
  compressed files etc.).

* Because EBCDIC alphabetic characters live in the range between
  '\x81' and '\xE9', the getopt_long() numbers have been shifted up
  by 200, beyond the 0xFF boundary, to avoid conflicts between
  single-character options and numeric long-option values. That does
  not change the behaviour on ASCII machines, but allows the source
  to compile on EBCDIC machines (otherwise the compiler rejects it
  with a duplicate-case error in the switch).

* wget-1.8.2 has been compiled on our BS2000, with the patch applied,
  and with SSL enabled (against openssl-0.9.6k), and has been tested
  to work correctly.

If you add the patch to future versions of wget, all users of our
BS2000 as well as users of IBM's OS/390 could take advantage of the
availability of wget for EBCDIC-based machines, and hopefully someone
will also contribute the missing IBM-EBCDIC counterparts to our
BS2000-EBCDIC patch.

  Martin
-- 
<[EMAIL PROTECTED]>         |     Fujitsu Siemens
Fon: +49-89-636-46021, FAX: +49-89-636-47655 | 81730  Munich,  Germany
diff -bur wget-1.8.2/src/ftp.c work/wget-1.8.2/src/ftp.c
--- wget-1.8.2/src/ftp.c.orig   2003-10-06 17:20:58.710178000 +0200
+++ wget-1.8.2/src/ftp.c        2003-10-06 17:17:00.399371000 +0200
@@ -474,7 +474,7 @@
        }
 
       err = ftp_size(&con->rbuf, u->file, len);
-//      printf("\ndebug: %lld\n", *len);
+/*      printf("\ndebug: %lld\n", *len); */
       /* FTPRERR */
       switch (err)
        {
diff -bur wget-1.8.2/src/http.c work/wget-1.8.2/src/http.c
--- wget-1.8.2/src/http.c.orig  2003-10-06 17:20:58.900182000 +0200
+++ wget-1.8.2/src/http.c       2003-10-06 17:19:16.829836000 +0200
@@ -1777,7 +1777,7 @@
          FREE_MAYBE (dummy);
          return RETROK;
        }
-//      fprintf(stderr, "test: hstat.len: %lld, hstat.restval: %lld\n", hstat.dltime);
+/*      fprintf(stderr, "test: hstat.len: %lld, hstat.restval: %lld\n", hstat.dltime); */
       tmrate = retr_rate (hstat.len - hstat.restval, hstat.dltime, 0);
 
       if (hstat.len == hstat.contlen)
diff -bur wget-1.8.2.orig/src/connect.c wget-1.8.2/src/connect.c
--- wget-1.8.2.orig/src/connect.c       Mon Oct  6 17:13:11 2003
+++ wget-1.8.2/src/connect.c    Mon Oct  6 17:10:28 2003
@@ -47,6 +47,10 @@
 #endif
 #endif /* WINDOWS */
 
+#if #system(bs2000)
+#include <ascii_ebcdic.h>
+#endif
+
 #include <errno.h>
 #ifdef HAVE_STRING_H
 # include <string.h>
@@ -73,6 +77,26 @@
    to connect_to_one.  */
 static const char *connection_host_name;
 
+#if 'A' == '\xC1' /* CHARSET_EBCDIC */
+/* Start off with convert=1 (headers are always converted) */
+static int convert_flag_last_reply = 1;
+
+void
+http_set_convert_flag(const char *type)
+{
+    convert_flag_last_reply = 
+               (strncasecmp(type, "text/", 5) == 0 
+               || strncasecmp(type, "message/", 8) == 0 
+               || strcasecmp(type, "application/postscript") == 0);
+}
+
+int
+http_get_convert_flag()
+{
+    return convert_flag_last_reply;
+}
+#endif
+ 
 void
 set_connection_host_name (const char *host)
 {
@@ -459,6 +483,11 @@
     }
   while (res == -1 && errno == EINTR);
 
+#if 'A' == '\xC1'
+  if (res > 0 && http_get_convert_flag())
+    _a2e_n(buf,res);
+#endif
+
   return res;
 }
 
@@ -472,6 +501,25 @@
 {
   int res = 0;
 
+#if 'A' == '\xC1' /* CHARSET_EBCDIC */
+  static char *cbuf = NULL;
+  static int csize = 0;
+
+  if (len > csize) {
+    if (cbuf != NULL)
+      free(cbuf);
+    cbuf = malloc(csize = len+8192); /* add arbitrary amount of skew */
+    if (cbuf == NULL)
+      csize = 0;
+  }
+  if (cbuf)
+  {
+    memcpy(cbuf,buf,len);
+    _e2a_n(cbuf, len);
+    buf = cbuf;
+  }
+#endif /*CHARSET_EBCDIC*/
+ 
   /* `write' may write less than LEN bytes, thus the outward loop
      keeps trying it until all was written, or an error occurred.  The
      inner loop is reserved for the usual EINTR f*kage, and the
diff -bur wget-1.8.2.orig/src/gen_sslfunc.c wget-1.8.2/src/gen_sslfunc.c
--- wget-1.8.2.orig/src/gen_sslfunc.c   Sat May 18 05:14:48 2002
+++ wget-1.8.2/src/gen_sslfunc.c        Mon Oct  6 21:52:18 2003
@@ -252,6 +252,11 @@
     }
   while (res == -1 && errno == EINTR);
 
+#if 'A' == '\xC1'
+  if (res > 0 && http_get_convert_flag())
+    _a2e_n(buf,res);
+#endif
+
   return res;
 }
 
@@ -271,6 +276,28 @@
      keeps trying it until all was written, or an error occurred.  The
      inner loop is reserved for the usual EINTR f*kage, and the
      innermost loop deals with the same during select().  */
+
+#if 'A' == '\xC1' /* CHARSET_EBCDIC */
+  {
+   static char *cbuf = NULL;
+   static int csize = 0;
+
+   if (len > csize) {
+    if (cbuf != NULL)
+      free(cbuf);
+    cbuf = malloc(csize = len+8192); /* add arbitrary amount of skew */
+    if (cbuf == NULL)
+      csize = 0;
+   }
+   if (cbuf)
+   {
+    memcpy(cbuf,buf,len);
+    _e2a_n(cbuf, len);
+    buf = cbuf;
+   }
+  }
+#endif /*CHARSET_EBCDIC*/
+ 
   while (len > 0)
     {
       do
diff -bur wget-1.8.2.orig/src/gnu-md5.c wget-1.8.2/src/gnu-md5.c
--- wget-1.8.2.orig/src/gnu-md5.c       Sun Nov 18 05:36:20 2001
+++ wget-1.8.2/src/gnu-md5.c    Mon Oct  6 17:10:28 2003
@@ -37,6 +37,10 @@
 /*#  define memcpy(d, s, n) bcopy ((s), (d), (n))*/
 /*# endif*/
 /*#endif*/
+#if #system(bs2000)
+#include <ascii_ebcdic.h>
+#define WORDS_BIGENDIAN 1
+#endif
 
 #include "wget.h"
 #include "gnu-md5.h"
@@ -116,6 +120,16 @@
   *(md5_uint32 *) &ctx->buffer[bytes + pad + 4] = SWAP ((ctx->total[1] << 3) |
                                                        (ctx->total[0] >> 29));
 
+#if 'A' == '\xC1' /* CHARSET_EBCDIC */
+  /* XXX: @@@: In order to make this no more complex than necessary,
+   * this kludge converts the bits[] array using the ascii-to-ebcdic
+   * table, because the following md5_process_block() re-translates
+   * its input (ebcdic-to-ascii).
+   * Otherwise, we would have to pass a "conversion" flag to md5_process_block()
+   */
+  _a2e_n((void*)&ctx->buffer[bytes],pad+8);
+#endif /*CHARSET_EBCDIC*/
+
   /* Process last bytes.  */
   md5_process_block (ctx->buffer, bytes + pad + 8, ctx);
 
@@ -283,6 +297,14 @@
       md5_uint32 C_save = C;
       md5_uint32 D_save = D;
 
+#if 'A' == '\xC1' /* CHARSET_EBCDIC */
+      char buffer64[64];
+      const md5_uint32 *words_next = (const md5_uint32 *)&((char *)words)[sizeof buffer64];
+      memcpy((void*)buffer64, (void*)words, sizeof buffer64);
+      _e2a_n((void*)buffer64, sizeof buffer64);
+      words = (const md5_uint32 *)buffer64;
+#endif
+
       /* First round: using the given function, the context and a constant
         the next context is computed.  Because the algorithms processing
         unit is a 32-bit word and it is determined to work on words in
@@ -400,6 +422,9 @@
       B += B_save;
       C += C_save;
       D += D_save;
+#if 'A' == '\xC1' /* CHARSET_EBCDIC */
+      words = words_next;
+#endif
     }
 
   /* Put checksum in context given as argument.  */
diff -bur wget-1.8.2.orig/src/http.c wget-1.8.2/src/http.c
--- wget-1.8.2.orig/src/http.c  Mon Oct  6 17:13:12 2003
+++ wget-1.8.2/src/http.c       Mon Oct  6 17:10:28 2003
@@ -53,6 +53,9 @@
 #  include <time.h>
 # endif
 #endif
+#if #system(bs2000)
+#include <ascii_ebcdic.h>
+#endif
 
 #include "wget.h"
 #include "utils.h"
@@ -616,6 +619,11 @@
     }
 #endif /* HAVE_SSL */
 
+#if 'A' == '\xC1' /* CHARSET_EBCDIC */
+  /* Initialize to text/plain to force header conversion */
+  http_set_convert_flag("text/plain");
+#endif
+
   if (!(*dt & HEAD_ONLY))
     /* If we're doing a GET on the URL, as opposed to just a HEAD, we need to
        know the local filename so we can save to it. */
@@ -1027,7 +1035,12 @@
       /* Try getting content-type.  */
       if (!type)
        if (header_process (hdr, "Content-Type", http_process_type, &type))
+       {
+#if 'A' == '\xC1' /* CHARSET_EBCDIC */
+         http_set_convert_flag(type);
+#endif
          goto done_header;
+       }
       /* Try getting location.  */
       if (!hs->newloc)
        if (header_process (hdr, "Location", header_strdup, &hs->newloc))
@@ -2125,6 +2138,9 @@
 
   t1 = (char *)alloca (len1 + 1);
   sprintf (t1, "%s:%s", user, passwd);
+#if 'A' == '\xC1' /* CHARSET_EBCDIC */
+  _e2a_n((unsigned char *)t1,strlen(t1));
+#endif
   t2 = (char *)alloca (1 + len2);
   base64_encode (t1, t2, len1);
   res = (char *)xmalloc (len2 + 11 + strlen (header));
diff -bur wget-1.8.2.orig/src/main.c wget-1.8.2/src/main.c
--- wget-1.8.2.orig/src/main.c  Mon Oct  6 17:13:13 2003
+++ wget-1.8.2/src/main.c       Mon Oct  6 17:12:20 2003
@@ -257,44 +257,44 @@
 
   static struct option long_options[] =
   {
     /* Options without arguments: */
     { "background", no_argument, NULL, 'b' },
     { "backup-converted", no_argument, NULL, 'K' },
     { "continue", no_argument, NULL, 'c' },
     { "convert-links", no_argument, NULL, 'k' },
     { "debug", no_argument, NULL, 'd' },
-    { "delete-after", no_argument, NULL, 136 },
-    { "dont-remove-listing", no_argument, NULL, 149 },
-    { "follow-ftp", no_argument, NULL, 142 },
+    { "delete-after", no_argument, NULL, 336 },
+    { "dont-remove-listing", no_argument, NULL, 349 },
+    { "follow-ftp", no_argument, NULL, 342 },
     { "force-directories", no_argument, NULL, 'x' },
     { "force-hier", no_argument, NULL, 'x' }, /* obsolete */
     { "force-html", no_argument, NULL, 'F'},
     { "help", no_argument, NULL, 'h' },
     { "html-extension", no_argument, NULL, 'E' },
-    { "ignore-length", no_argument, NULL, 138 },
+    { "ignore-length", no_argument, NULL, 338 },
 #ifdef INET6
     { "inet", no_argument, NULL, '4' },
     { "inet6", no_argument, NULL, '6' },
 #endif
     { "mirror", no_argument, NULL, 'm' },
-    { "no-clobber", no_argument, NULL, 141 },
-    { "no-directories", no_argument, NULL, 147 },
-    { "no-host-directories", no_argument, NULL, 148 },
-    { "no-host-lookup", no_argument, NULL, 150 },
-    { "no-http-keep-alive", no_argument, NULL, 156 },
-    { "no-parent", no_argument, NULL, 133 },
-    { "non-verbose", no_argument, NULL, 146 },
-    { "passive-ftp", no_argument, NULL, 139 },
+    { "no-clobber", no_argument, NULL, 341 },
+    { "no-directories", no_argument, NULL, 347 },
+    { "no-host-directories", no_argument, NULL, 348 },
+    { "no-host-lookup", no_argument, NULL, 350 },
+    { "no-http-keep-alive", no_argument, NULL, 356 },
+    { "no-parent", no_argument, NULL, 333 },
+    { "non-verbose", no_argument, NULL, 346 },
+    { "passive-ftp", no_argument, NULL, 339 },
     { "page-requisites", no_argument, NULL, 'p' },
     { "quiet", no_argument, NULL, 'q' },
-    { "random-wait", no_argument, NULL, 165 },
+    { "random-wait", no_argument, NULL, 365 },
     { "recursive", no_argument, NULL, 'r' },
     { "relative", no_argument, NULL, 'L' },
-    { "retr-symlinks", no_argument, NULL, 137 },
+    { "retr-symlinks", no_argument, NULL, 337 },
     { "save-headers", no_argument, NULL, 's' },
     { "server-response", no_argument, NULL, 'S' },
     { "span-hosts", no_argument, NULL, 'H' },
-    { "spider", no_argument, NULL, 132 },
+    { "spider", no_argument, NULL, 332 },
     { "timestamping", no_argument, NULL, 'N' },
     { "verbose", no_argument, NULL, 'v' },
     { "version", no_argument, NULL, 'V' },
@@ -302,52 +302,52 @@
     /* Options accepting an argument: */
     { "accept", required_argument, NULL, 'A' },
     { "append-output", required_argument, NULL, 'a' },
-    { "backups", required_argument, NULL, 151 }, /* undocumented */
+    { "backups", required_argument, NULL, 351 }, /* undocumented */
     { "base", required_argument, NULL, 'B' },
-    { "bind-address", required_argument, NULL, 155 },
+    { "bind-address", required_argument, NULL, 355 },
     { "cache", required_argument, NULL, 'C' },
-    { "cookies", required_argument, NULL, 160 },
-    { "cut-dirs", required_argument, NULL, 145 },
+    { "cookies", required_argument, NULL, 360 },
+    { "cut-dirs", required_argument, NULL, 345 },
     { "directory-prefix", required_argument, NULL, 'P' },
     { "domains", required_argument, NULL, 'D' },
-    { "dot-style", required_argument, NULL, 134 },
+    { "dot-style", required_argument, NULL, 334 },
     { "execute", required_argument, NULL, 'e' },
     { "exclude-directories", required_argument, NULL, 'X' },
-    { "exclude-domains", required_argument, NULL, 140 },
-    { "follow-tags", required_argument, NULL, 153 },
+    { "exclude-domains", required_argument, NULL, 340 },
+    { "follow-tags", required_argument, NULL, 353 },
     { "glob", required_argument, NULL, 'g' },
-    { "header", required_argument, NULL, 131 },
-    { "htmlify", required_argument, NULL, 135 },
-    { "http-passwd", required_argument, NULL, 130 },
-    { "http-user", required_argument, NULL, 129 },
+    { "header", required_argument, NULL, 331 },
+    { "htmlify", required_argument, NULL, 335 },
+    { "http-passwd", required_argument, NULL, 330 },
+    { "http-user", required_argument, NULL, 329 },
     { "ignore-tags", required_argument, NULL, 'G' },
     { "include-directories", required_argument, NULL, 'I' },
     { "input-file", required_argument, NULL, 'i' },
     { "level", required_argument, NULL, 'l' },
-    { "limit-rate", required_argument, NULL, 164 },
-    { "load-cookies", required_argument, NULL, 161 },
+    { "limit-rate", required_argument, NULL, 364 },
+    { "load-cookies", required_argument, NULL, 361 },
     { "no", required_argument, NULL, 'n' },
     { "output-document", required_argument, NULL, 'O' },
     { "output-file", required_argument, NULL, 'o' },
-    { "progress", required_argument, NULL, 163 },
+    { "progress", required_argument, NULL, 363 },
     { "proxy", required_argument, NULL, 'Y' },
-    { "proxy-passwd", required_argument, NULL, 144 },
-    { "proxy-user", required_argument, NULL, 143 },
+    { "proxy-passwd", required_argument, NULL, 344 },
+    { "proxy-user", required_argument, NULL, 343 },
     { "quota", required_argument, NULL, 'Q' },
     { "reject", required_argument, NULL, 'R' },
-    { "save-cookies", required_argument, NULL, 162 },
+    { "save-cookies", required_argument, NULL, 362 },
     { "timeout", required_argument, NULL, 'T' },
     { "tries", required_argument, NULL, 't' },
     { "user-agent", required_argument, NULL, 'U' },
-    { "referer", required_argument, NULL, 157 },
+    { "referer", required_argument, NULL, 357 },
     { "use-proxy", required_argument, NULL, 'Y' },
 #ifdef HAVE_SSL
-    { "sslcertfile", required_argument, NULL, 158 },
-    { "sslcertkey", required_argument, NULL, 159 },
-    { "egd-file", required_argument, NULL, 166 },
+    { "sslcertfile", required_argument, NULL, 358 },
+    { "sslcertkey", required_argument, NULL, 359 },
+    { "egd-file", required_argument, NULL, 366 },
 #endif /* HAVE_SSL */
     { "wait", required_argument, NULL, 'w' },
-    { "waitretry", required_argument, NULL, 152 },
+    { "waitretry", required_argument, NULL, 352 },
     { 0, 0, 0, 0 }
   };
 
@@ -379,52 +379,52 @@
       switch (c)
        {
          /* Options without arguments: */
-       case 132:
+       case 332:
          setval ("spider", "on");
          break;
-       case 133:
+       case 333:
          setval ("noparent", "on");
          break;
-       case 136:
+       case 336:
          setval ("deleteafter", "on");
          break;
-       case 137:
+       case 337:
          setval ("retrsymlinks", "on");
          break;
-       case 138:
+       case 338:
          setval ("ignorelength", "on");
          break;
-       case 139:
+       case 339:
          setval ("passiveftp", "on");
          break;
-       case 141:
+       case 341:
          setval ("noclobber", "on");
          break;
-       case 142:
+       case 342:
          setval ("followftp", "on");
          break;
-       case 145:
+       case 345:
          setval ("cutdirs", optarg);
          break;
-       case 146:
+       case 346:
          setval ("verbose", "off");
          break;
-       case 147:
+       case 347:
          setval ("dirstruct", "off");
          break;
-       case 148:
+       case 348:
          setval ("addhostdir", "off");
          break;
-       case 149:
+       case 349:
          setval ("removelisting", "off");
          break;
-       case 155:
+       case 355:
          setval ("bindaddress", optarg);
          break;
-       case 156:
+       case 356:
          setval ("httpkeepalive", "off");
          break;
-       case 165:
+       case 365:
          setval ("randomwait", "on");
          break;
 #ifdef INET6
@@ -517,65 +517,65 @@
          break;
 
          /* Options accepting an argument: */
-       case 129:
+       case 329:
          setval ("httpuser", optarg);
          break;
-       case 130:
+       case 330:
          setval ("httppasswd", optarg);
          break;
-       case 131:
+       case 331:
          setval ("header", optarg);
          break;
-       case 134:
+       case 334:
          setval ("dotstyle", optarg);
          break;
-       case 135:
+       case 335:
          setval ("htmlify", optarg);
          break;
-       case 140:
+       case 340:
          setval ("excludedomains", optarg);
          break;
-       case 143:
+       case 343:
          setval ("proxyuser", optarg);
          break;
-       case 144:
+       case 344:
          setval ("proxypasswd", optarg);
          break;
-       case 151:
+       case 351:
          setval ("backups", optarg);
          break;
-       case 152:
+       case 352:
          setval ("waitretry", optarg);
          break;
-       case 153:
+       case 353:
          setval ("followtags", optarg);
          break;
-       case 160:
+       case 360:
          setval ("cookies", optarg);
          break;
-       case 161:
+       case 361:
          setval ("loadcookies", optarg);
          break;
-       case 162:
+       case 362:
          setval ("savecookies", optarg);
          break;
-       case 163:
+       case 363:
          setval ("progress", optarg);
          break;
-       case 164:
+       case 364:
          setval ("limitrate", optarg);
          break;
-       case 157:
+       case 357:
          setval ("referer", optarg);
          break;
 #ifdef HAVE_SSL
-       case 158:
+       case 358:
          setval ("sslcertfile", optarg);
          break;
-       case 159:
+       case 359:
          setval ("sslcertkey", optarg);
          break;
-       case 166:
+       case 366:
          setval ("egdfile", optarg);
          break;
 #endif /* HAVE_SSL */
diff -bur wget-1.8.2.orig/src/retr.c wget-1.8.2/src/retr.c
--- wget-1.8.2.orig/src/retr.c  Mon Oct  6 17:13:13 2003
+++ wget-1.8.2/src/retr.c       Mon Oct  6 17:10:28 2003
@@ -58,6 +58,10 @@
 # include "gen_sslfunc.h"      /* for ssl_iread */
 #endif
 
+#if #system(bs2000)
+#include <ascii_ebcdic.h>
+#endif
+
 #ifndef errno
 extern int errno;
 #endif
@@ -152,6 +156,11 @@
       int sz = 0;
       while ((res = rbuf_flush (rbuf, c, sizeof (c))) != 0)
        {
+#if 'A' == '\xC1' /* CHARSET_EBCDIC */
+         /* un-convert data (they were converted for header reading) */
+         if (!http_get_convert_flag())
+           _e2a_n(c,res);
+#endif
          fwrite (c, sizeof (char), res, fp);
          *len += res;
          sz += res;
diff -bur wget-1.8.2.orig/src/safe-ctype.c wget-1.8.2/src/safe-ctype.c
--- wget-1.8.2.orig/src/safe-ctype.c    Sat May 18 05:05:21 2002
+++ wget-1.8.2/src/safe-ctype.c Mon Oct  6 17:10:29 2003
@@ -168,6 +168,87 @@
  240,241,242,243, 244,245,246,247, 248,249,250,251, 252,253,254,255,
 };
 
+#elif ('A' == '\xC1') /* EBCDIC */ && \
+      ('~' == '\xFF') /* OSD_EBCDIC_DF04_1 (aka. EDF04-1) */
+
+const unsigned short _sch_istable[256] =
+{
+  Z,  C,  C,  C,  0,  T,  0,  C, /* 0x00 0x01 0x02 0x03 0x04 0x05 0x06 0x07  */
+  0,  0,  0,  M,  M,  V,  C,  C, /* 0x08 0x09 0x0A 0x0B 0x0C 0x0D 0x0E 0x0F  */
+  C,  C,  C,  C,  0,  V,  C,  0, /* 0x10 0x11 0x12 0x13 0x14 0x15 0x16 0x17  */
+  C,  C,  0,  0,  C,  C,  C,  C, /* 0x18 0x19 0x1A 0x1B 0x1C 0x1D 0x1E 0x1F  */
+  0,  0,  0,  0,  0,  C,  C,  C, /* 0x20 0x21 0x22 0x23 0x24 0x25 0x26 0x27  */
+  0,  0,  0,  0,  0,  C,  C,  C, /* 0x28 0x29 0x2A 0x2B 0x2C 0x2D 0x2E 0x2F  */
+  0,  0,  C,  0,  0,  0,  0,  C, /* 0x30 0x31 0x32 0x33 0x34 0x35 0x36 0x37  */
+  0,  0,  0,  0,  C,  C,  0,  C, /* 0x38 0x39 0x3A 0x3B 0x3C 0x3D 0x3E 0x3F  */
+  S,  0,  0,  0,  0,  0,  0,  0, /* 0x40 0x41 0x42 0x43 0x44 0x45 0x46 0x47  */
+  0,  0,  P,  P,  P,  P,  P,  P, /* 0x48 0x49   `    .    <    (    +    |   */
+  P,  0,  0,  0,  0,  0,  0,  0, /*   &  0x51 0x52 0x53 0x54 0x55 0x56 0x57  */
+  0,  0,  P,  P,  P,  P,  P,  0, /* 0x58 0x59   !    $    *    )    ;  0x5F  */
+  P,  P,  0,  0,  0,  0,  0,  0, /*   -    /  0x62 0x63 0x64 0x65 0x66 0x67  */
+  0,  0,  P,  P,  P,  _,  P,  P, /* 0x68 0x69   ^    ,    %    _    >    ?   */
+  0,  0,  0,  0,  0,  0,  0,  0, /* 0x70 0x71 0x72 0x73 0x74 0x75 0x76 0x77  */
+  0,  0,  P,  P,  P,  P,  P,  P, /* 0x78 0x79   :    #    @    '    =    "   */
+  0, XL, XL, XL, XL, XL, XL,  L, /* 0x80   a    b    c    d    e    f    g   */
+  L,  L,  0,  0,  0,  0,  0,  0, /*   h    i  0x8A 0x8B 0x8C 0x8D 0x8E 0x8F  */
+  0,  L,  L,  L,  L,  L,  L,  L, /* 0x90   j    k    l    m    n    o    p   */
+  L,  L,  0,  0,  0,  0,  0,  0, /*   q    r  0x9A 0x9B 0x9C 0x9D 0x9E 0x9F  */
+  0,  0,  L,  L,  L,  L,  L,  L, /* 0xA0 0xA1   s    t    u    v    w    x   */
+  L,  L,  0,  0,  0,  0,  0,  0, /*   y    z  0xAA 0xAB 0xAC 0xAD 0xAE 0xAF  */
+  0,  0,  0,  0,  0,  0,  0,  0, /* 0xB0 0xB1 0xB2 0xB3 0xB4 0xB5 0xB6 0xB7  */
+  0,  0,  0,  P,  P,  P,  0,  0, /* 0xB8 0xB9 0xBA   [    \    ]  0xBE 0xBF  */
+  0, XU, XU, XU, XU, XU, XU,  U, /* 0xC0   A    B    C    D    E    F    G   */
+  U,  U,  0,  0,  0,  0,  0,  0, /*   H    I  0xCA 0xCB 0xCC 0xCD 0xCE 0xCF  */
+  0,  U,  U,  U,  U,  U,  U,  U, /* 0xD0   J    K    L    M    N    O    P   */
+  U,  U,  0,  0,  0,  0,  0,  0, /*   Q    R  0xDA 0xDB 0xDC 0xDD 0xDE 0xDF  */
+  0,  0,  U,  U,  U,  U,  U,  U, /* 0xE0 0xE1   S    T    U    V    W    X   */
+  U,  U,  0,  0,  0,  0,  0,  0, /*   Y    Z  0xEA 0xEB 0xEC 0xED 0xEE 0xEF  */
+  D,  D,  D,  D,  D,  D,  D,  D, /*   0    1    2    3    4    5    6    7   */
+  D,  D,  0,  P,  0,  P,  0,  P, /*   8    9  0xFA   {  0xFC   }  0xFE   ~   */
+};
+
+
+const unsigned char _sch_tolower[256] =
+{
+0x00,0x01,0x02,0x03, 0x04,0x05,0x06,0x07, 0x08,0x09,0x0A,0x0B, 0x0C,0x0D,0x0E,0x0F,
+0x10,0x11,0x12,0x13, 0x14,0x15,0x16,0x17, 0x18,0x19,0x1A,0x1B, 0x1C,0x1D,0x1E,0x1F,
+0x20,0x21,0x22,0x23, 0x24,0x25,0x26,0x27, 0x28,0x29,0x2A,0x2B, 0x2C,0x2D,0x2E,0x2F,
+0x30,0x31,0x32,0x33, 0x34,0x35,0x36,0x37, 0x38,0x39,0x3A,0x3B, 0x3C,0x3D,0x3E,0x3F,
+0x40,0x41,0x42,0x43, 0x44,0x45,0x46,0x47, 0x48,0x49,0x4A,0x4B, 0x4C,0x4D,0x4E,0x4F,
+0x50,0x51,0x52,0x53, 0x54,0x55,0x56,0x57, 0x58,0x59,0x5A,0x5B, 0x5C,0x5D,0x5E,0x5F,
+0x60,0x61,0x62,0x63, 0x64,0x65,0x66,0x67, 0x68,0x69,0x6A,0x6B, 0x6C,0x6D,0x6E,0x6F,
+0x70,0x71,0x72,0x73, 0x74,0x75,0x76,0x77, 0x78,0x79,0x7A,0x7B, 0x7C,0x7D,0x7E,0x7F,
+0x80, 'a', 'b', 'c',  'd', 'e', 'f', 'g',  'h', 'i',0x8A,0x8B, 0x8C,0x8D,0x8E,0x8F,
+0x90, 'j', 'k', 'l',  'm', 'n', 'o', 'p',  'q', 'r',0x9A,0x9B, 0x9C,0x9D,0x9E,0x9F,
+0xA0,0xA1, 's', 't',  'u', 'v', 'w', 'x',  'y', 'z',0xAA,0xAB, 0xAC,0xAD,0xAE,0xAF,
+0xB0,0xB1,0xB2,0xB3, 0xB4,0xB5,0xB6,0xB7, 0xB8,0xB9,0xBA,0xBB, 0xBC,0xBD,0xBE,0xBF,
+0xC0, 'a', 'b', 'c',  'd', 'e', 'f', 'g',  'h', 'i',0xCA,0xCB, 0xCC,0xCD,0xCE,0xCF,
+0xD0, 'j', 'k', 'l',  'm', 'n', 'o', 'p',  'q', 'r',0xDA,0xDB, 0xDC,0xDD,0xDE,0xDF,
+0xE0,0xE1, 's', 't',  'u', 'v', 'w', 'x',  'y', 'z',0xEA,0xEB, 0xEC,0xED,0xEE,0xEF,
+0xF0,0xF1,0xF2,0xF3, 0xF4,0xF5,0xF6,0xF7, 0xF8,0xF9,0xFA,0xFB, 0xFC,0xFD,0xFE,0xFF,
+};
+
+
+const unsigned char _sch_toupper[256] =
+{
+0x00,0x01,0x02,0x03, 0x04,0x05,0x06,0x07, 0x08,0x09,0x0A,0x0B, 0x0C,0x0D,0x0E,0x0F,
+0x10,0x11,0x12,0x13, 0x14,0x15,0x16,0x17, 0x18,0x19,0x1A,0x1B, 0x1C,0x1D,0x1E,0x1F,
+0x20,0x21,0x22,0x23, 0x24,0x25,0x26,0x27, 0x28,0x29,0x2A,0x2B, 0x2C,0x2D,0x2E,0x2F,
+0x30,0x31,0x32,0x33, 0x34,0x35,0x36,0x37, 0x38,0x39,0x3A,0x3B, 0x3C,0x3D,0x3E,0x3F,
+0x40,0x41,0x42,0x43, 0x44,0x45,0x46,0x47, 0x48,0x49,0x4A,0x4B, 0x4C,0x4D,0x4E,0x4F,
+0x50,0x51,0x52,0x53, 0x54,0x55,0x56,0x57, 0x58,0x59,0x5A,0x5B, 0x5C,0x5D,0x5E,0x5F,
+0x60,0x61,0x62,0x63, 0x64,0x65,0x66,0x67, 0x68,0x69,0x6A,0x6B, 0x6C,0x6D,0x6E,0x6F,
+0x70,0x71,0x72,0x73, 0x74,0x75,0x76,0x77, 0x78,0x79,0x7A,0x7B, 0x7C,0x7D,0x7E,0x7F,
+0x80, 'A', 'B', 'C',  'D', 'E', 'F', 'G',  'H', 'I',0x8A,0x8B, 0x8C,0x8D,0x8E,0x8F,
+0x90, 'J', 'K', 'L',  'M', 'N', 'O', 'P',  'Q', 'R',0x9A,0x9B, 0x9C,0x9D,0x9E,0x9F,
+0xA0,0xA1, 'S', 'T',  'U', 'V', 'W', 'X',  'Y', 'Z',0xAA,0xAB, 0xAC,0xAD,0xAE,0xAF,
+0xB0,0xB1,0xB2,0xB3, 0xB4,0xB5,0xB6,0xB7, 0xB8,0xB9,0xBA,0xBB, 0xBC,0xBD,0xBE,0xBF,
+0xC0, 'A', 'B', 'C',  'D', 'E', 'F', 'G',  'H', 'I',0xCA,0xCB, 0xCC,0xCD,0xCE,0xCF,
+0xD0, 'J', 'K', 'L',  'M', 'N', 'O', 'P',  'Q', 'R',0xDA,0xDB, 0xDC,0xDD,0xDE,0xDF,
+0xE0,0xE1, 'S', 'T',  'U', 'V', 'W', 'X',  'Y', 'Z',0xEA,0xEB, 0xEC,0xED,0xEE,0xEF,
+0xF0,0xF1,0xF2,0xF3, 0xF4,0xF5,0xF6,0xF7, 0xF8,0xF9,0xFA,0xFB, 0xFC,0xFD,0xFE,0xFF,
+};
+
 #else
  #error "Unsupported host character set"
 #endif /* not ASCII */
diff -bur wget-1.8.2.orig/src/url.c wget-1.8.2/src/url.c
--- wget-1.8.2.orig/src/url.c   Mon Oct  6 17:13:14 2003
+++ wget-1.8.2/src/url.c        Mon Oct  6 17:10:29 2003
@@ -49,6 +49,10 @@
 #include "host.h"
 #include "hash.h"
 
+#if #system(bs2000)
+#include <ascii_ebcdic.h>
+#endif
+
 #ifndef errno
 extern int errno;
 #endif
@@ -98,7 +102,11 @@
 #define U  urlchr_unsafe
 #define RU R|U
 
+#if 'A' == '\xC1'
+#define urlchr_test(c, mask) (urlchr_table[(unsigned char)(_e2a_tab[c])] & (mask))
+#else
 #define urlchr_test(c, mask) (urlchr_table[(unsigned char)(c)] & (mask))
+#endif
 
 /* rfc1738 reserved chars, preserved from encoding.  */
 
@@ -164,6 +172,9 @@
              || !(ISXDIGIT (*(h + 1)) && ISXDIGIT (*(h + 2))))
            goto copychar;
          *t = (XCHAR_TO_XDIGIT (*(h + 1)) << 4) + XCHAR_TO_XDIGIT (*(h + 2));
+#if 'A' == '\xC1' /* CHARSET_EBCDIC */
+         *t = _a2e_tab[(unsigned char)*t];
+#endif
          h += 2;
        }
     }
@@ -197,6 +208,9 @@
       if (UNSAFE_CHAR (*p1))
        {
          unsigned char c = *p1++;
+#if 'A' == '\xC1' /* CHARSET_EBCDIC */
+         c = _e2a_tab[(unsigned char)c];
+#endif
          *p2++ = '%';
          *p2++ = XDIGIT_TO_XCHAR (c >> 4);
          *p2++ = XDIGIT_TO_XCHAR (c & 0xf);
@@ -252,6 +266,9 @@
             is. */
          char preempt = (XCHAR_TO_XDIGIT (*(p + 1)) << 4) +
            XCHAR_TO_XDIGIT (*(p + 2));
+#if 'A' == '\xC1' /* CHARSET_EBCDIC */
+         preempt = _a2e_tab[(unsigned char)preempt];
+#endif
 
          if (UNSAFE_CHAR (preempt) || RESERVED_CHAR (preempt))
            return CM_PASSTHROUGH;
@@ -393,6 +410,9 @@
        case CM_ENCODE:
          {
            unsigned char c = *p1++;
+#if 'A' == '\xC1' /* CHARSET_EBCDIC */
+           c = _e2a_tab[(unsigned char)c];
+#endif
            *p2++ = '%';
            *p2++ = XDIGIT_TO_XCHAR (c >> 4);
            *p2++ = XDIGIT_TO_XCHAR (c & 0xf);
@@ -401,6 +421,9 @@
        case CM_DECODE:
          *p2++ = ((XCHAR_TO_XDIGIT (*(p1 + 1)) << 4)
                   + (XCHAR_TO_XDIGIT (*(p1 + 2))));
+#if 'A' == '\xC1' /* CHARSET_EBCDIC */
+         p2[-1] = _a2e_tab[(unsigned char)p2[-1]];
+#endif
          p1 += 3;              /* skip %xx */
          break;
        case CM_PASSTHROUGH:
@@ -1342,6 +1365,9 @@
       if (UNSAFE_CHAR (*from))
        {
          unsigned char c = *from++;
+#if 'A' == '\xC1' /* CHARSET_EBCDIC */
+         c = _e2a_tab[(unsigned char)c];
+#endif
          *to++ = '%';
          *to++ = XDIGIT_TO_XCHAR (c >> 4);
          *to++ = XDIGIT_TO_XCHAR (c & 0xf);
--- /dev/null   Mon Oct  6 17:08:40 2003
+++ wget-1.8.2/src/safe-ctype-mk.c      Mon Oct  6 17:10:28 2003
@@ -0,0 +1,282 @@
+#include <ctype.h>
+#include <stdio.h>
+#if 'A' == '\xC1'
+#include <ascii_ebcdic.h>
+#endif
+
+/* Shorthand */
+#define bl isblank
+#define cn iscntrl
+#define di isdigit
+#define is isidst
+#define lo islower
+#define nv isnvsp
+#define pn ispunct
+#define pr isprint
+#define sp isspace
+#define up isupper
+#define vs isvsp
+#define xd isxdigit
+
+#ifndef isblank
+static int
+isblank(int ch)
+{
+  /* The isblank() function tests for a space or tab character.  For single C
+     chars locales (see multibyte(3)) the value of the argument is repre-
+     sentable as an unsigned char or the value of EOF.
+  */
+  return (ch == ' ' || ch == '\t');
+}
+#endif
+
+/* Masks.  */
+#define L  lo|is   |pr /* lower case letter */
+#define XL lo|is|xd|pr /* lowercase hex digit */
+#define U  up|is   |pr /* upper case letter */
+#define XU up|is|xd|pr /* uppercase hex digit */
+#define D  di   |xd|pr /* decimal digit */
+#define P  pn      |pr /* punctuation */
+#define _  pn|is   |pr /* underscore */
+
+#define C           cn /* control character */
+#define Z  nv      |cn /* NUL */
+#define M  nv|sp   |cn /* cursor movement: \f \v */
+#define V  vs|sp   |cn /* vertical space: \r \n */
+#define T  nv|sp|bl|cn /* tab */
+#define S  nv|sp|bl|pr /* space */
+
+static void
+append_str(char *dest, char *str)
+{
+  if (dest[0] != '\0')
+    strcat(dest, "|");
+  strcat(dest, str);
+}
+
+/* control character */
+static void
+test_C(int ch, char *dest)
+{
+  if (ch && ch != '\t' && ch != '\f' && ch != '\v' && ch != '\r' && ch != '\n' && C(ch))
+    append_str(dest, "C");
+}
+
+/* NUL character */
+static void
+test_Z(int ch, char *dest)
+{
+  if (ch == '\0')
+    append_str(dest, "Z");
+}
+
+/* cursor movement: \f \v */
+static void
+test_M(int ch, char *dest)
+{
+  if (ch == '\f' || ch == '\v')
+  {
+    append_str(dest, "M");
+  }
+}
+
+/* vertical space: \r \n */
+static void
+test_V(int ch, char *dest)
+{
+  if (ch == '\r' || ch == '\n')
+    append_str(dest, "V");
+}
+
+/* tab */
+static void
+test_T(int ch, char *dest)
+{
+  if (ch == '\t')
+    append_str(dest, "T");
+}
+
+/* space */
+static void
+test_S(int ch, char *dest)
+{
+  if (ch == ' ')
+    append_str(dest, "S");
+}
+
+/* digit */
+static void
+test_D(int ch, char *dest)
+{
+  if (isdigit(ch))
+    append_str(dest, "D");
+}
+
+/* upper */
+static void
+test_U(int ch, char *dest)
+{
+  if (isupper(ch))
+  {
+    if (isxdigit(ch))
+    {
+      append_str(dest, "XU");
+      return;
+    }
+    append_str(dest, "U");
+  }
+}
+
+/* lower */
+static void
+test_L(int ch, char *dest)
+{
+  if (islower(ch))
+  {
+    if (isxdigit(ch))
+    {
+      append_str(dest, "XL");
+      return;
+    }
+    append_str(dest, "L");
+  }
+}
+
+/* punctuation */
+static void
+test_P(int ch, char *dest)
+{
+  if (isgraph(ch) && !isalnum(ch) && ch != '_')
+    append_str(dest, "P");
+}
+
+/* underscore */
+static void
+test__(int ch, char *dest)
+{
+  if (ch == '_')
+    append_str(dest, "_");
+}
+
+main()
+{
+  int i;
+  char attrs[32];
+  char descr[256];
+#if 'A' == '\xC1'
+  int ascii_emu = 0;
+#endif
+
+  strcpy (descr, "");
+
+  printf("const unsigned short _sch_istable[256] =\n{\n");
+
+  for (i=0; i<256; ++i)
+  {
+    strcpy (attrs, "");
+#if 'A' == '\xC1'
+    if (ascii_emu) i = _a2e_tab[i];
+#endif
+    test_C(i, attrs);
+    test_Z(i, attrs);
+    test_M(i, attrs);
+    test_V(i, attrs);
+    test_T(i, attrs);
+    test_S(i, attrs);
+    test_D(i, attrs);
+    test_U(i, attrs);
+    test_L(i, attrs);
+    test_P(i, attrs);
+    test__(i, attrs);
+
+    if (attrs[0] == '\0')
+      strcat(attrs, "0");
+/*    strcat(attrs, ","); */
+    printf("%3s,", attrs);
+    if (isgraph(i))
+    {
+      sprintf(&descr[strlen(descr)], "  %c  ", i);
+#if 'A' == '\xC1'
+      if (ascii_emu) i = _e2a_tab[i];
+#endif
+    }
+    else
+    {
+#if 'A' == '\xC1'
+      if (ascii_emu) i = _e2a_tab[i];
+#endif
+      sprintf(&descr[strlen(descr)], "0x%02X ", i);
+    }
+    if ((i % 8) == 7)
+    {
+      printf(" /* %s */\n", descr);
+      strcpy (descr, "");
+    }
+  }
+  printf("};\n");
+
+  printf("\n\nconst unsigned char _sch_tolower[256] =\n{\n");
+
+  for (i=0; i<256; ++i)
+  {
+#if 'A' == '\xC1'
+    if (ascii_emu) i = _a2e_tab[i];
+#endif
+    if (isalpha(i))
+    {
+      printf(" '%c',", tolower(i));
+#if 'A' == '\xC1'
+      if (ascii_emu) i = _e2a_tab[i];
+#endif
+    }
+    else
+    {
+#if 'A' == '\xC1'
+      if (ascii_emu) i = _e2a_tab[i];
+#endif
+      printf("0x%02X,", i);
+    }
+    if ((i % 16) == 15)
+    {
+      printf("\n");
+    }
+    else if ((i % 4) == 3)
+    {
+      printf(" ");
+    }
+  }
+  printf("};\n");
+
+
+  printf("\n\nconst unsigned char _sch_toupper[256] =\n{\n");
+
+  for (i=0; i<256; ++i)
+  {
+#if 'A' == '\xC1'
+    if (ascii_emu) i = _a2e_tab[i];
+#endif
+    if (isalpha(i))
+    {
+      printf(" '%c',", toupper(i));
+#if 'A' == '\xC1'
+      if (ascii_emu) i = _e2a_tab[i];
+#endif
+    }
+    else
+    {
+#if 'A' == '\xC1'
+      if (ascii_emu) i = _e2a_tab[i];
+#endif
+      printf("0x%02X,", i);
+    }
+    if ((i % 16) == 15)
+    {
+      printf("\n");
+    }
+    else if ((i % 4) == 3)
+    {
+      printf(" ");
+    }
+  }
+  printf("};\n");
+}

Reply via email to