Hi,

> it would be great if you decided to do the hacking. We can offer some
> help of course. There's a clearly defined hook for the conversion
> routine so the task is just to write the routine. It should convert a
> UTF-8 sequence into a code in an ISO-8859-like encoding. If you're
> interested, take a look at utf8.cpp and
> PhysicalOutputLayerObj::sendOut() in output.cpp. I'm ready for a more
> detailed discussion.

OK, attached is a quick-and-dirty hack that seems to do the job. It
could use some testing, and I'm not sure what utf8CharCode() is
supposed to do. The patch is against Sablot-0.40, and I'd be happy
if it made its way into the next release.


Salut, Sven
diff -u ../../Sablot-0.40-orig/Sablot/output.cpp ./output.cpp
--- ../../Sablot-0.40-orig/Sablot/output.cpp    Fri Jun 30 12:23:25 2000
+++ ./output.cpp        Tue Jul 11 16:08:46 2000
@@ -205,6 +205,21 @@
     else return OUTPUT_UNKNOWN;
 }
 
+// FIXME: does it make sense to default to UTF8?
+// Maps the string stored under XSLA_ENCODING onto the Encoding enum; any
+// value not listed below yields ENC_UTF8.
+// NOTE(review): names are compared with == against these spellings only
+// ("utf16", not "UTF-16"); confirm how Str's == treats case and aliases.
+Encoding OutputDefinition::getEncoding() const
+{
+    const Str& name = getValueStr(XSLA_ENCODING);
+    if (name == (char*) "utf16")      return ENC_UTF16;
+    if (name == (char*) "ascii")      return ENC_ASCII;
+    if (name == (char*) "iso-8859-1") return ENC_8859_1;
+    if (name == (char*) "iso-8859-2") return ENC_8859_2;
+    return ENC_UTF8;
+}
+
 // FIXME: we don't choose HTML output as default under the prescribed conditions
 eFlag OutputDefinition::setDefaults()
 {
@@ -266,8 +281,7 @@
     method = outDef -> getMethod();
     if (method != OUTPUT_UNKNOWN)
         E( outDef -> setDefaults() );
-    enc = ENC_UTF8;
-    // FIXME: set encoding
+    enc = outDef -> getEncoding();
     return OK;
 }
 
diff -u ../../Sablot-0.40-orig/Sablot/output.h ./output.h
--- ../../Sablot-0.40-orig/Sablot/output.h      Fri Jun 30 12:23:25 2000
+++ ./output.h  Tue Jul 11 17:54:27 2000
@@ -88,6 +88,7 @@
     Bool askQNameList(XSL_ATT itemId, const QName &what) const;
     int getStatus(XSL_ATT itemId) const;
     OutputMethod getMethod() const;
+    Encoding getEncoding() const;
 private:
     eFlag setItemStr_(XSL_ATT itemId, const Str& value, Bool doCheck, Bool soft);
     Str stringItems[STRING_ITEMS_COUNT];
@@ -125,7 +126,7 @@
     OutputDefinition *outDef;
     OutputMethod method;
     char buffer[OUTPUT_BUFFER_SIZE],
-        smallBuf[32];
+        smallBuf[SMALL_BUFFER_SIZE];
     int curr;
     Encoding enc;
     eFlag sendOut(const char* data, int length, EscMode escapeMode);
Only in .: patch
Common subdirectories: ../../Sablot-0.40-orig/Sablot/sabcmd and ./sabcmd
diff -u ../../Sablot-0.40-orig/Sablot/utf8.cpp ./utf8.cpp
--- ../../Sablot-0.40-orig/Sablot/utf8.cpp      Fri Jun 30 12:23:25 2000
+++ ./utf8.cpp  Tue Jul 11 17:58:53 2000
@@ -15,7 +15,7 @@
  * Portions created by Ginger Alliance are Copyright (C) 2000 Ginger
  * Alliance Ltd. All Rights Reserved.
  * 
- * Contributor(s):
+ * Contributor(s): Sven Neumann <[EMAIL PROTECTED]>
  * 
  * Alternatively, the contents of this file may be used under the
  * terms of the GNU General Public License Version 2 or later (the
@@ -34,26 +34,66 @@
 //      utf8.cpp
 //
 
+#include <assert.h>
+#include <iconv.h>
 #include "utf8.h"
 
+/*  Indexed by Encoding; MUST match the enum order defined in utf8.h  */
+static const char* iconv_encoding[6] =
+{
+  "UTF-8",
+  "UTF-16",
+  "ASCII",
+  "ISO-8859-1",
+  "ISO-8859-2",   /* was a duplicated "ISO-8859-1" */
+  "CP1250"
+};
+
 int utf8SingleCharLength(const char* text)
 {
-    return 1;
+  const unsigned char lead = (unsigned char) *text;  /* avoid sign extension */
+  if ((lead & 0xE0) == 0xC0)       /* 110xxxxx: two-byte sequence */
+    return 2;
+  if ((lead & 0xF0) == 0xE0)       /* 1110xxxx: three-byte sequence */
+    return 3;
+  if ((lead & 0xF8) == 0xF0)       /* 11110xxx: four-byte sequence */
+    return 4;
+  return 1;  /* ASCII; stray continuation/invalid bytes are consumed singly */
 }
 
 unsigned long utf8CharCode(const char *text)
 {
-    return (unsigned long)(*text);
+  return (unsigned long)(*text);  /* first byte only; no multi-byte decoding */
 }
 
 int utf8GetChar(char *dest, const char *src)
 {
-    *dest = *src;
-    return 1;
+  const int nbytes = utf8SingleCharLength (src);   /* length from lead byte */
+  for (int i = 0; i < nbytes; ++i) dest[i] = src[i];   /* no terminator added */
+  return nbytes;
 }
 
 int utf8Recode(char* dest, const char* src, Encoding enc)
 {
-    *dest = *src;
-    return 1;
+  /* UTF-8 output needs no conversion: copy the sequence through.  */
+  if (enc == ENC_UTF8)
+    return utf8GetChar (dest, src);
+
+  /* NOTE(review): a descriptor is opened for every character; caching
+     one per output encoding would avoid the repeated setup cost.  */
+  iconv_t cd = iconv_open (iconv_encoding[enc], "UTF-8");
+  if (cd == (iconv_t)(-1))
+    return 0;   /* unsupported conversion: emit nothing (was an assert) */
+
+  /* POSIX iconv() takes a non-const input pointer.  */
+  char   *inbuf        = const_cast<char*> (src);
+  size_t  inbytesleft  = utf8SingleCharLength (src);
+  size_t  outbytesleft = SMALL_BUFFER_SIZE;
+  char   *outbuf       = dest;
+
+  while (inbytesleft &&
+         iconv (cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft) != (size_t)(-1));
+
+  iconv_close (cd);
+  return SMALL_BUFFER_SIZE - outbytesleft;
 }
diff -u ../../Sablot-0.40-orig/Sablot/utf8.h ./utf8.h
--- ../../Sablot-0.40-orig/Sablot/utf8.h        Fri Jun 30 12:23:25 2000
+++ ./utf8.h    Tue Jul 11 17:55:03 2000
@@ -37,6 +37,8 @@
 #if !defined(Utf8HIncl)
 #define Utf8HIncl
 
+#define SMALL_BUFFER_SIZE 32
+
 enum Encoding
 {
     ENC_UTF8,

Reply via email to