Hi,
> it would be great if you decided to do the hacking. We can offer some
> help of course. There's a clearly defined hook for the conversion
> routine so the task is just to write the routine. It should convert an
> utf-8 sequence into a code in an ISO-8859-like encoding. If you're
> interested, take a look at utf8.cpp and
> PhysicalOutputLayerObj::sendOut() in output.cpp. I'm ready for a more
> detailed discussion.
OK, attached is a quick-and-dirty hack that seems to do the job. It
could use some testing, and I'm not sure what utf8CharCode() is
supposed to do. The patch is against Sablot-0.4.0, and I'd be happy
if it made its way into the next release.
Salut, Sven
diff -u ../../Sablot-0.40-orig/Sablot/output.cpp ./output.cpp
--- ../../Sablot-0.40-orig/Sablot/output.cpp Fri Jun 30 12:23:25 2000
+++ ./output.cpp Tue Jul 11 16:08:46 2000
@@ -205,6 +205,21 @@
else return OUTPUT_UNKNOWN;
}
+// Translate the XSLA_ENCODING string item into an Encoding. FIXME: does it make sense to default to UTF8 ?
+Encoding OutputDefinition::getEncoding() const
+{
+ const Str& encName = getValueStr(XSLA_ENCODING);
+ if (encName == (char*) "utf16")
+ return ENC_UTF16;
+ if (encName == (char*) "ascii")
+ return ENC_ASCII;
+ if (encName == (char*) "iso-8859-1")
+ return ENC_8859_1;
+ if (encName == (char*) "iso-8859-2")
+ return ENC_8859_2;
+ return ENC_UTF8; // any other value, including an unset one, ends up here
+}
+
// FIXME: we don't choose HTML output as default under the prescribed conditions
eFlag OutputDefinition::setDefaults()
{
@@ -266,8 +281,7 @@
method = outDef -> getMethod();
if (method != OUTPUT_UNKNOWN)
E( outDef -> setDefaults() );
- enc = ENC_UTF8;
- // FIXME: set encoding
+ enc = outDef -> getEncoding();
return OK;
}
diff -u ../../Sablot-0.40-orig/Sablot/output.h ./output.h
--- ../../Sablot-0.40-orig/Sablot/output.h Fri Jun 30 12:23:25 2000
+++ ./output.h Tue Jul 11 17:54:27 2000
@@ -88,6 +88,7 @@
Bool askQNameList(XSL_ATT itemId, const QName &what) const;
int getStatus(XSL_ATT itemId) const;
OutputMethod getMethod() const;
+ Encoding getEncoding() const;
private:
eFlag setItemStr_(XSL_ATT itemId, const Str& value, Bool doCheck, Bool soft);
Str stringItems[STRING_ITEMS_COUNT];
@@ -125,7 +126,7 @@
OutputDefinition *outDef;
OutputMethod method;
char buffer[OUTPUT_BUFFER_SIZE],
- smallBuf[32];
+ smallBuf[SMALL_BUFFER_SIZE];
int curr;
Encoding enc;
eFlag sendOut(const char* data, int length, EscMode escapeMode);
Only in .: patch
Common subdirectories: ../../Sablot-0.40-orig/Sablot/sabcmd and ./sabcmd
diff -u ../../Sablot-0.40-orig/Sablot/utf8.cpp ./utf8.cpp
--- ../../Sablot-0.40-orig/Sablot/utf8.cpp Fri Jun 30 12:23:25 2000
+++ ./utf8.cpp Tue Jul 11 17:58:53 2000
@@ -15,7 +15,7 @@
* Portions created by Ginger Alliance are Copyright (C) 2000 Ginger
* Alliance Ltd. All Rights Reserved.
*
- * Contributor(s):
+ * Contributor(s): Sven Neumann <[EMAIL PROTECTED]>
*
* Alternatively, the contents of this file may be used under the
* terms of the GNU General Public License Version 2 or later (the
@@ -34,26 +34,66 @@
// utf8.cpp
//
+#include <assert.h>
+#include <iconv.h>
#include "utf8.h"
+/* iconv charset names indexed by Encoding; this MUST match the enum defined in utf8.h */
+static const char* iconv_encoding[6] =
+{
+ "UTF8",
+ "UTF16",
+ "ASCII",
+ "ISO-8859-1",
+ "ISO-8859-2",
+ "CP1250"
+};
+
int utf8SingleCharLength(const char* text)
{
- return 1;
+ const unsigned char lead = (unsigned char) *text; /* the length of a UTF-8 character is fixed by its first byte */
+ if ((lead & 0xF8) == 0xF0)
+ return 4; /* 11110xxx: four-byte sequence */
+ if ((lead & 0xF0) == 0xE0)
+ return 3; /* 1110xxxx: three-byte sequence */
+ if ((lead & 0xE0) == 0xC0)
+ return 2; /* 110xxxxx: two-byte sequence */
+ return 1; /* 0xxxxxxx (ASCII); stray continuation or invalid bytes are also stepped over one at a time */
}
unsigned long utf8CharCode(const char *text)
{
- return (unsigned long)(*text);
+ return (unsigned long)(*text); /* NOTE(review): reads only the first byte, so multi-byte characters are not decoded; where char is signed, bytes >= 0x80 sign-extend -- intended contract unknown, confirm */
}
int utf8GetChar(char *dest, const char *src)
{
- *dest = *src;
- return 1;
+ const int nbytes = utf8SingleCharLength (src); /* size of the character that starts at src */
+ memcpy (dest, src, nbytes); /* raw copy; no terminator is appended to dest */
+ return nbytes;
}
int utf8Recode(char* dest, const char* src, Encoding enc)
{
- *dest = *src;
- return 1;
+ if (enc == ENC_UTF8)
+ {
+ return utf8GetChar (dest, src); /* output is UTF-8 as well: copy the character unchanged */
+ }
+ else
+ {
+ iconv_t cd;
+ size_t inbytesleft = utf8SingleCharLength (src); /* feed iconv exactly one character */
+ size_t outbytesleft = SMALL_BUFFER_SIZE; /* assumes dest has room for SMALL_BUFFER_SIZE bytes -- TODO confirm against the caller */
+ char *outbuf = dest;
+
+ cd = iconv_open (iconv_encoding[enc], "UTF8"); /* enc indexes the table unchecked; a descriptor is opened and closed on every call */
+ assert (cd != (iconv_t)(-1)); /* NOTE(review): checked only when assertions are enabled; with NDEBUG a failed open is passed on to iconv below */
+
+ while (inbytesleft &&
+ iconv (cd, &src, &inbytesleft, &outbuf, &outbytesleft) != -1); /* empty loop body: repeat until the input is consumed or iconv reports an error, which is not propagated */
+
+ iconv_close (cd);
+
+ return SMALL_BUFFER_SIZE - outbytesleft; /* bytes iconv wrote to dest */
+ }
}
diff -u ../../Sablot-0.40-orig/Sablot/utf8.h ./utf8.h
--- ../../Sablot-0.40-orig/Sablot/utf8.h Fri Jun 30 12:23:25 2000
+++ ./utf8.h Tue Jul 11 17:55:03 2000
@@ -37,6 +37,8 @@
#if !defined(Utf8HIncl)
#define Utf8HIncl
+#define SMALL_BUFFER_SIZE 32
+
enum Encoding
{
ENC_UTF8,