wrowe 2002/07/16 20:26:30
Modified: . apr.dsp libapr.dsp
dso/win32 dso.c
file_io/win32 open.c
include/arch/win32 fileio.h
test testucs.c
Added: include/arch/win32 utf8.h
misc/win32 utf8.c
Removed: i18n/unix utf8_ucs2.c
include/arch/unix i18n.h
Log:
Move the win32-only utf8 support [for the Unicode filesystem] out of
the i18n/unix tree. Never built it there anyways.
Revision Changes Path
1.109 +8 -8 apr/apr.dsp
Index: apr.dsp
===================================================================
RCS file: /home/cvs/apr/apr.dsp,v
retrieving revision 1.108
retrieving revision 1.109
diff -u -r1.108 -r1.109
--- apr.dsp 17 Jul 2002 02:53:25 -0000 1.108
+++ apr.dsp 17 Jul 2002 03:26:29 -0000 1.109
@@ -157,10 +157,6 @@
# PROP Default_Filter ""
# Begin Source File
-SOURCE=.\i18n\unix\utf8_ucs2.c
-# End Source File
-# Begin Source File
-
SOURCE=.\i18n\unix\xlate.c
# End Source File
# End Group
@@ -234,6 +230,10 @@
# End Source File
# Begin Source File
+SOURCE=.\misc\win32\utf8.c
+# End Source File
+# Begin Source File
+
SOURCE=.\misc\unix\uuid.c
# End Source File
# Begin Source File
@@ -420,10 +420,6 @@
# End Source File
# Begin Source File
-SOURCE=.\include\arch\unix\i18n.h
-# End Source File
-# Begin Source File
-
SOURCE=.\include\arch\win32\inherit.h
# End Source File
# Begin Source File
@@ -445,6 +441,10 @@
# Begin Source File
SOURCE=.\include\arch\win32\threadproc.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\include\arch\win32\utf8.h
# End Source File
# End Group
# Begin Group "Public Header Files"
1.70 +8 -8 apr/libapr.dsp
Index: libapr.dsp
===================================================================
RCS file: /home/cvs/apr/libapr.dsp,v
retrieving revision 1.69
retrieving revision 1.70
diff -u -r1.69 -r1.70
--- libapr.dsp 17 Jul 2002 02:53:25 -0000 1.69
+++ libapr.dsp 17 Jul 2002 03:26:29 -0000 1.70
@@ -163,10 +163,6 @@
# PROP Default_Filter ""
# Begin Source File
-SOURCE=.\i18n\unix\utf8_ucs2.c
-# End Source File
-# Begin Source File
-
SOURCE=.\i18n\unix\xlate.c
# End Source File
# End Group
@@ -240,6 +236,10 @@
# End Source File
# Begin Source File
+SOURCE=.\misc\win32\utf8.c
+# End Source File
+# Begin Source File
+
SOURCE=.\misc\unix\uuid.c
# End Source File
# Begin Source File
@@ -426,10 +426,6 @@
# End Source File
# Begin Source File
-SOURCE=.\include\arch\unix\i18n.h
-# End Source File
-# Begin Source File
-
SOURCE=.\include\arch\win32\inherit.h
# End Source File
# Begin Source File
@@ -451,6 +447,10 @@
# Begin Source File
SOURCE=.\include\arch\win32\threadproc.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\include\arch\win32\utf8.h
# End Source File
# End Group
# Begin Group "Public Header Files"
1.33 +1 -1 apr/dso/win32/dso.c
Index: dso.c
===================================================================
RCS file: /home/cvs/apr/dso/win32/dso.c,v
retrieving revision 1.32
retrieving revision 1.33
diff -u -r1.32 -r1.33
--- dso.c 20 May 2002 15:23:42 -0000 1.32
+++ dso.c 17 Jul 2002 03:26:29 -0000 1.33
@@ -56,7 +56,7 @@
#include "apr_strings.h"
#include "apr_private.h"
#include "fileio.h"
-#include "i18n.h"
+#include "utf8.h"
#if APR_HAS_DSO
1.108 +1 -1 apr/file_io/win32/open.c
Index: open.c
===================================================================
RCS file: /home/cvs/apr/file_io/win32/open.c,v
retrieving revision 1.107
retrieving revision 1.108
diff -u -r1.107 -r1.108
--- open.c 16 Jul 2002 20:10:13 -0000 1.107
+++ open.c 17 Jul 2002 03:26:29 -0000 1.108
@@ -53,7 +53,7 @@
*/
#include "apr_private.h"
-#include "win32/fileio.h"
+#include "fileio.h"
#include "apr_file_io.h"
#include "apr_general.h"
#include "apr_strings.h"
1.70 +1 -1 apr/include/arch/win32/fileio.h
Index: fileio.h
===================================================================
RCS file: /home/cvs/apr/include/arch/win32/fileio.h,v
retrieving revision 1.69
retrieving revision 1.70
diff -u -r1.69 -r1.70
--- fileio.h 11 Jul 2002 06:22:22 -0000 1.69
+++ fileio.h 17 Jul 2002 03:26:29 -0000 1.70
@@ -86,7 +86,7 @@
#endif
#if APR_HAS_UNICODE_FS
-#include "arch/unix/i18n.h"
+#include "arch/win32/utf8.h"
#include <wchar.h>
typedef apr_uint16_t apr_wchar_t;
1.1 apr/include/arch/win32/utf8.h
Index: utf8.h
===================================================================
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2000-2002 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact [EMAIL PROTECTED]
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
#ifndef UTF8_H
#define UTF8_H
#include "apr.h"
/* If we ever support anything more exciting than char... this could move.
*/
typedef apr_uint16_t apr_wchar_t;
/**
* An APR internal function for fast utf-8 octet-encoded Unicode conversion
* to the ucs-2 wide Unicode format. This function is used for filename and
* other resource conversions for platforms providing native Unicode support.
*
* @param in       The utf-8 octets to convert
* @param inbytes  On entry, the number of octets available at in; it is
*                 decremented as complete characters are consumed, so on
*                 return it holds the number of octets not yet converted
* @param out      The buffer receiving the converted words; characters
*                 above 0xFFFF are stored as a surrogate pair (two words)
* @param outwords On entry, the number of words available at out; it is
*                 decremented for each word stored, so on return it holds
*                 the number of words still unused
* @return APR_SUCCESS, APR_EINVAL or APR_INCOMPLETE
*
* @remark Only the errors APR_EINVAL and APR_INCOMPLETE may occur, the former
* when the character code is invalid (in or out of context) and the latter
* when more characters were expected, but insufficient characters remain.
* Running out of output space is not an error; APR_SUCCESS is returned and
* the caller must inspect both inbytes and outwords.
*/
APR_DECLARE(apr_status_t) apr_conv_utf8_to_ucs2(const char *in,
apr_size_t *inbytes,
apr_wchar_t *out,
apr_size_t *outwords);
/**
* An APR internal function for fast ucs-2 wide Unicode format conversion to
* the utf-8 octet-encoded Unicode. This function is used for filename and
* other resource conversions for platforms providing native Unicode support.
*
* @param in       The ucs-2 words to convert; a surrogate pair (two words)
*                 is converted as a single character
* @param inwords  On entry, the number of words available at in; it is
*                 decremented as complete characters are consumed, so on
*                 return it holds the number of words not yet converted
* @param out      The buffer receiving the utf-8 octets
* @param outbytes On entry, the number of octets available at out; it is
*                 decremented for each octet stored, so on return it holds
*                 the number of octets still unused
* @return APR_SUCCESS, APR_EINVAL or APR_INCOMPLETE
*
* @remark Only the errors APR_EINVAL and APR_INCOMPLETE may occur, the former
* when the character code is invalid (in or out of context) and the latter
* when more words were expected, but insufficient words remain.
* Running out of output space is not an error; APR_SUCCESS is returned and
* the caller must inspect both inwords and outbytes.
*/
APR_DECLARE(apr_status_t) apr_conv_ucs2_to_utf8(const apr_wchar_t *in,
apr_size_t *inwords,
char *out,
apr_size_t *outbytes);
#endif /* def UTF8_H */
1.1 apr/misc/win32/utf8.c
Index: utf8.c
===================================================================
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2000-2002 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact [EMAIL PROTECTED]
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
#include "apr.h"
#include "apr_private.h"
#include "apr_errno.h"
#include "utf8.h"
/* Implement the design principle specified by RFC 2718 2.2.5
* Guidelines for new URL Schemes - within the APR.
*
* Since many architectures support unicode, and UCS2 is the most
* efficient storage used by those architectures, these functions
* exist to validate a UCS string. It is up to the operating system
* to determine the validity of the string in the context of its
* native language support. File systems that support filename
* characters of 0x80-0xff but have no support of Unicode will find
* this function useful only for validating the character sequences
* and rejecting poorly encoded strings, if RFC 2718 2.2.5 naming is
* desired.
*
* from RFC 2279 UTF-8, a transformation format of ISO 10646
*
* UCS-4 range (hex.) UTF-8 octet sequence (binary)
* 1:2 0000 0000-0000 007F 0xxxxxxx
* 2:2 0000 0080-0000 07FF 110XXXXx 10xxxxxx
* 3:2 0000 0800-0000 FFFF 1110XXXX 10Xxxxxx 10xxxxxx
* 4:4 0001 0000-001F FFFF 11110zXX 10XXxxxx 10xxxxxx 10xxxxxx
* inv 0020 0000-03FF FFFF 111110XX 10XXXxxx 10xxxxxx 10xxxxxx 10xxxxxx
* inv 0400 0000-7FFF FFFF 1111110X 10XXXXxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
*
* One of the X values must be one for the encoding length to be legit.
* Neither the z bit, nor the final two forms, are used for ucs-2
*
* "Pairs of UCS-2 values between D800 and DFFF (surrogate pairs in
* Unicode parlance), being actually UCS-4 characters transformed
* through UTF-16, need special treatment: the UTF-16 transformation
* must be undone, yielding a UCS-4 character that is then transformed
* as above."
*
* from RFC2781 UTF-16: the compressed ISO 10646 encoding bitmask
*
* U' = U - 0x10000
* U' = 000000000000yyyyyyyyyyxxxxxxxxxx
* W1 = 110110yyyyyyyyyy
* W2 = 110111xxxxxxxxxx
*
* apr_conv_utf8_to_ucs2 out bytes:sizeof(in) * 1 <= Req <= sizeof(in) * 2
*
* apr_conv_ucs2_to_utf8 out bytes:sizeof(in) / 2 <= Req <= sizeof(in) * 3 / 2
*/
/* Convert utf-8 octets at 'in' to ucs-2 words at 'out'.
 *
 * *inbytes and *outwords are running counters: each fully converted
 * character subtracts the octets it used from *inbytes and the words it
 * produced (one, or two for a surrogate pair) from *outwords.  Nothing is
 * subtracted for a character that is rejected or does not fit.
 *
 * Returns APR_EINVAL for a malformed, overlong, surrogate or out-of-range
 * sequence, APR_INCOMPLETE when the input ends inside a sequence, and
 * APR_SUCCESS otherwise -- including when the output buffer filled up.
 */
APR_DECLARE(apr_status_t) apr_conv_utf8_to_ucs2(const char *in,
                                                apr_size_t *inbytes,
                                                apr_wchar_t *out,
                                                apr_size_t *outwords)
{
    while (*inbytes && *outwords) {
        apr_int64_t ucs4, leadmask;
        apr_size_t trailing, consumed;
        int octet, peek;

        octet = (unsigned char)*in++;

        /* Plain 7-bit character: copied through unchanged */
        if ((octet & 0x80) == 0) {
            *out++ = octet;
            --*inbytes;
            --*outwords;
            continue;
        }

        /* 10xxxxxx cannot begin a sequence */
        if ((octet & 0xC0) != 0xC0) {
            return APR_EINVAL;
        }

        /* Count the continuation octets announced by the lead octet,
         * widening the mask over its length bits as we go.  More than
         * three continuations cannot be represented here.
         */
        leadmask = 0xE0;
        trailing = 1;
        while ((octet & leadmask) == leadmask) {
            leadmask |= leadmask >> 1;
            if (++trailing > 3) {
                return APR_EINVAL;
            }
        }
        ucs4 = octet & ~leadmask;
        consumed = trailing + 1;

        if (*inbytes <= trailing) {
            return APR_INCOMPLETE;
        }

        /* Range checks that can be decided from the lead octet and the
         * first continuation octet alone.
         */
        switch (trailing) {
        case 1:
            /* Below 0x80 would be an overlong two-octet form */
            if ((ucs4 & 0x1E) == 0) {
                return APR_EINVAL;
            }
            break;
        case 2:
            peek = (unsigned char)*in;
            /* Below 0x800 would be an overlong three-octet form */
            if (ucs4 == 0 && (peek & 0x20) == 0) {
                return APR_EINVAL;
            }
            /* D800-DFFF must not appear as a directly encoded value */
            if (ucs4 == 0x0D && (peek & 0x20) != 0) {
                return APR_EINVAL;
            }
            break;
        case 3:
            peek = (unsigned char)*in;
            /* Below 0x10000 would be an overlong four-octet form */
            if (ucs4 == 0 && (peek & 0x30) == 0) {
                return APR_EINVAL;
            }
            /* Anything from 0x110000 upward is out of range */
            if (ucs4 > 4) {
                return APR_EINVAL;
            }
            if (ucs4 == 4 && (peek & 0x30) != 0) {
                return APR_EINVAL;
            }
            break;
        default:
            break;
        }

        /* A four-octet sequence becomes a surrogate pair and therefore
         * needs two output words; stop (without error) if they don't fit.
         */
        if (*outwords < (apr_size_t)(trailing > 2) + 1) {
            break;
        }

        /* Fold in six payload bits from every continuation octet */
        for (; trailing > 0; --trailing) {
            octet = (unsigned char)*in++;
            if ((octet & 0xC0) != 0x80) {
                return APR_EINVAL;
            }
            ucs4 = (ucs4 << 6) | (octet & 0x3F);
        }
        *inbytes -= consumed;

        /* Store the character, split into surrogates where required */
        if (ucs4 < 0x10000) {
            *out++ = (apr_wchar_t)ucs4;
            --*outwords;
        }
        else {
            ucs4 -= 0x10000;
            *out++ = (apr_wchar_t)(0xD800 | (ucs4 >> 10));
            *out++ = (apr_wchar_t)(0xDC00 | (ucs4 & 0x03FF));
            *outwords -= 2;
        }
    }

    /* A full output buffer is not an error; the caller compares the
     * remaining *inbytes and *outwords to find out what happened.
     */
    return APR_SUCCESS;
}
/* Convert ucs-2 words at 'in' to utf-8 octets at 'out'.
 *
 * *inwords and *outbytes are running counters: each fully converted
 * character subtracts the words it used (one, or two for a surrogate
 * pair) from *inwords and the octets it produced from *outbytes.
 * Nothing is subtracted for a character that is rejected or does not fit.
 *
 * Returns APR_EINVAL for an unpaired or misordered surrogate,
 * APR_INCOMPLETE when the input ends after a leading surrogate, and
 * APR_SUCCESS otherwise -- including when the output buffer filled up.
 */
APR_DECLARE(apr_status_t) apr_conv_ucs2_to_utf8(const apr_wchar_t *in,
                                                apr_size_t *inwords,
                                                char *out,
                                                apr_size_t *outbytes)
{
    while (*inwords && *outbytes) {
        apr_int64_t ucs4, span;
        apr_size_t trailing;
        char *tail;
        int word, low, lead;

        word = (unsigned short)*in++;

        /* Plain 7-bit character: copied through unchanged */
        if (word < 0x80) {
            *out++ = (unsigned char)word;
            --*inwords;
            --*outbytes;
            continue;
        }

        /* A trailing surrogate may not come first */
        if ((word & 0xFC00) == 0xDC00) {
            return APR_EINVAL;
        }

        if ((word & 0xFC00) != 0xD800) {
            /* Ordinary single-word character */
            ucs4 = word;
        }
        else {
            /* Leading surrogate: its partner must be present and valid */
            if (*inwords < 2) {
                return APR_INCOMPLETE;
            }
            if (((unsigned short)*in & 0xFC00) != 0xDC00) {
                return APR_EINVAL;
            }
            low = (unsigned short)*in++;
            ucs4 = ((word & 0x03FF) << 10) | (low & 0x03FF);
            ucs4 += 0x10000;
        }

        /* Number of continuation octets: one covers 11 bits, and every
         * further one adds 5 more bits of range.
         */
        trailing = 1;
        for (span = ucs4 >> 11; span; span >>= 5) {
            ++trailing;
        }

        /* trailing + 1 octets are required; stop (without error) if the
         * remaining output space is smaller than that.
         */
        if (trailing >= *outbytes) {
            break;
        }
        *inwords -= (trailing > 2) + 1;
        *outbytes -= trailing + 1;

        /* Fill the sequence from its last octet back to its first,
         * growing the lead octet's length prefix by one bit for each
         * continuation octet written.
         */
        out += trailing + 1;
        tail = out;
        lead = 0x80;
        while (trailing--) {
            lead |= lead >> 1;
            *--tail = (unsigned char)(0x80 | (ucs4 & 0x3F));
            ucs4 >>= 6;
        }
        /* Whatever bits are left belong in the lead octet */
        *--tail = (unsigned char)(lead | ucs4);
    }

    /* A full output buffer is not an error; the caller compares the
     * remaining *inwords and *outbytes to find out what happened.
     */
    return APR_SUCCESS;
}
1.5 +1 -1 apr/test/testucs.c
Index: testucs.c
===================================================================
RCS file: /home/cvs/apr/test/testucs.c,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- testucs.c 29 Jan 2002 05:49:21 -0000 1.4
+++ testucs.c 17 Jul 2002 03:26:30 -0000 1.5
@@ -1,5 +1,5 @@
#include "apr_xlate.h"
-#include "../include/arch/unix/i18n.h"
+#include "../include/misc/win32/utf8.h"
#include <wchar.h>
#include <string.h>