Hi,
In the GNU gettext support for GNUstep, Localizable.strings files are
generated (from PO files) that have the documented format for .strings.
Since the interpretation of these files should not depend on
getenv("GNUSTEP_STRING_ENCODING") and must accommodate all kinds of
characters, I've chosen to create these files in UTF-8 encoding. Furthermore,
since the only documented means of forcing initWithContentsOfFile: (which
is called by NSBundle indirectly) to interpret a file as UTF-8 is to prefix
it with the UTF-8 BOM, I've done that as well. But I get a parse error
from gnustep-base because the file is interpreted as being in ISO-8859-1,
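despite the UTF-8 BOM. Here is a fix. The old code compared the bytes
EF BB BF against 16-bit unichar elements instead of single bytes, so (on a
little-endian machine) it was expecting a BOM of EF 00 BB 00 BF 00 - which
is not even valid UTF-8.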
I've tested the file-reading part of this patch with:
NSString *c = (NSString *) [NSString stringWithContentsOfFile:
@".../German.lproj/Localizable.strings"];
puts ([c cString]);
The change to initWithContentsOfURL: is the same.
2003-10-16 Bruno Haible <[EMAIL PROTECTED]>
* Source/NSString.m (initWithContentsOfFile:, initWithContentsOfURL:):
Fix recognition of UTF-8 BOM.
*** gnustep-base-1.8.0/Source/NSString.m.bak 2003-08-07 20:22:03.000000000 +0200
--- gnustep-base-1.8.0/Source/NSString.m 2003-10-16 23:39:28.000000000 +0200
***************
*** 1321,1327 ****
NSStringEncoding enc = _DefaultStringEncoding;
NSData *d;
unsigned int len;
! const unichar *test;
d = [[NSDataClass alloc] initWithContentsOfFile: path];
if (d == nil)
--- 1321,1327 ----
NSStringEncoding enc = _DefaultStringEncoding;
NSData *d;
unsigned int len;
! const unsigned char *data_bytes;
d = [[NSDataClass alloc] initWithContentsOfFile: path];
if (d == nil)
***************
*** 1336,1350 ****
RELEASE(self);
return @"";
}
! test = [d bytes];
! if ((test != NULL) && (len > 1))
{
! if ((test[0] == byteOrderMark) || (test[0] == byteOrderMarkSwapped))
{
/* somebody set up us the BOM! */
enc = NSUnicodeStringEncoding;
}
! else if (len > 2 && test[0] == 0xEF && test[1] == 0xBB && test[2] == 0xBF)
{
enc = NSUTF8StringEncoding;
}
--- 1336,1355 ----
RELEASE(self);
return @"";
}
! data_bytes = [d bytes];
! if ((data_bytes != NULL) && (len >= 2))
{
! const unichar *data_ucs2chars = (const unichar *) data_bytes;
! if ((data_ucs2chars[0] == byteOrderMark)
! || (data_ucs2chars[0] == byteOrderMarkSwapped))
{
/* somebody set up us the BOM! */
enc = NSUnicodeStringEncoding;
}
! else if (len >= 3
! && data_bytes[0] == 0xEF
! && data_bytes[1] == 0xBB
! && data_bytes[2] == 0xBF)
{
enc = NSUTF8StringEncoding;
}
***************
*** 1363,1369 ****
NSStringEncoding enc = _DefaultStringEncoding;
NSData *d = [NSDataClass dataWithContentsOfURL: url];
unsigned int len = [d length];
! const unichar *test;
if (d == nil)
{
--- 1368,1374 ----
NSStringEncoding enc = _DefaultStringEncoding;
NSData *d = [NSDataClass dataWithContentsOfURL: url];
unsigned int len = [d length];
! const unsigned char *data_bytes;
if (d == nil)
{
***************
*** 1376,1389 ****
RELEASE(self);
return @"";
}
! test = [d bytes];
! if ((test != NULL) && (len > 1))
{
! if ((test[0] == byteOrderMark) || (test[0] == byteOrderMarkSwapped))
{
enc = NSUnicodeStringEncoding;
}
! else if (len > 2 && test[0] == 0xEF && test[1] == 0xBB && test[2] == 0xBF)
{
enc = NSUTF8StringEncoding;
}
--- 1381,1399 ----
RELEASE(self);
return @"";
}
! data_bytes = [d bytes];
! if ((data_bytes != NULL) && (len >= 2))
{
! const unichar *data_ucs2chars = (const unichar *) data_bytes;
! if ((data_ucs2chars[0] == byteOrderMark)
! || (data_ucs2chars[0] == byteOrderMarkSwapped))
{
enc = NSUnicodeStringEncoding;
}
! else if (len >= 3
! && data_bytes[0] == 0xEF
! && data_bytes[1] == 0xBB
! && data_bytes[2] == 0xBF)
{
enc = NSUTF8StringEncoding;
}
_______________________________________________
Bug-gnustep mailing list
[EMAIL PROTECTED]
http://mail.gnu.org/mailman/listinfo/bug-gnustep