On Thu, Aug 27, 2009 at 11:31 PM, Vladimir 'phcoder' Serbinenko<phco...@gmail.com> wrote: > On Wed, Aug 26, 2009 at 2:31 AM, Robert Millan<r...@aybabtu.com> wrote: >> On Mon, Aug 24, 2009 at 09:23:22PM +0200, Vladimir 'phcoder' Serbinenko >> wrote: >> >>> 2009-08-24 Vladimir Serbinenko <phco...@gmail.com> >>> >>> UTF-8 to UTF-16 transformation. >>> >>> * conf/common.rmk (pkglib_MODULES): Add utf.mod >>> (utf_mod_SOURCES): New variable. >>> (utf_mod_CFLAGS): Likewise. >>> (utf_mod_LDFLAGS): Likewise. >>> * include/grub/utf.h: New file. >>> * lib/utf.c: New file. (Based on grub_utf8_to_ucs4 from kern/misc.c) >> >> Sounds like we could end up needing more of this (to other charsets), so >> why not give this module a generic name to hint as to where it can be added? >> > I'm ok with renaming, but whether a conversion goes to charset.mod is > perhaps to be decided on a case-by-case basis. >> The conversion functions in kern/misc.c could eventually move there as well, >> once UTF-8 support becomes optional in the kernel. > utf16_to_utf8 can be moved out of the kernel now, but it's used by some > fs modules (e.g. fat). Perhaps utf16_to_utf8 should be a separate > module? This would decrease the size of the biggest cores at the price > of an increase in the smaller cores. >> >> GNU libc has an "iconv" command and "iconv_*" facilities for charset conversion, >> how about iconv.mod for consistency? >> >>> + if ((c & 0x80) == 0x00) >>> + code = c; >>> + else if ((c & 0xe0) == 0xc0) >> >> These should be macroified. >> > Actually these are accelerated bit checks (the bit numbers follow a specific > and easy pattern) and for real readability they would have to be written in > binary, but AFAIK binary notation isn't supported in C code and would > result in overly long strings. >> -- >> Robert Millan >> >> The DRM opt-in fallacy: "Your data belongs to us. 
We will decide when (and >> how) you may access your data; but nobody's threatening your freedom: we >> still allow you to remove your data and not access it at all." >> >> >> _______________________________________________ >> Grub-devel mailing list >> Grub-devel@gnu.org >> http://lists.gnu.org/mailman/listinfo/grub-devel >> > > > > -- > Regards > Vladimir 'phcoder' Serbinenko > > Personal git repository: http://repo.or.cz/w/grub2/phcoder.git >
-- Regards Vladimir 'phcoder' Serbinenko Personal git repository: http://repo.or.cz/w/grub2/phcoder.git
diff --git a/ChangeLog b/ChangeLog
index ab542e2..367ab05 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -158,6 +158,17 @@
 
 2009-08-24  Vladimir Serbinenko  <phco...@gmail.com>
 
+	UTF-8 to UTF-16 transformation.
+
+	* conf/common.rmk (pkglib_MODULES): Add charset.mod.
+	(charset_mod_SOURCES): New variable.
+	(charset_mod_CFLAGS): Likewise.
+	(charset_mod_LDFLAGS): Likewise.
+	* include/grub/charset.h: New file.
+	* lib/charset.c: New file.  (Based on grub_utf8_to_ucs4 from kern/misc.c)
+
+2009-08-24  Vladimir Serbinenko  <phco...@gmail.com>
+
 	* script/sh/function.c (grub_script_function_find): Cut error message
 	not to flood terminal.
 	* script/sh/lexer.c (grub_script_yylex): Remove command line length
diff --git a/conf/common.rmk b/conf/common.rmk
index 7727f19..735e57a 100644
--- a/conf/common.rmk
+++ b/conf/common.rmk
@@ -633,3 +633,8 @@ pkglib_MODULES += setjmp.mod
 setjmp_mod_SOURCES = lib/$(target_cpu)/setjmp.S
 setjmp_mod_ASFLAGS = $(COMMON_ASFLAGS)
 setjmp_mod_LDFLAGS = $(COMMON_LDFLAGS)
+
+pkglib_MODULES += charset.mod
+charset_mod_SOURCES = lib/charset.c
+charset_mod_CFLAGS = $(COMMON_CFLAGS)
+charset_mod_LDFLAGS = $(COMMON_LDFLAGS)
diff --git a/include/grub/charset.h b/include/grub/charset.h
new file mode 100644
index 0000000..22b6724
--- /dev/null
+++ b/include/grub/charset.h
@@ -0,0 +1,59 @@
+/*
+ * GRUB -- GRand Unified Bootloader
+ * Copyright (C) 2009 Free Software Foundation, Inc.
+ *
+ * GRUB is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * GRUB is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GRUB. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GRUB_CHARSET_HEADER
+#define GRUB_CHARSET_HEADER 1
+
+#include <grub/types.h>
+
+/* Masks selecting the N most significant bits of a byte.  */
+#define GRUB_UINT8_1_LEADINGBIT 0x80
+#define GRUB_UINT8_2_LEADINGBITS 0xc0
+#define GRUB_UINT8_3_LEADINGBITS 0xe0
+#define GRUB_UINT8_4_LEADINGBITS 0xf0
+#define GRUB_UINT8_5_LEADINGBITS 0xf8
+#define GRUB_UINT8_6_LEADINGBITS 0xfc
+#define GRUB_UINT8_7_LEADINGBITS 0xfe
+
+/* Masks selecting the N least significant bits of a byte.  */
+#define GRUB_UINT8_1_TRAILINGBIT 0x01
+#define GRUB_UINT8_2_TRAILINGBITS 0x03
+#define GRUB_UINT8_3_TRAILINGBITS 0x07
+#define GRUB_UINT8_4_TRAILINGBITS 0x0f
+#define GRUB_UINT8_5_TRAILINGBITS 0x1f
+#define GRUB_UINT8_6_TRAILINGBITS 0x3f
+
+/* First code point that needs a surrogate pair in UTF-16.  */
+#define GRUB_UCS2_LIMIT 0x10000
+/* One past the largest code point UTF-16 can represent.  */
+#define GRUB_UNICODE_LIMIT 0x110000
+
+/* Surrogate halves of CODE (GRUB_UCS2_LIMIT <= CODE < GRUB_UNICODE_LIMIT):
+   each half carries 10 bits of CODE - GRUB_UCS2_LIMIT.  */
+#define GRUB_UTF16_UPPER_SURROGATE(code) \
+  (0xD800 + ((((code) - GRUB_UCS2_LIMIT) >> 10) & 0x3ff))
+#define GRUB_UTF16_LOWER_SURROGATE(code) \
+  (0xDC00 + (((code) - GRUB_UCS2_LIMIT) & 0x3ff))
+
+/* Convert UTF-8 to UTF-16; the contract is described in lib/charset.c.  */
+grub_ssize_t
+grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize,
+                    const grub_uint8_t *src, grub_size_t srcsize,
+                    const grub_uint8_t **srcend);
+
+#endif /* ! GRUB_CHARSET_HEADER */
diff --git a/lib/charset.c b/lib/charset.c
new file mode 100644
index 0000000..8bc5b91
--- /dev/null
+++ b/lib/charset.c
@@ -0,0 +1,138 @@
+/*
+ * GRUB -- GRand Unified Bootloader
+ * Copyright (C) 1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009 Free Software Foundation, Inc.
+ *
+ * GRUB is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * GRUB is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GRUB. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <grub/charset.h>
+
+/* Convert a (possibly null-terminated) UTF-8 string of at most SRCSIZE
+   bytes (if SRCSIZE is -1, it is ignored) in length to a UTF-16 string.
+   At most DESTSIZE 16-bit units are stored in DEST; a code point at or
+   above GRUB_UCS2_LIMIT takes two units (a surrogate pair) that are
+   never split.  Conversion stops after a NUL byte, at the end of SRC or
+   when the next character does not fit in DEST.
+   Return the number of 16-bit units stored, or -1 if an invalid sequence
+   or a code point UTF-16 cannot represent is found.
+   If SRCEND is not NULL, then *SRCEND is set to the next byte after the
+   last byte used in SRC.  Bytes of a character that was not stored
+   (sequence cut short by SRCSIZE, or no room in DEST) do not count as
+   used.  On error *SRCEND is left equal to SRC.  */
+grub_ssize_t
+grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize,
+                    const grub_uint8_t *src, grub_size_t srcsize,
+                    const grub_uint8_t **srcend)
+{
+  grub_uint16_t *p = dest;
+  /* First byte of the character currently being decoded.  */
+  const grub_uint8_t *char_start = src;
+  /* Number of continuation bytes still expected.  */
+  int count = 0;
+  grub_uint32_t code = 0;
+
+  if (srcend)
+    *srcend = src;
+
+  while (srcsize && destsize)
+    {
+      grub_uint32_t c;
+
+      if (count == 0)
+        char_start = src;
+
+      c = *src++;
+      if (srcsize != (grub_size_t) -1)
+        srcsize--;
+
+      if (count)
+        {
+          /* Continuation bytes must look like 10xxxxxx.  */
+          if ((c & GRUB_UINT8_2_LEADINGBITS) != GRUB_UINT8_1_LEADINGBIT)
+            return -1;
+
+          code <<= 6;
+          code |= (c & GRUB_UINT8_6_TRAILINGBITS);
+          count--;
+        }
+      else
+        {
+          if (c == 0)
+            break;
+
+          if ((c & GRUB_UINT8_1_LEADINGBIT) == 0)
+            code = c;
+          else if ((c & GRUB_UINT8_3_LEADINGBITS) == GRUB_UINT8_2_LEADINGBITS)
+            {
+              count = 1;
+              code = c & GRUB_UINT8_5_TRAILINGBITS;
+            }
+          else if ((c & GRUB_UINT8_4_LEADINGBITS) == GRUB_UINT8_3_LEADINGBITS)
+            {
+              count = 2;
+              code = c & GRUB_UINT8_4_TRAILINGBITS;
+            }
+          else if ((c & GRUB_UINT8_5_LEADINGBITS) == GRUB_UINT8_4_LEADINGBITS)
+            {
+              count = 3;
+              code = c & GRUB_UINT8_3_TRAILINGBITS;
+            }
+          else if ((c & GRUB_UINT8_6_LEADINGBITS) == GRUB_UINT8_5_LEADINGBITS)
+            {
+              count = 4;
+              code = c & GRUB_UINT8_2_TRAILINGBITS;
+            }
+          else if ((c & GRUB_UINT8_7_LEADINGBITS) == GRUB_UINT8_6_LEADINGBITS)
+            {
+              count = 5;
+              code = c & GRUB_UINT8_1_TRAILINGBIT;
+            }
+          else
+            return -1;
+        }
+
+      if (count == 0)
+        {
+          /* A complete character has been decoded.  */
+          if (code >= GRUB_UNICODE_LIMIT)
+            return -1;
+
+          if (code >= GRUB_UCS2_LIMIT)
+            {
+              if (destsize < 2)
+                {
+                  /* No room for the pair: leave the character unread.  */
+                  src = char_start;
+                  break;
+                }
+              *p++ = GRUB_UTF16_UPPER_SURROGATE (code);
+              *p++ = GRUB_UTF16_LOWER_SURROGATE (code);
+              destsize -= 2;
+            }
+          else
+            {
+              *p++ = code;
+              destsize--;
+            }
+        }
+    }
+
+  /* SRCSIZE ran out in the middle of a sequence: report it as unread.  */
+  if (count)
+    src = char_start;
+
+  if (srcend)
+    *srcend = src;
+  return p - dest;
+}
_______________________________________________ Grub-devel mailing list Grub-devel@gnu.org http://lists.gnu.org/mailman/listinfo/grub-devel