On Thu, Aug 27, 2009 at 11:31 PM, Vladimir 'phcoder'
Serbinenko<phco...@gmail.com> wrote:
> On Wed, Aug 26, 2009 at 2:31 AM, Robert Millan<r...@aybabtu.com> wrote:
>> On Mon, Aug 24, 2009 at 09:23:22PM +0200, Vladimir 'phcoder' Serbinenko 
>> wrote:
>>
>>> 2009-08-24  Vladimir Serbinenko  <phco...@gmail.com>
>>>
>>>       UTF-8 to UTF-16 transformation.
>>>
>>>       * conf/common.rmk (pkglib_MODULES): Add utf.mod
>>>       (utf_mod_SOURCES): New variable.
>>>       (utf_mod_CFLAGS): Likewise.
>>>       (utf_mod_LDFLAGS): Likewise.
>>>       * include/grub/utf.h: New file.
>>>       * lib/utf.c: New file. (Based on grub_utf8_to_ucs4 from kern/misc.c)
>>
>> Sounds like we could end up needing more of this (to other charsets), so
>> why not give this module a generic name to hint as to where it can be added?
>>
> I'm OK with renaming, but whether a given conversion goes into charset.mod
> should perhaps be decided on a case-by-case basis.
>> The conversion functions in kern/misc.c could eventually move there as well,
>> once UTF-8 support becomes optional in the kernel.
> utf16_to_utf8 can be moved out of the kernel now, but it's used by some
> fs modules (e.g. fat). Perhaps utf16_to_utf8 should be a separate
> module? This would decrease the size of the biggest cores at the price
> of increasing the size of smaller cores.
>>
>> GNU libc has "iconv" command and "iconv_*" facilities for charset conversion,
>> how about iconv.mod for consistency?
>>
>>> +       if ((c & 0x80) == 0x00)
>>> +         code = c;
>>> +       else if ((c & 0xe0) == 0xc0)
>>
>> These should be macroified.
>>
> Actually these are accelerated bit checks (the bit numbers follow a specific
> and easy pattern) and for real readability they would have to be written in
> binary, but AFAIK binary notation isn't supported in C code and would
> result in overly long strings.
>> --
>> Robert Millan
>>
>>  The DRM opt-in fallacy: "Your data belongs to us. We will decide when (and
>>  how) you may access your data; but nobody's threatening your freedom: we
>>  still allow you to remove your data and not access it at all."
>>
>>
>> _______________________________________________
>> Grub-devel mailing list
>> Grub-devel@gnu.org
>> http://lists.gnu.org/mailman/listinfo/grub-devel
>>
>
>
>
> --
> Regards
> Vladimir 'phcoder' Serbinenko
>
> Personal git repository: http://repo.or.cz/w/grub2/phcoder.git
>



-- 
Regards
Vladimir 'phcoder' Serbinenko

Personal git repository: http://repo.or.cz/w/grub2/phcoder.git
diff --git a/ChangeLog b/ChangeLog
index ab542e2..367ab05 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -158,6 +158,17 @@
 
 2009-08-24  Vladimir Serbinenko  <phco...@gmail.com>
 
+       UTF-8 to UTF-16 transformation.
+
+       * conf/common.rmk (pkglib_MODULES): Add charset.mod.
+       (charset_mod_SOURCES): New variable.
+       (charset_mod_CFLAGS): Likewise.
+       (charset_mod_LDFLAGS): Likewise.
+       * include/grub/charset.h: New file.
+       * lib/charset.c: New file. (Based on grub_utf8_to_ucs4 from kern/misc.c)
+
+2009-08-24  Vladimir Serbinenko  <phco...@gmail.com>
+
        * script/sh/function.c (grub_script_function_find): Cut error message
        not to flood terminal.
        * script/sh/lexer.c (grub_script_yylex): Remove command line length
diff --git a/conf/common.rmk b/conf/common.rmk
index 7727f19..735e57a 100644
--- a/conf/common.rmk
+++ b/conf/common.rmk
@@ -633,3 +633,8 @@ pkglib_MODULES += setjmp.mod
 setjmp_mod_SOURCES = lib/$(target_cpu)/setjmp.S
 setjmp_mod_ASFLAGS = $(COMMON_ASFLAGS)
 setjmp_mod_LDFLAGS = $(COMMON_LDFLAGS)
+
+pkglib_MODULES += charset.mod
+charset_mod_SOURCES = lib/charset.c
+charset_mod_CFLAGS = $(COMMON_CFLAGS)
+charset_mod_LDFLAGS = $(COMMON_LDFLAGS)
diff --git a/include/grub/charset.h b/include/grub/charset.h
new file mode 100644
index 0000000..22b6724
--- /dev/null
+++ b/include/grub/charset.h
@@ -0,0 +1,50 @@
+/*
+ *  GRUB  --  GRand Unified Bootloader
+ *  Copyright (C) 2009  Free Software Foundation, Inc.
+ *
+ *  GRUB is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  GRUB is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GRUB.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GRUB_CHARSET_HEADER
+#define GRUB_CHARSET_HEADER    1
+
+#include <grub/types.h>
+
+#define GRUB_UINT8_1_LEADINGBIT 0x80
+#define GRUB_UINT8_2_LEADINGBITS 0xc0
+#define GRUB_UINT8_3_LEADINGBITS 0xe0
+#define GRUB_UINT8_4_LEADINGBITS 0xf0
+#define GRUB_UINT8_5_LEADINGBITS 0xf8
+#define GRUB_UINT8_6_LEADINGBITS 0xfc
+#define GRUB_UINT8_7_LEADINGBITS 0xfe
+
+#define GRUB_UINT8_1_TRAILINGBIT 0x01
+#define GRUB_UINT8_2_TRAILINGBITS 0x03
+#define GRUB_UINT8_3_TRAILINGBITS 0x07
+#define GRUB_UINT8_4_TRAILINGBITS 0x0f
+#define GRUB_UINT8_5_TRAILINGBITS 0x1f
+#define GRUB_UINT8_6_TRAILINGBITS 0x3f
+
+#define GRUB_UCS2_LIMIT 0x10000 /* Codes below this fit in one unit.  */
+#define GRUB_UTF16_UPPER_SURROGATE(code) /* 0xD800 + high 10 bits.  */ \
+  (0xD800 + ((((code) - GRUB_UCS2_LIMIT) >> 10) & 0x3ff))
+#define GRUB_UTF16_LOWER_SURROGATE(code) /* 0xDC00 + low 10 bits.  */ \
+  (0xDC00 + (((code) - GRUB_UCS2_LIMIT) & 0x3ff))
+
+grub_ssize_t
+grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize,
+                   const grub_uint8_t *src, grub_size_t srcsize,
+                   const grub_uint8_t **srcend);
+
+#endif
diff --git a/lib/charset.c b/lib/charset.c
new file mode 100644
index 0000000..8bc5b91
--- /dev/null
+++ b/lib/charset.c
@@ -0,0 +1,116 @@
+/*
+ *  GRUB  --  GRand Unified Bootloader
+ *  Copyright (C) 1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009  Free Software Foundation, Inc.
+ *
+ *  GRUB is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  GRUB is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GRUB.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <grub/charset.h>
+
+/* Convert a (possibly null-terminated) UTF-8 string of at most SRCSIZE
+   bytes (if SRCSIZE is -1, it is ignored) in length to a UTF-16 string.
+   Return the number of 16-bit units stored in DEST, which must be able
+   to hold at least DESTSIZE units; a code point of GRUB_UCS2_LIMIT or
+   above takes two units.  Return -1 if an invalid sequence is found or
+   if a code point lies above 0x10ffff and so has no UTF-16 form.
+   If SRCEND is not NULL, then *SRCEND is set to the next byte after the
+   last byte used in SRC; bytes of a character that was not stored
+   (truncated input, or no room left in DEST) do not count as used.  */
+grub_ssize_t
+grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize,
+                   const grub_uint8_t *src, grub_size_t srcsize,
+                   const grub_uint8_t **srcend)
+{
+  grub_uint16_t *p = dest;
+  /* End of the last sequence that was converted completely.  */
+  const grub_uint8_t *used = src;
+  int count = 0;
+  grub_uint32_t code = 0;
+
+  if (srcend)
+    *srcend = src;
+
+  while (srcsize && destsize)
+    {
+      grub_uint32_t c = *src++;
+      if (srcsize != (grub_size_t)-1)
+        srcsize--;
+      if (count)
+        {
+          /* A continuation byte must look like 10xxxxxx.  */
+          if ((c & GRUB_UINT8_2_LEADINGBITS) != GRUB_UINT8_1_LEADINGBIT)
+            return -1;
+          code = (code << 6) | (c & GRUB_UINT8_6_TRAILINGBITS);
+          count--;
+        }
+      else
+        {
+          if (c == 0)
+            {
+              /* The terminator is consumed but not stored.  */
+              used = src;
+              break;
+            }
+
+          if ((c & GRUB_UINT8_1_LEADINGBIT) == 0)
+            code = c;
+          else if ((c & GRUB_UINT8_3_LEADINGBITS) == GRUB_UINT8_2_LEADINGBITS)
+            {
+              count = 1;
+              code = c & GRUB_UINT8_5_TRAILINGBITS;
+            }
+          else if ((c & GRUB_UINT8_4_LEADINGBITS) == GRUB_UINT8_3_LEADINGBITS)
+            {
+              count = 2;
+              code = c & GRUB_UINT8_4_TRAILINGBITS;
+            }
+          else if ((c & GRUB_UINT8_5_LEADINGBITS) == GRUB_UINT8_4_LEADINGBITS)
+            {
+              count = 3;
+              code = c & GRUB_UINT8_3_TRAILINGBITS;
+            }
+          else
+            {
+              /* Stray continuation byte, or an obsolete 5/6-byte lead.  */
+              return -1;
+            }
+        }
+
+      if (count == 0)
+        {
+          /* UTF-16 cannot encode anything above 0x10ffff.  */
+          if (code > 0x10ffff)
+            return -1;
+          /* No room for a surrogate pair: stop before this character.  */
+          if (destsize < 2 && code >= GRUB_UCS2_LIMIT)
+            break;
+          if (code >= GRUB_UCS2_LIMIT)
+            {
+              *p++ = GRUB_UTF16_UPPER_SURROGATE (code);
+              *p++ = GRUB_UTF16_LOWER_SURROGATE (code);
+              destsize -= 2;
+            }
+          else
+            {
+              *p++ = code;
+              destsize--;
+            }
+          used = src;
+        }
+    }
+
+  if (srcend)
+    *srcend = used;
+  return p - dest;
+}
_______________________________________________
Grub-devel mailing list
Grub-devel@gnu.org
http://lists.gnu.org/mailman/listinfo/grub-devel

Reply via email to