Changeset: a668cb93b489 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/a668cb93b489
Removed Files:
monetdb5/modules/atoms/utf8.h
Modified Files:
clients/Tests/MAL-signatures-hge.test
clients/Tests/MAL-signatures.test
clients/Tests/exports.stable.out
gdk/gdk.h
gdk/gdk_string.c
monetdb5/modules/atoms/str.c
monetdb5/modules/atoms/str.h
monetdb5/modules/kernel/batstr.c
monetdb5/modules/mal/pcre.c
monetdb5/modules/mal/tablet.c
Branch: ascii-flag
Log Message:
Implemented case conversion and case comparison in a new way and moved
them to GDK. Case conversion and case comparison use the same tables, and
comparison does not convert its operands first. Unlike the old code, the
new code uses neither BATs nor hashes.
See the implementation in GDK for comments.
diffs (truncated from 9856 to 300 lines):
diff --git a/clients/Tests/MAL-signatures-hge.test b/clients/Tests/MAL-signatures-hge.test
--- a/clients/Tests/MAL-signatures-hge.test
+++ b/clients/Tests/MAL-signatures-hge.test
@@ -50764,11 +50764,6 @@ pattern str.endswithselect(X_0:bat[:str]
STRendswithselect;
Select all head values of the first input BAT for which the@tail value end
with the given suffix + icase.
str
-epilogue
-command str.epilogue():void
-STRepilogue;
-(empty)
-str
insert
command str.insert(X_0:str, X_1:int, X_2:int, X_3:str):str
STRinsert;
diff --git a/clients/Tests/MAL-signatures.test b/clients/Tests/MAL-signatures.test
--- a/clients/Tests/MAL-signatures.test
+++ b/clients/Tests/MAL-signatures.test
@@ -39089,11 +39089,6 @@ pattern str.endswithselect(X_0:bat[:str]
STRendswithselect;
Select all head values of the first input BAT for which the@tail value end
with the given suffix + icase.
str
-epilogue
-command str.epilogue():void
-STRepilogue;
-(empty)
-str
insert
command str.insert(X_0:str, X_1:int, X_2:int, X_3:str):str
STRinsert;
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -318,8 +318,13 @@ void GDKsetmallocsuccesscount(lng count)
stream *GDKstdin;
stream *GDKstdout;
ssize_t GDKstrFromStr(unsigned char *restrict dst, const unsigned char
*restrict src, ssize_t len, char quote);
+int GDKstrcasecmp(const char *s1, const char *s2);
+char *GDKstrcasestr(const char *haystack, const char *needle);
str GDKstrdup(const char *s) __attribute__((__malloc__))
__attribute__((__warn_unused_result__));
+int GDKstrncasecmp(const char *str1, const char *str2, size_t l1, size_t l2);
str GDKstrndup(const char *s, size_t n) __attribute__((__malloc__))
__attribute__((__warn_unused_result__));
+gdk_return GDKtolower(char **buf, size_t *buflen, const char *s);
+gdk_return GDKtoupper(char **buf, size_t *buflen, const char *s);
gdk_return GDKtracer_fill_comp_info(BAT *id, BAT *component, BAT *log_level);
gdk_return GDKtracer_flush_buffer(void);
const char *GDKtracer_get_component_level(const char *comp);
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -2341,6 +2341,12 @@ gdk_export gdk_return BATfirstn(BAT **to
#include "gdk_calc.h"
+gdk_export gdk_return GDKtoupper(char **buf, size_t *buflen, const char *s);
+gdk_export gdk_return GDKtolower(char **buf, size_t *buflen, const char *s);
+gdk_export int GDKstrncasecmp(const char *str1, const char *str2, size_t l1, size_t l2);
+gdk_export int GDKstrcasecmp(const char *s1, const char *s2);
+gdk_export char *GDKstrcasestr(const char *haystack, const char *needle);
+
/*
* @- BAT sample operators
*
diff --git a/gdk/gdk_string.c b/gdk/gdk_string.c
--- a/gdk/gdk_string.c
+++ b/gdk/gdk_string.c
@@ -1453,3 +1453,3300 @@ GDKanalytical_str_group_concat(BAT *r, B
GDKerror("str_group_concat not yet implemented for current row until
unbounded case\n");
return GDK_FAIL;
}
+
+/* The two case conversion tables are specially crafted from the
+ * UnicodeData.txt file for efficient lookup.
+ *
+ * To look up a character, use the first byte of its UTF-8 encoding as
+ * index into the table.  If the value found is zero, no UTF-8
+ * sequence starting with this byte has a conversion (this holds for
+ * both single-byte and multi-byte sequences).  If the value is not
+ * zero and the sequence is a single byte, the value is the converted
+ * codepoint.  If the value is not zero and the sequence is
+ * multi-byte, the value is an offset into the same table: add the
+ * next byte of the sequence to it (the whole byte, including the top
+ * two bits, which are always 1 and 0 respectively) and use the sum as
+ * the next index.  Then repeat: a zero value means that no sequence
+ * starting with the bytes looked up so far has a conversion; a
+ * non-zero value is the converted codepoint if this was the last byte
+ * of the sequence, and otherwise a new offset into the same table. */
+static int lowercase[4288] = {
+ [0x41] = 0x61, /* U+0041: LATIN CAPITAL LETTER A */
+ [0x42] = 0x62, /* U+0042: LATIN CAPITAL LETTER B */
+ [0x43] = 0x63, /* U+0043: LATIN CAPITAL LETTER C */
+ [0x44] = 0x64, /* U+0044: LATIN CAPITAL LETTER D */
+ [0x45] = 0x65, /* U+0045: LATIN CAPITAL LETTER E */
+ [0x46] = 0x66, /* U+0046: LATIN CAPITAL LETTER F */
+ [0x47] = 0x67, /* U+0047: LATIN CAPITAL LETTER G */
+ [0x48] = 0x68, /* U+0048: LATIN CAPITAL LETTER H */
+ [0x49] = 0x69, /* U+0049: LATIN CAPITAL LETTER I */
+ [0x4A] = 0x6A, /* U+004A: LATIN CAPITAL LETTER J */
+ [0x4B] = 0x6B, /* U+004B: LATIN CAPITAL LETTER K */
+ [0x4C] = 0x6C, /* U+004C: LATIN CAPITAL LETTER L */
+ [0x4D] = 0x6D, /* U+004D: LATIN CAPITAL LETTER M */
+ [0x4E] = 0x6E, /* U+004E: LATIN CAPITAL LETTER N */
+ [0x4F] = 0x6F, /* U+004F: LATIN CAPITAL LETTER O */
+ [0x50] = 0x70, /* U+0050: LATIN CAPITAL LETTER P */
+ [0x51] = 0x71, /* U+0051: LATIN CAPITAL LETTER Q */
+ [0x52] = 0x72, /* U+0052: LATIN CAPITAL LETTER R */
+ [0x53] = 0x73, /* U+0053: LATIN CAPITAL LETTER S */
+ [0x54] = 0x74, /* U+0054: LATIN CAPITAL LETTER T */
+ [0x55] = 0x75, /* U+0055: LATIN CAPITAL LETTER U */
+ [0x56] = 0x76, /* U+0056: LATIN CAPITAL LETTER V */
+ [0x57] = 0x77, /* U+0057: LATIN CAPITAL LETTER W */
+ [0x58] = 0x78, /* U+0058: LATIN CAPITAL LETTER X */
+ [0x59] = 0x79, /* U+0059: LATIN CAPITAL LETTER Y */
+ [0x5A] = 0x7A, /* U+005A: LATIN CAPITAL LETTER Z */
+ [0xC3] = 256 - 0x80, /* 303 ... */
+ [256+0x0] = 0xE0, /* U+00C0: LATIN CAPITAL LETTER A WITH GRAVE */
+ [256+0x1] = 0xE1, /* U+00C1: LATIN CAPITAL LETTER A WITH ACUTE */
+ [256+0x2] = 0xE2, /* U+00C2: LATIN CAPITAL LETTER A WITH
CIRCUMFLEX */
+ [256+0x3] = 0xE3, /* U+00C3: LATIN CAPITAL LETTER A WITH TILDE */
+ [256+0x4] = 0xE4, /* U+00C4: LATIN CAPITAL LETTER A WITH
DIAERESIS */
+ [256+0x5] = 0xE5, /* U+00C5: LATIN CAPITAL LETTER A WITH RING
ABOVE */
+ [256+0x6] = 0xE6, /* U+00C6: LATIN CAPITAL LETTER AE */
+ [256+0x7] = 0xE7, /* U+00C7: LATIN CAPITAL LETTER C WITH CEDILLA
*/
+ [256+0x8] = 0xE8, /* U+00C8: LATIN CAPITAL LETTER E WITH GRAVE */
+ [256+0x9] = 0xE9, /* U+00C9: LATIN CAPITAL LETTER E WITH ACUTE */
+ [256+0xA] = 0xEA, /* U+00CA: LATIN CAPITAL LETTER E WITH
CIRCUMFLEX */
+ [256+0xB] = 0xEB, /* U+00CB: LATIN CAPITAL LETTER E WITH
DIAERESIS */
+ [256+0xC] = 0xEC, /* U+00CC: LATIN CAPITAL LETTER I WITH GRAVE */
+ [256+0xD] = 0xED, /* U+00CD: LATIN CAPITAL LETTER I WITH ACUTE */
+ [256+0xE] = 0xEE, /* U+00CE: LATIN CAPITAL LETTER I WITH
CIRCUMFLEX */
+ [256+0xF] = 0xEF, /* U+00CF: LATIN CAPITAL LETTER I WITH
DIAERESIS */
+ [256+0x10] = 0xF0, /* U+00D0: LATIN CAPITAL LETTER ETH */
+ [256+0x11] = 0xF1, /* U+00D1: LATIN CAPITAL LETTER N WITH TILDE */
+ [256+0x12] = 0xF2, /* U+00D2: LATIN CAPITAL LETTER O WITH GRAVE */
+ [256+0x13] = 0xF3, /* U+00D3: LATIN CAPITAL LETTER O WITH ACUTE */
+ [256+0x14] = 0xF4, /* U+00D4: LATIN CAPITAL LETTER O WITH
CIRCUMFLEX */
+ [256+0x15] = 0xF5, /* U+00D5: LATIN CAPITAL LETTER O WITH TILDE */
+ [256+0x16] = 0xF6, /* U+00D6: LATIN CAPITAL LETTER O WITH
DIAERESIS */
+ [256+0x18] = 0xF8, /* U+00D8: LATIN CAPITAL LETTER O WITH STROKE */
+ [256+0x19] = 0xF9, /* U+00D9: LATIN CAPITAL LETTER U WITH GRAVE */
+ [256+0x1A] = 0xFA, /* U+00DA: LATIN CAPITAL LETTER U WITH ACUTE */
+ [256+0x1B] = 0xFB, /* U+00DB: LATIN CAPITAL LETTER U WITH
CIRCUMFLEX */
+ [256+0x1C] = 0xFC, /* U+00DC: LATIN CAPITAL LETTER U WITH
DIAERESIS */
+ [256+0x1D] = 0xFD, /* U+00DD: LATIN CAPITAL LETTER Y WITH ACUTE */
+ [256+0x1E] = 0xFE, /* U+00DE: LATIN CAPITAL LETTER THORN */
+ [0xC4] = 320 - 0x80, /* 304 ... */
+ [320+0x0] = 0x101, /* U+0100: LATIN CAPITAL LETTER A WITH MACRON */
+ [320+0x2] = 0x103, /* U+0102: LATIN CAPITAL LETTER A WITH BREVE */
+ [320+0x4] = 0x105, /* U+0104: LATIN CAPITAL LETTER A WITH OGONEK */
+ [320+0x6] = 0x107, /* U+0106: LATIN CAPITAL LETTER C WITH ACUTE */
+ [320+0x8] = 0x109, /* U+0108: LATIN CAPITAL LETTER C WITH
CIRCUMFLEX */
+ [320+0xA] = 0x10B, /* U+010A: LATIN CAPITAL LETTER C WITH DOT
ABOVE */
+ [320+0xC] = 0x10D, /* U+010C: LATIN CAPITAL LETTER C WITH CARON */
+ [320+0xE] = 0x10F, /* U+010E: LATIN CAPITAL LETTER D WITH CARON */
+ [320+0x10] = 0x111, /* U+0110: LATIN CAPITAL LETTER D WITH STROKE */
+ [320+0x12] = 0x113, /* U+0112: LATIN CAPITAL LETTER E WITH MACRON */
+ [320+0x14] = 0x115, /* U+0114: LATIN CAPITAL LETTER E WITH BREVE */
+ [320+0x16] = 0x117, /* U+0116: LATIN CAPITAL LETTER E WITH DOT
ABOVE */
+ [320+0x18] = 0x119, /* U+0118: LATIN CAPITAL LETTER E WITH OGONEK */
+ [320+0x1A] = 0x11B, /* U+011A: LATIN CAPITAL LETTER E WITH CARON */
+ [320+0x1C] = 0x11D, /* U+011C: LATIN CAPITAL LETTER G WITH
CIRCUMFLEX */
+ [320+0x1E] = 0x11F, /* U+011E: LATIN CAPITAL LETTER G WITH BREVE */
+ [320+0x20] = 0x121, /* U+0120: LATIN CAPITAL LETTER G WITH DOT
ABOVE */
+ [320+0x22] = 0x123, /* U+0122: LATIN CAPITAL LETTER G WITH CEDILLA
*/
+ [320+0x24] = 0x125, /* U+0124: LATIN CAPITAL LETTER H WITH
CIRCUMFLEX */
+ [320+0x26] = 0x127, /* U+0126: LATIN CAPITAL LETTER H WITH STROKE */
+ [320+0x28] = 0x129, /* U+0128: LATIN CAPITAL LETTER I WITH TILDE */
+ [320+0x2A] = 0x12B, /* U+012A: LATIN CAPITAL LETTER I WITH MACRON */
+ [320+0x2C] = 0x12D, /* U+012C: LATIN CAPITAL LETTER I WITH BREVE */
+ [320+0x2E] = 0x12F, /* U+012E: LATIN CAPITAL LETTER I WITH OGONEK */
+ [320+0x30] = 0x69, /* U+0130: LATIN CAPITAL LETTER I WITH DOT
ABOVE */
+ [320+0x32] = 0x133, /* U+0132: LATIN CAPITAL LIGATURE IJ */
+ [320+0x34] = 0x135, /* U+0134: LATIN CAPITAL LETTER J WITH
CIRCUMFLEX */
+ [320+0x36] = 0x137, /* U+0136: LATIN CAPITAL LETTER K WITH CEDILLA
*/
+ [320+0x39] = 0x13A, /* U+0139: LATIN CAPITAL LETTER L WITH ACUTE */
+ [320+0x3B] = 0x13C, /* U+013B: LATIN CAPITAL LETTER L WITH CEDILLA
*/
+ [320+0x3D] = 0x13E, /* U+013D: LATIN CAPITAL LETTER L WITH CARON */
+ [320+0x3F] = 0x140, /* U+013F: LATIN CAPITAL LETTER L WITH MIDDLE
DOT */
+ [0xC5] = 384 - 0x80, /* 305 ... */
+ [384+0x1] = 0x142, /* U+0141: LATIN CAPITAL LETTER L WITH STROKE */
+ [384+0x3] = 0x144, /* U+0143: LATIN CAPITAL LETTER N WITH ACUTE */
+ [384+0x5] = 0x146, /* U+0145: LATIN CAPITAL LETTER N WITH CEDILLA
*/
+ [384+0x7] = 0x148, /* U+0147: LATIN CAPITAL LETTER N WITH CARON */
+ [384+0xA] = 0x14B, /* U+014A: LATIN CAPITAL LETTER ENG */
+ [384+0xC] = 0x14D, /* U+014C: LATIN CAPITAL LETTER O WITH MACRON */
+ [384+0xE] = 0x14F, /* U+014E: LATIN CAPITAL LETTER O WITH BREVE */
+ [384+0x10] = 0x151, /* U+0150: LATIN CAPITAL LETTER O WITH DOUBLE
ACUTE */
+ [384+0x12] = 0x153, /* U+0152: LATIN CAPITAL LIGATURE OE */
+ [384+0x14] = 0x155, /* U+0154: LATIN CAPITAL LETTER R WITH ACUTE */
+ [384+0x16] = 0x157, /* U+0156: LATIN CAPITAL LETTER R WITH CEDILLA
*/
+ [384+0x18] = 0x159, /* U+0158: LATIN CAPITAL LETTER R WITH CARON */
+ [384+0x1A] = 0x15B, /* U+015A: LATIN CAPITAL LETTER S WITH ACUTE */
+ [384+0x1C] = 0x15D, /* U+015C: LATIN CAPITAL LETTER S WITH
CIRCUMFLEX */
+ [384+0x1E] = 0x15F, /* U+015E: LATIN CAPITAL LETTER S WITH CEDILLA
*/
+ [384+0x20] = 0x161, /* U+0160: LATIN CAPITAL LETTER S WITH CARON */
+ [384+0x22] = 0x163, /* U+0162: LATIN CAPITAL LETTER T WITH CEDILLA
*/
+ [384+0x24] = 0x165, /* U+0164: LATIN CAPITAL LETTER T WITH CARON */
+ [384+0x26] = 0x167, /* U+0166: LATIN CAPITAL LETTER T WITH STROKE */
+ [384+0x28] = 0x169, /* U+0168: LATIN CAPITAL LETTER U WITH TILDE */
+ [384+0x2A] = 0x16B, /* U+016A: LATIN CAPITAL LETTER U WITH MACRON */
+ [384+0x2C] = 0x16D, /* U+016C: LATIN CAPITAL LETTER U WITH BREVE */
+ [384+0x2E] = 0x16F, /* U+016E: LATIN CAPITAL LETTER U WITH RING
ABOVE */
+ [384+0x30] = 0x171, /* U+0170: LATIN CAPITAL LETTER U WITH DOUBLE
ACUTE */
+ [384+0x32] = 0x173, /* U+0172: LATIN CAPITAL LETTER U WITH OGONEK */
+ [384+0x34] = 0x175, /* U+0174: LATIN CAPITAL LETTER W WITH
CIRCUMFLEX */
+ [384+0x36] = 0x177, /* U+0176: LATIN CAPITAL LETTER Y WITH
CIRCUMFLEX */
+ [384+0x38] = 0xFF, /* U+0178: LATIN CAPITAL LETTER Y WITH
DIAERESIS */
+ [384+0x39] = 0x17A, /* U+0179: LATIN CAPITAL LETTER Z WITH ACUTE */
+ [384+0x3B] = 0x17C, /* U+017B: LATIN CAPITAL LETTER Z WITH DOT
ABOVE */
+ [384+0x3D] = 0x17E, /* U+017D: LATIN CAPITAL LETTER Z WITH CARON */
+ [0xC6] = 448 - 0x80, /* 306 ... */
+ [448+0x1] = 0x253, /* U+0181: LATIN CAPITAL LETTER B WITH HOOK */
+ [448+0x2] = 0x183, /* U+0182: LATIN CAPITAL LETTER B WITH TOPBAR */
+ [448+0x4] = 0x185, /* U+0184: LATIN CAPITAL LETTER TONE SIX */
+ [448+0x6] = 0x254, /* U+0186: LATIN CAPITAL LETTER OPEN O */
+ [448+0x7] = 0x188, /* U+0187: LATIN CAPITAL LETTER C WITH HOOK */
+ [448+0x9] = 0x256, /* U+0189: LATIN CAPITAL LETTER AFRICAN D */
+ [448+0xA] = 0x257, /* U+018A: LATIN CAPITAL LETTER D WITH HOOK */
+ [448+0xB] = 0x18C, /* U+018B: LATIN CAPITAL LETTER D WITH TOPBAR */
+ [448+0xE] = 0x1DD, /* U+018E: LATIN CAPITAL LETTER REVERSED E */
+ [448+0xF] = 0x259, /* U+018F: LATIN CAPITAL LETTER SCHWA */
+ [448+0x10] = 0x25B, /* U+0190: LATIN CAPITAL LETTER OPEN E */
+ [448+0x11] = 0x192, /* U+0191: LATIN CAPITAL LETTER F WITH HOOK */
+ [448+0x13] = 0x260, /* U+0193: LATIN CAPITAL LETTER G WITH HOOK */
+ [448+0x14] = 0x263, /* U+0194: LATIN CAPITAL LETTER GAMMA */
+ [448+0x16] = 0x269, /* U+0196: LATIN CAPITAL LETTER IOTA */
+ [448+0x17] = 0x268, /* U+0197: LATIN CAPITAL LETTER I WITH STROKE */
+ [448+0x18] = 0x199, /* U+0198: LATIN CAPITAL LETTER K WITH HOOK */
+ [448+0x1C] = 0x26F, /* U+019C: LATIN CAPITAL LETTER TURNED M */
+ [448+0x1D] = 0x272, /* U+019D: LATIN CAPITAL LETTER N WITH LEFT
HOOK */
+ [448+0x1F] = 0x275, /* U+019F: LATIN CAPITAL LETTER O WITH MIDDLE
TILDE */
+ [448+0x20] = 0x1A1, /* U+01A0: LATIN CAPITAL LETTER O WITH HORN */
+ [448+0x22] = 0x1A3, /* U+01A2: LATIN CAPITAL LETTER OI */
+ [448+0x24] = 0x1A5, /* U+01A4: LATIN CAPITAL LETTER P WITH HOOK */
+ [448+0x26] = 0x280, /* U+01A6: LATIN LETTER YR */
+ [448+0x27] = 0x1A8, /* U+01A7: LATIN CAPITAL LETTER TONE TWO */
+ [448+0x29] = 0x283, /* U+01A9: LATIN CAPITAL LETTER ESH */
+ [448+0x2C] = 0x1AD, /* U+01AC: LATIN CAPITAL LETTER T WITH HOOK */
+ [448+0x2E] = 0x288, /* U+01AE: LATIN CAPITAL LETTER T WITH
RETROFLEX HOOK */
+ [448+0x2F] = 0x1B0, /* U+01AF: LATIN CAPITAL LETTER U WITH HORN */
+ [448+0x31] = 0x28A, /* U+01B1: LATIN CAPITAL LETTER UPSILON */
+ [448+0x32] = 0x28B, /* U+01B2: LATIN CAPITAL LETTER V WITH HOOK */
+ [448+0x33] = 0x1B4, /* U+01B3: LATIN CAPITAL LETTER Y WITH HOOK */
+ [448+0x35] = 0x1B6, /* U+01B5: LATIN CAPITAL LETTER Z WITH STROKE */
+ [448+0x37] = 0x292, /* U+01B7: LATIN CAPITAL LETTER EZH */
+ [448+0x38] = 0x1B9, /* U+01B8: LATIN CAPITAL LETTER EZH REVERSED */
+ [448+0x3C] = 0x1BD, /* U+01BC: LATIN CAPITAL LETTER TONE FIVE */
+ [0xC7] = 512 - 0x80, /* 307 ... */
+ [512+0x4] = 0x1C6, /* U+01C4: LATIN CAPITAL LETTER DZ WITH CARON */
+ [512+0x5] = 0x1C6, /* U+01C5: LATIN CAPITAL LETTER D WITH SMALL
LETTER Z WITH CARON */
+ [512+0x7] = 0x1C9, /* U+01C7: LATIN CAPITAL LETTER LJ */
+ [512+0x8] = 0x1C9, /* U+01C8: LATIN CAPITAL LETTER L WITH SMALL
LETTER J */
+ [512+0xA] = 0x1CC, /* U+01CA: LATIN CAPITAL LETTER NJ */
+ [512+0xB] = 0x1CC, /* U+01CB: LATIN CAPITAL LETTER N WITH SMALL
LETTER J */
+ [512+0xD] = 0x1CE, /* U+01CD: LATIN CAPITAL LETTER A WITH CARON */
+ [512+0xF] = 0x1D0, /* U+01CF: LATIN CAPITAL LETTER I WITH CARON */
+ [512+0x11] = 0x1D2, /* U+01D1: LATIN CAPITAL LETTER O WITH CARON */
+ [512+0x13] = 0x1D4, /* U+01D3: LATIN CAPITAL LETTER U WITH CARON */
+ [512+0x15] = 0x1D6, /* U+01D5: LATIN CAPITAL LETTER U WITH
DIAERESIS AND MACRON */
+ [512+0x17] = 0x1D8, /* U+01D7: LATIN CAPITAL LETTER U WITH
DIAERESIS AND ACUTE */
+ [512+0x19] = 0x1DA, /* U+01D9: LATIN CAPITAL LETTER U WITH
DIAERESIS AND CARON */
+ [512+0x1B] = 0x1DC, /* U+01DB: LATIN CAPITAL LETTER U WITH
DIAERESIS AND GRAVE */
+ [512+0x1E] = 0x1DF, /* U+01DE: LATIN CAPITAL LETTER A WITH
DIAERESIS AND MACRON */
+ [512+0x20] = 0x1E1, /* U+01E0: LATIN CAPITAL LETTER A WITH DOT
ABOVE AND MACRON */
+ [512+0x22] = 0x1E3, /* U+01E2: LATIN CAPITAL LETTER AE WITH MACRON
*/
+ [512+0x24] = 0x1E5, /* U+01E4: LATIN CAPITAL LETTER G WITH STROKE */
+ [512+0x26] = 0x1E7, /* U+01E6: LATIN CAPITAL LETTER G WITH CARON */
+ [512+0x28] = 0x1E9, /* U+01E8: LATIN CAPITAL LETTER K WITH CARON */
+ [512+0x2A] = 0x1EB, /* U+01EA: LATIN CAPITAL LETTER O WITH OGONEK */
+ [512+0x2C] = 0x1ED, /* U+01EC: LATIN CAPITAL LETTER O WITH OGONEK
AND MACRON */
+ [512+0x2E] = 0x1EF, /* U+01EE: LATIN CAPITAL LETTER EZH WITH CARON
*/
+ [512+0x31] = 0x1F3, /* U+01F1: LATIN CAPITAL LETTER DZ */
+ [512+0x32] = 0x1F3, /* U+01F2: LATIN CAPITAL LETTER D WITH SMALL
LETTER Z */
+ [512+0x34] = 0x1F5, /* U+01F4: LATIN CAPITAL LETTER G WITH ACUTE */
+ [512+0x36] = 0x195, /* U+01F6: LATIN CAPITAL LETTER HWAIR */
+ [512+0x37] = 0x1BF, /* U+01F7: LATIN CAPITAL LETTER WYNN */
+ [512+0x38] = 0x1F9, /* U+01F8: LATIN CAPITAL LETTER N WITH GRAVE */
+ [512+0x3A] = 0x1FB, /* U+01FA: LATIN CAPITAL LETTER A WITH RING
ABOVE AND ACUTE */
+ [512+0x3C] = 0x1FD, /* U+01FC: LATIN CAPITAL LETTER AE WITH ACUTE */
+ [512+0x3E] = 0x1FF, /* U+01FE: LATIN CAPITAL LETTER O WITH STROKE
AND ACUTE */
+ [0xC8] = 576 - 0x80, /* 310 ... */
+ [576+0x0] = 0x201, /* U+0200: LATIN CAPITAL LETTER A WITH DOUBLE
GRAVE */
+ [576+0x2] = 0x203, /* U+0202: LATIN CAPITAL LETTER A WITH INVERTED
BREVE */
+ [576+0x4] = 0x205, /* U+0204: LATIN CAPITAL LETTER E WITH DOUBLE
GRAVE */
+ [576+0x6] = 0x207, /* U+0206: LATIN CAPITAL LETTER E WITH INVERTED
BREVE */
+ [576+0x8] = 0x209, /* U+0208: LATIN CAPITAL LETTER I WITH DOUBLE
GRAVE */
+ [576+0xA] = 0x20B, /* U+020A: LATIN CAPITAL LETTER I WITH INVERTED
BREVE */
+ [576+0xC] = 0x20D, /* U+020C: LATIN CAPITAL LETTER O WITH DOUBLE
GRAVE */
+ [576+0xE] = 0x20F, /* U+020E: LATIN CAPITAL LETTER O WITH INVERTED
BREVE */
+ [576+0x10] = 0x211, /* U+0210: LATIN CAPITAL LETTER R WITH DOUBLE
GRAVE */
+ [576+0x12] = 0x213, /* U+0212: LATIN CAPITAL LETTER R WITH INVERTED
BREVE */
+ [576+0x14] = 0x215, /* U+0214: LATIN CAPITAL LETTER U WITH DOUBLE
GRAVE */
+ [576+0x16] = 0x217, /* U+0216: LATIN CAPITAL LETTER U WITH INVERTED
BREVE */
+ [576+0x18] = 0x219, /* U+0218: LATIN CAPITAL LETTER S WITH COMMA
BELOW */
+ [576+0x1A] = 0x21B, /* U+021A: LATIN CAPITAL LETTER T WITH COMMA
BELOW */
+ [576+0x1C] = 0x21D, /* U+021C: LATIN CAPITAL LETTER YOGH */
+ [576+0x1E] = 0x21F, /* U+021E: LATIN CAPITAL LETTER H WITH CARON */
+ [576+0x20] = 0x19E, /* U+0220: LATIN CAPITAL LETTER N WITH LONG
RIGHT LEG */
+ [576+0x22] = 0x223, /* U+0222: LATIN CAPITAL LETTER OU */
+ [576+0x24] = 0x225, /* U+0224: LATIN CAPITAL LETTER Z WITH HOOK */
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]