Update of /cvsroot/monetdb/MonetDB5/src/modules/atoms
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv21377/src/modules/atoms

Modified Files:
        str.mx 
Log Message:
aligned with M4 (updated unicode maps)


Index: str.mx
===================================================================
RCS file: /cvsroot/monetdb/MonetDB5/src/modules/atoms/str.mx,v
retrieving revision 1.66
retrieving revision 1.67
diff -u -d -r1.66 -r1.67
--- str.mx      31 May 2007 20:43:12 -0000      1.66
+++ str.mx      1 Jun 2007 09:21:39 -0000       1.67
@@ -236,7 +236,7 @@
 #endif
 
 str_export bat *strPrelude(void);
-str_export str strEpilogue(int *ret);
+str_export str strEpilogue(void);
 str_export str STRtostr(str *res, str *src);
 str_export str STRConcat(str *res, str *val1, str *val2);
 str_export str STRLength(int *res, str *arg1);
@@ -346,11 +346,9 @@
 The Unicode case conversion implementation in Monet fills a mapping BAT of int,int combinations,
 in which we perform high-performance hash-lookup (all code inlined).
 @c
-/* This table was generated from the Unicode 3.2.0 spec.
+/* This table was generated from the Unicode 5.0.0 spec.
    The table is generated by using the codes for conversion to lower
-   case and for conversion to title case (note: not to upper case).
-   Title case is used since the interface to convert to upper case
-   converts the whole string.
+   case and for conversion to title case and upper case.
    A few code points have been moved in order to get reasonable
    conversions (if two code points are converted to the same value,
    the first one in this table wins).  The code points that have
@@ -367,6 +365,7 @@
    lower case <-> title case
    lower case <-  upper case
    upper case  -> title case
+   The conversion title case -> upper case was removed
 
    The relevant code points are:
    U+01C4 (LATIN CAPITAL LETTER DZ WITH CARON)
@@ -382,14 +381,16 @@
    U+01F2 (LATIN CAPITAL LETTER D WITH SMALL LETTER Z)
    U+01F3 (LATIN SMALL LETTER DZ)
 
-   The core awk script used is:
-       $15 != "" && $15 != $1 {printf "{0x%s,0x%s,},\n",$1,$15}
-       $14 != "" && $14 != $1 {printf "{0x%s,0x%s,},\n",$14,$1}
+   The script used was basically:
+(cut -d\; -f1,14 UnicodeData.txt | sed -n 's/\(.*\);\(..*\)/\2;\1/p'
+ cut -d\; -f1,15 UnicodeData.txt | grep -v ';$'
+ cut -d\; -f1,13 UnicodeData.txt | grep -v ';$'
+) | grep -v '^\([^ ]*\);\1$' | sort -t\; -u | sed 's/\(.*\);\(.*\)/{0x\1,0x\2,},/'
    with some hand munging afterward.  The data file is UnicodeData.txt
    from http://www.unicode.org/.
  */
 struct UTF8_lower_upper {
-       unsigned short lower, upper;
+       unsigned int lower, upper;
 } UTF8_lower_upper[] = {
        { 0x0061, 0x0041, },
        { 0x0062, 0x0042, },
@@ -515,6 +516,7 @@
        { 0x017C, 0x017B, },
        { 0x017E, 0x017D, },
        { 0x017F, 0x0053, },
+       { 0x0180, 0x0243, },
        { 0x0183, 0x0182, },
        { 0x0185, 0x0184, },
        { 0x0188, 0x0187, },
@@ -522,6 +524,7 @@
        { 0x0192, 0x0191, },
        { 0x0195, 0x01F6, },
        { 0x0199, 0x0198, },
+       { 0x019A, 0x023D, },
        { 0x019E, 0x0220, },
        { 0x01A1, 0x01A0, },
        { 0x01A3, 0x01A2, },
@@ -534,15 +537,15 @@
        { 0x01B9, 0x01B8, },
        { 0x01BD, 0x01BC, },
        { 0x01BF, 0x01F7, },
-       { 0x01C6, 0x01C4, },
        { 0x01C6, 0x01C5, },
-       { 0x01C5, 0x01C4, },
-       { 0x01C9, 0x01C7, },
+       { 0x01C6, 0x01C4, },
+       { 0x01C4, 0x01C5, },
        { 0x01C9, 0x01C8, },
-       { 0x01C8, 0x01C7, },
-       { 0x01CC, 0x01CA, },
+       { 0x01C9, 0x01C7, },
+       { 0x01C7, 0x01C8, },
        { 0x01CC, 0x01CB, },
-       { 0x01CB, 0x01CA, },
+       { 0x01CC, 0x01CA, },
+       { 0x01CA, 0x01CB, },
        { 0x01CE, 0x01CD, },
        { 0x01D0, 0x01CF, },
        { 0x01D2, 0x01D1, },
@@ -561,9 +564,9 @@
        { 0x01EB, 0x01EA, },
        { 0x01ED, 0x01EC, },
        { 0x01EF, 0x01EE, },
-       { 0x01F3, 0x01F1, },
        { 0x01F3, 0x01F2, },
-       { 0x01F2, 0x01F1, },
+       { 0x01F3, 0x01F1, },
+       { 0x01F1, 0x01F2, },
        { 0x01F5, 0x01F4, },
        { 0x01F9, 0x01F8, },
        { 0x01FB, 0x01FA, },
@@ -594,6 +597,13 @@
        { 0x022F, 0x022E, },
        { 0x0231, 0x0230, },
        { 0x0233, 0x0232, },
+       { 0x023C, 0x023B, },
+       { 0x0242, 0x0241, },
+       { 0x0247, 0x0246, },
+       { 0x0249, 0x0248, },
+       { 0x024B, 0x024A, },
+       { 0x024D, 0x024C, },
+       { 0x024F, 0x024E, },
        { 0x0253, 0x0181, },
        { 0x0254, 0x0186, },
        { 0x0256, 0x0189, },
@@ -604,16 +614,23 @@
        { 0x0263, 0x0194, },
        { 0x0268, 0x0197, },
        { 0x0269, 0x0196, },
+       { 0x026B, 0x2C62, },
        { 0x026F, 0x019C, },
        { 0x0272, 0x019D, },
        { 0x0275, 0x019F, },
+       { 0x027D, 0x2C64, },
        { 0x0280, 0x01A6, },
        { 0x0283, 0x01A9, },
        { 0x0288, 0x01AE, },
+       { 0x0289, 0x0244, },
        { 0x028A, 0x01B1, },
        { 0x028B, 0x01B2, },
+       { 0x028C, 0x0245, },
        { 0x0292, 0x01B7, },
        { 0x03B9, 0x0399, },
+       { 0x037B, 0x03FD, },
+       { 0x037C, 0x03FE, },
+       { 0x037D, 0x03FF, },
        { 0x03AC, 0x0386, },
        { 0x03AD, 0x0388, },
        { 0x03AE, 0x0389, },
@@ -771,6 +788,7 @@
        { 0x04CA, 0x04C9, },
        { 0x04CC, 0x04CB, },
        { 0x04CE, 0x04CD, },
+       { 0x04CF, 0x04C0, },
        { 0x04D1, 0x04D0, },
        { 0x04D3, 0x04D2, },
        { 0x04D5, 0x04D4, },
@@ -790,7 +808,11 @@
        { 0x04F1, 0x04F0, },
        { 0x04F3, 0x04F2, },
        { 0x04F5, 0x04F4, },
+       { 0x04F7, 0x04F6, },
        { 0x04F9, 0x04F8, },
+       { 0x04FB, 0x04FA, },
+       { 0x04FD, 0x04FC, },
+       { 0x04FF, 0x04FE, },
        { 0x0501, 0x0500, },
        { 0x0503, 0x0502, },
        { 0x0505, 0x0504, },
@@ -799,6 +821,8 @@
        { 0x050B, 0x050A, },
        { 0x050D, 0x050C, },
        { 0x050F, 0x050E, },
+       { 0x0511, 0x0510, },
+       { 0x0513, 0x0512, },
        { 0x0561, 0x0531, },
        { 0x0562, 0x0532, },
        { 0x0563, 0x0533, },
@@ -837,6 +861,7 @@
        { 0x0584, 0x0554, },
        { 0x0585, 0x0555, },
        { 0x0586, 0x0556, },
+       { 0x1D7D, 0x2C63, },
        { 0x1E01, 0x1E00, },
        { 0x1E03, 0x1E02, },
        { 0x1E05, 0x1E04, },
@@ -1055,6 +1080,7 @@
        { 0x1FE1, 0x1FE9, },
        { 0x1FE5, 0x1FEC, },
        { 0x1FF3, 0x1FFC, },
+       { 0x214E, 0x2132, },
        { 0x2170, 0x2160, },
        { 0x2171, 0x2161, },
        { 0x2172, 0x2162, },
@@ -1071,6 +1097,7 @@
        { 0x217D, 0x216D, },
        { 0x217E, 0x216E, },
        { 0x217F, 0x216F, },
+       { 0x2184, 0x2183, },
        { 0x24D0, 0x24B6, },
        { 0x24D1, 0x24B7, },
        { 0x24D2, 0x24B8, },
@@ -1097,6 +1124,148 @@
        { 0x24E7, 0x24CD, },
        { 0x24E8, 0x24CE, },
        { 0x24E9, 0x24CF, },
+       { 0x2C30, 0x2C00, },
+       { 0x2C31, 0x2C01, },
+       { 0x2C32, 0x2C02, },
+       { 0x2C33, 0x2C03, },
+       { 0x2C34, 0x2C04, },
+       { 0x2C35, 0x2C05, },
+       { 0x2C36, 0x2C06, },
+       { 0x2C37, 0x2C07, },
+       { 0x2C38, 0x2C08, },
+       { 0x2C39, 0x2C09, },
+       { 0x2C3A, 0x2C0A, },
+       { 0x2C3B, 0x2C0B, },
+       { 0x2C3C, 0x2C0C, },
+       { 0x2C3D, 0x2C0D, },
+       { 0x2C3E, 0x2C0E, },
+       { 0x2C3F, 0x2C0F, },
+       { 0x2C40, 0x2C10, },
+       { 0x2C41, 0x2C11, },
+       { 0x2C42, 0x2C12, },
+       { 0x2C43, 0x2C13, },
+       { 0x2C44, 0x2C14, },
+       { 0x2C45, 0x2C15, },
+       { 0x2C46, 0x2C16, },
+       { 0x2C47, 0x2C17, },
+       { 0x2C48, 0x2C18, },
+       { 0x2C49, 0x2C19, },
+       { 0x2C4A, 0x2C1A, },
+       { 0x2C4B, 0x2C1B, },
+       { 0x2C4C, 0x2C1C, },
+       { 0x2C4D, 0x2C1D, },
+       { 0x2C4E, 0x2C1E, },
+       { 0x2C4F, 0x2C1F, },
+       { 0x2C50, 0x2C20, },
+       { 0x2C51, 0x2C21, },
+       { 0x2C52, 0x2C22, },
+       { 0x2C53, 0x2C23, },
+       { 0x2C54, 0x2C24, },
+       { 0x2C55, 0x2C25, },
+       { 0x2C56, 0x2C26, },
+       { 0x2C57, 0x2C27, },
+       { 0x2C58, 0x2C28, },
+       { 0x2C59, 0x2C29, },
+       { 0x2C5A, 0x2C2A, },
+       { 0x2C5B, 0x2C2B, },
+       { 0x2C5C, 0x2C2C, },
+       { 0x2C5D, 0x2C2D, },
+       { 0x2C5E, 0x2C2E, },
+       { 0x2C61, 0x2C60, },
+       { 0x2C65, 0x023A, },
+       { 0x2C66, 0x023E, },
+       { 0x2C68, 0x2C67, },
+       { 0x2C6A, 0x2C69, },
+       { 0x2C6C, 0x2C6B, },
+       { 0x2C76, 0x2C75, },
+       { 0x2C81, 0x2C80, },
+       { 0x2C83, 0x2C82, },
+       { 0x2C85, 0x2C84, },
+       { 0x2C87, 0x2C86, },
+       { 0x2C89, 0x2C88, },
+       { 0x2C8B, 0x2C8A, },
+       { 0x2C8D, 0x2C8C, },
+       { 0x2C8F, 0x2C8E, },
+       { 0x2C91, 0x2C90, },
+       { 0x2C93, 0x2C92, },
+       { 0x2C95, 0x2C94, },
+       { 0x2C97, 0x2C96, },
+       { 0x2C99, 0x2C98, },
+       { 0x2C9B, 0x2C9A, },
+       { 0x2C9D, 0x2C9C, },
+       { 0x2C9F, 0x2C9E, },
+       { 0x2CA1, 0x2CA0, },
+       { 0x2CA3, 0x2CA2, },
+       { 0x2CA5, 0x2CA4, },
+       { 0x2CA7, 0x2CA6, },
+       { 0x2CA9, 0x2CA8, },
+       { 0x2CAB, 0x2CAA, },
+       { 0x2CAD, 0x2CAC, },
+       { 0x2CAF, 0x2CAE, },
+       { 0x2CB1, 0x2CB0, },
+       { 0x2CB3, 0x2CB2, },
+       { 0x2CB5, 0x2CB4, },
+       { 0x2CB7, 0x2CB6, },
+       { 0x2CB9, 0x2CB8, },
+       { 0x2CBB, 0x2CBA, },
+       { 0x2CBD, 0x2CBC, },
+       { 0x2CBF, 0x2CBE, },
+       { 0x2CC1, 0x2CC0, },
+       { 0x2CC3, 0x2CC2, },
+       { 0x2CC5, 0x2CC4, },
+       { 0x2CC7, 0x2CC6, },
+       { 0x2CC9, 0x2CC8, },
+       { 0x2CCB, 0x2CCA, },
+       { 0x2CCD, 0x2CCC, },
+       { 0x2CCF, 0x2CCE, },
+       { 0x2CD1, 0x2CD0, },
+       { 0x2CD3, 0x2CD2, },
+       { 0x2CD5, 0x2CD4, },
+       { 0x2CD7, 0x2CD6, },
+       { 0x2CD9, 0x2CD8, },
+       { 0x2CDB, 0x2CDA, },
+       { 0x2CDD, 0x2CDC, },
+       { 0x2CDF, 0x2CDE, },
+       { 0x2CE1, 0x2CE0, },
+       { 0x2CE3, 0x2CE2, },
+       { 0x2D00, 0x10A0, },
+       { 0x2D01, 0x10A1, },
+       { 0x2D02, 0x10A2, },
+       { 0x2D03, 0x10A3, },
+       { 0x2D04, 0x10A4, },
+       { 0x2D05, 0x10A5, },
+       { 0x2D06, 0x10A6, },
+       { 0x2D07, 0x10A7, },
+       { 0x2D08, 0x10A8, },
+       { 0x2D09, 0x10A9, },
+       { 0x2D0A, 0x10AA, },
+       { 0x2D0B, 0x10AB, },
+       { 0x2D0C, 0x10AC, },
+       { 0x2D0D, 0x10AD, },
+       { 0x2D0E, 0x10AE, },
+       { 0x2D0F, 0x10AF, },
+       { 0x2D10, 0x10B0, },
+       { 0x2D11, 0x10B1, },
+       { 0x2D12, 0x10B2, },
+       { 0x2D13, 0x10B3, },
+       { 0x2D14, 0x10B4, },
+       { 0x2D15, 0x10B5, },
+       { 0x2D16, 0x10B6, },
+       { 0x2D17, 0x10B7, },
+       { 0x2D18, 0x10B8, },
+       { 0x2D19, 0x10B9, },
+       { 0x2D1A, 0x10BA, },
+       { 0x2D1B, 0x10BB, },
+       { 0x2D1C, 0x10BC, },
+       { 0x2D1D, 0x10BD, },
+       { 0x2D1E, 0x10BE, },
+       { 0x2D1F, 0x10BF, },
+       { 0x2D20, 0x10C0, },
+       { 0x2D21, 0x10C1, },
+       { 0x2D22, 0x10C2, },
+       { 0x2D23, 0x10C3, },
+       { 0x2D24, 0x10C4, },
+       { 0x2D25, 0x10C5, },
        { 0xFF41, 0xFF21, },
        { 0xFF42, 0xFF22, },
        { 0xFF43, 0xFF23, },
@@ -1123,9 +1292,6 @@
        { 0xFF58, 0xFF38, },
        { 0xFF59, 0xFF39, },
        { 0xFF5A, 0xFF3A, },
-#if 0
-           /* these codes don't fit in an unsigned short, else they
-              should be included */
        { 0x10428, 0x10400, },
        { 0x10429, 0x10401, },
        { 0x1042A, 0x10402, },
@@ -1166,7 +1332,6 @@
        { 0x1044D, 0x10425, },
        { 0x1044E, 0x10426, },
        { 0x1044F, 0x10427, },
-#endif
 };
 
 #define UTF8_CONVERSIONS (sizeof(UTF8_lower_upper) / sizeof(UTF8_lower_upper[0]))
@@ -1195,13 +1360,10 @@
 }
 
 str 
-strEpilogue(int *ret)
+strEpilogue(void)
 {
-       (void)ret;
-       if (UTF8_toupperBat) {
-               BBPreclaim(UTF8_toupperBat);
-               BBPreclaim(UTF8_tolowerBat);
-       }
+       if (UTF8_toupperBat)
+               BBPunfix(UTF8_toupperBat->batCacheid);
        return MAL_SUCCEED;
 }
 
@@ -1410,8 +1572,7 @@
 {
        str valstr = NULL;
        size_t l1;
-       int l2 = 0;
-       char buf[7], *p = buf;
+       int l2 = 0;             /* use int because of atomToStr */
 
        RETURN_NIL_IF(strNil(s) || ATOMcmp(t, val, ATOMnilptr(t)) == 0, TYPE_str);
        if (t <= 0)
@@ -1419,23 +1580,32 @@
        l1 = strlen(s);
        if (t != TYPE_str) {
                if (t == TYPE_chr) {
+                       char buf[7], *p = buf;
+
                        /* put value in int to avoid warning from compiler */
-                       l2 = * (char *) val;
+                       if (* (chr *) val == chr_nil)
+                               l2 = -1; /* indicate nil */
+                       else
+                               l2 = * (unsigned char *) val;
                        @:UTF8_PUTCHAR(l2,p)@
-                       l2 = p - buf;
+                       l2 = (int) (p - buf);
                        val = (ptr) buf;
                } else {
                        BATatoms[t].atomToStr(&valstr, &l2, val);
                        val = (ptr) valstr;
                }
        } else {
-               l2 = strlen((str) val);
+               l2 = (int) strlen((str) val);
        }
-       *res = (str) GDKmalloc(l1 + l2 + 1);
-       memcpy(*res, s, l1);
-       memcpy(*res + l1, (str) val, l2);
-       (*res)[l1 + l2] = '\0';
-       if (valstr)
+       if (* (str) val == chr_nil || *s == chr_nil)
+               *res = GDKstrdup(str_nil);
+       else {
+               *res = (str) GDKmalloc(l1 + l2 + 1);
+               memcpy(*res, s, l1);
+               memcpy(*res + l1, (str) val, l2);
+               (*res)[l1 + l2] = '\0';
+       }
+       if (valstr && (str) valstr != str_nil)
                GDKfree(valstr);
        return GDK_SUCCEED;
 }
@@ -1443,6 +1613,7 @@
 int
 strLength(int *res, str s)
 {
+/* 64bit: should return wrd */
        RETURN_NIL_IF(strNil(s), TYPE_int);
        *res = UTF8_strlen(s);
        return GDK_SUCCEED;
@@ -1451,6 +1622,7 @@
 int
 strBytes(int *res, str s)
 {
+/* 64bit: should return wrd */
        *res = strlen(s);
        return GDK_SUCCEED;
 }
@@ -1518,6 +1690,7 @@
 int
 strWChrAt(int *res, str val, int *at)
 {
+/* 64bit: should have wrd arg */
        unsigned char *s = (unsigned char *) val;
 
        RETURN_NIL_IF(strNil(val) || *at == int_nil || *at < 0, TYPE_chr);
@@ -1531,11 +1704,11 @@
 codeset(str *res)
 {
 #ifdef HAVE_NL_LANGINFO
-       char *codeset = nl_langinfo(CODESET);
+       char *code_set = nl_langinfo(CODESET);
 
-       if (!codeset)
+       if (!code_set)
                return GDK_FAIL;
-       *res = GDKstrdup(codeset);
+       *res = GDKstrdup(code_set);
        return GDK_SUCCEED;
 #else
        *res = GDKstrdup("UTF-8");
@@ -1693,6 +1866,7 @@
 int
 strStrSearch(int *res, str s, str s2)
 {
+/* 64bit: should return wrd */
        char *p;
 
        RETURN_NIL_IF(strNil(s) || strNil(s2), TYPE_int);
@@ -1706,6 +1880,7 @@
 int
 strReverseStrSearch(int *res, str s, str s2)
 {
+/* 64bit: should return wrd */
        size_t len, slen;
        char *p, *q;
        size_t i;
@@ -1729,6 +1904,7 @@
 int
 strChrSearch(int *res, str s, chr *c)
 {
+/* 64bit: should return wrd */
        int i = (int) *c;
        char buf[7], *p = buf;
 
@@ -1741,6 +1917,7 @@
 int
 strReverseChrSearch(int *res, str s, chr *c)
 {
+/* 64bit: should return wrd */
        int i = (int) *c;
        char buf[7], *p = buf;
 


-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
_______________________________________________
Monetdb-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-checkins

Reply via email to