Changeset: 1e06a9390269 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/1e06a9390269
Modified Files:
        gdk/gdk_string.c
Branch: ascii-flag
Log Message:

Add some comments.


diffs (43 lines):

diff --git a/gdk/gdk_string.c b/gdk/gdk_string.c
--- a/gdk/gdk_string.c
+++ b/gdk/gdk_string.c
@@ -1453,9 +1453,13 @@ GDKanalytical_str_group_concat(BAT *r, B
        return GDK_FAIL;
 }
 
-/* The two case conversion tables are specially crafted from the
+/* The three case conversion tables are specially crafted from the
  * UnicodeData.txt file for efficient lookup.
  *
+ * The lowercase and uppercase tables are derived from the
+ * UnicodeData.txt file (the respective columns from that file); the
+ * casefold table is derived from the CaseFolding.txt file.
+ *
  * For the first byte of a UTF-8 encoding, use the value as index into
  * the table.  If the value is zero, there are no conversions for any
  * UTF-8 string starting with this byte (this includes both multi-byte
@@ -1469,7 +1473,10 @@ GDKanalytical_str_group_concat(BAT *r, B
  * The process then repeats: if zero, no conversions for any sequence
  * starting with the bytes looked up so far, if non-zero, if this is the
  * last byte of a sequence, it is the converted codepoint, and otherwise
- * a (new) offset into the same table. */
+ * a (new) offset into the same table.
+ * For the casefold table only, a negative converted codepoint is
+ * actually an escape into the specialcase table; its absolute value
+ * is the index into that table. */
 static const char *const specialcase[] = {
        NULL,
        "ss",
@@ -7086,6 +7093,12 @@ GDKstrcasestr(const char *haystack, cons
        return NULL;
 }
 
+/* The asciify table uses the same technique as the case conversion
+ * tables, except that the calculated value is not a codepoint.
+ * Instead, it is the index into the valtab table, which contains the
+ * string that is used to replace the asciified character.
+ * This combination of tables is derived from the command
+ * ``iconv -futf-8 -tASCII//TRANSLIT``. */
 static const char *const valtab[] = {
        NULL,
        [1] = " ",
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to