Changeset: d5c304dce084 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/d5c304dce084
Modified Files:
monetdb5/modules/atoms/str.c
Branch: ascii-flag
Log Message:
Add line and paragraph separators to the list of whitespace codepoints.
diffs (79 lines):
diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c
--- a/monetdb5/modules/atoms/str.c
+++ b/monetdb5/modules/atoms/str.c
@@ -1057,9 +1057,9 @@ STRsplitpart(str *res, str *haystack, st
/* returns number of bytes to remove from left to strip the codepoints in rm */
static size_t
-lstrip(const char *s, size_t len, const int *rm, size_t nrm)
+lstrip(const char *s, size_t len, const uint32_t *rm, size_t nrm)
{
- int c;
+ uint32_t c;
size_t i, n, skip = 0;
while (len > 0) {
@@ -1082,9 +1082,9 @@ lstrip(const char *s, size_t len, const
/* returns the resulting length of s after stripping codepoints in rm
* from the right */
static size_t
-rstrip(const char *s, size_t len, const int *rm, size_t nrm)
+rstrip(const char *s, size_t len, const uint32_t *rm, size_t nrm)
{
- int c;
+ uint32_t c;
size_t i, n;
while (len > 0) {
@@ -1102,7 +1102,7 @@ rstrip(const char *s, size_t len, const
return len;
}
-const int whitespace[] = {
+const uint32_t whitespace[] = {
' ', /* space */
'\t', /* tab (character
tabulation) */
'\n', /* line feed */
@@ -1126,6 +1126,11 @@ const int whitespace[] = {
0x202F, /* narrow no-break
space */
0x205F, /* medium mathematical
space */
0x3000, /* ideographic space */
+/* below the code points that have the Unicode Zl (line separator) property */
+ 0x2028, /* line separator */
+/* below the code points that have the Unicode Zp (paragraph separator)
+ * property */
+ 0x2029, /* paragraph separator
*/
};
#define NSPACES (sizeof(whitespace) / sizeof(whitespace[0]))
@@ -1294,10 +1299,10 @@ str_strip2(str *buf, size_t *buflen, con
if ((msg = trimchars(buf, buflen, &n3, s2, n2, "str.strip2"))
!= MAL_SUCCEED)
return msg;
len = strlen(s);
- n = lstrip(s, len, *(int **) buf, n3);
+ n = lstrip(s, len, *(uint32_t **) buf, n3);
s += n;
len -= n;
- n = rstrip(s, len, *(int **) buf, n3);
+ n = rstrip(s, len, *(uint32_t **) buf, n3);
n++;
CHECK_STR_BUFFER_LENGTH(buf, buflen, n, "str.strip2");
@@ -1351,7 +1356,7 @@ str_ltrim2(str *buf, size_t *buflen, con
if ((msg = trimchars(buf, buflen, &n3, s2, n2, "str.ltrim2"))
!= MAL_SUCCEED)
return msg;
len = strlen(s);
- n = lstrip(s, len, *(int **) buf, n3);
+ n = lstrip(s, len, *(uint32_t **) buf, n3);
nallocate = len - n + 1;
CHECK_STR_BUFFER_LENGTH(buf, buflen, nallocate, "str.ltrim2");
@@ -1405,7 +1410,7 @@ str_rtrim2(str *buf, size_t *buflen, con
if ((msg = trimchars(buf, buflen, &n3, s2, n2, "str.ltrim2"))
!= MAL_SUCCEED)
return msg;
len = strlen(s);
- n = rstrip(s, len, *(int **) buf, n3);
+ n = rstrip(s, len, *(uint32_t **) buf, n3);
n++;
CHECK_STR_BUFFER_LENGTH(buf, buflen, n, "str.rtrim2");
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]