Changeset: aeccc9b80e2b for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/aeccc9b80e2b
Branch: default
Log Message:
Merge with Mar2025 branch.
diffs (truncated from 1028 to 300 lines):
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -1356,9 +1356,9 @@ char *prompt_getlogin(void);
struct dirent *readdir(DIR *dir);
void rewinddir(DIR *dir);
char *simple_prompt(const char *prompt, int maxlen, int echo, const char *def);
+char *utf16toutf8(const utf16_t *src);
const uint8_t utf8d[364];
-wchar_t *utf8towchar(const char *src);
-char *wchartoutf8(const wchar_t *src);
+utf16_t *utf8toutf16(const char *src);
int winerror(int);
# sql
diff --git a/clients/mapiclient/mclient.c b/clients/mapiclient/mclient.c
--- a/clients/mapiclient/mclient.c
+++ b/clients/mapiclient/mclient.c
@@ -3046,11 +3046,11 @@ cvfilename(const char *filename)
}
}
#endif
- /* couldn't use iconv for whatever reason; alternative is to
- * use utf8towchar above to convert to a wide character string
- * (wcs) and convert that to the locale-specific encoding
- * using wcstombs or wcsrtombs (but preferably only if the
- * locale's encoding is not UTF-8) */
+ /* if encoding is set, we couldn't use iconv for whatever reason;
+ * alternative is to convert to a wide character string (wcs) and
+ * convert that to the locale-specific encoding using wcstombs or
+ * wcsrtombs (but preferably only if the locale's encoding is not
+ * UTF-8) */
return strdup(filename);
}
@@ -3405,7 +3405,7 @@ main(int argc, char **argv)
exit(1);
}
for (int i = 0; i < argc; i++) {
- if ((argv[i] = wchartoutf8(wargv[i])) == NULL) {
+ if ((argv[i] = utf16toutf8(wargv[i])) == NULL) {
fprintf(stderr, "cannot convert argument to UTF-8\n");
exit(1);
}
diff --git a/clients/mapiclient/msqldump.c b/clients/mapiclient/msqldump.c
--- a/clients/mapiclient/msqldump.c
+++ b/clients/mapiclient/msqldump.c
@@ -108,7 +108,7 @@ main(int argc, char **argv)
exit(1);
}
for (int i = 0; i < argc; i++) {
- if ((argv[i] = wchartoutf8(wargv[i])) == NULL) {
+ if ((argv[i] = utf16toutf8(wargv[i])) == NULL) {
fprintf(stderr, "cannot convert argument to UTF-8\n");
exit(1);
}
diff --git a/cmake/monetdb-defines.cmake b/cmake/monetdb-defines.cmake
--- a/cmake/monetdb-defines.cmake
+++ b/cmake/monetdb-defines.cmake
@@ -308,7 +308,7 @@ macro(monetdb_configure_misc)
if(NOT DEFINED PYTHON3_LIBDIR)
# Used for installing testing python module (don't pass a location, else we need to strip this again)
- execute_process(COMMAND "${Python3_EXECUTABLE}" "-c" "import sysconfig;
print((sysconfig.get_path('purelib', vars={'base':''}, scheme='rpm_prefix') if
'rpm_prefix' in sysconfig.get_scheme_names() else sysconfig.get_path('purelib',
vars={'base':''}))[1:])"
+ execute_process(COMMAND "${Python3_EXECUTABLE}" "-c" "import sysconfig;
print((sysconfig.get_path('purelib', vars={'base':''}, scheme='rpm_prefix') if
'rpm_prefix' in sysconfig.get_scheme_names() else sysconfig.get_path('purelib',
vars={'base':''}))[1:].replace('\\\\','/'))"
RESULT_VARIABLE PY3_LIBDIR_CODE
OUTPUT_VARIABLE PYTHON3_SITEDIR
OUTPUT_STRIP_TRAILING_WHITESPACE)
diff --git a/common/stream/stdio_stream.c b/common/stream/stdio_stream.c
--- a/common/stream/stdio_stream.c
+++ b/common/stream/stdio_stream.c
@@ -183,8 +183,9 @@ open_stream(const char *restrict filenam
return NULL;
#ifdef NATIVE_WIN32
{
- wchar_t *wfname = utf8towchar(filename);
- wchar_t *wflags = utf8towchar(flags);
+ wchar_t *wfname = utf8toutf16(filename);
+ wchar_t *wflags = utf8toutf16(flags);
+ static_assert(SIZEOF_WCHAR_T == 2, "wchar_t on Windows expected to be 2 bytes");
if (wfname != NULL && wflags != NULL)
fp = _wfopen(wfname, wflags);
else
@@ -384,7 +385,7 @@ file_remove(const char *filename)
int rc = -1;
#ifdef NATIVE_WIN32
- wchar_t *wfname = utf8towchar(filename);
+ wchar_t *wfname = utf8toutf16(filename);
if (wfname != NULL) {
rc = _wremove(wfname);
free(wfname);
diff --git a/common/utils/mutils.c b/common/utils/mutils.c
--- a/common/utils/mutils.c
+++ b/common/utils/mutils.c
@@ -53,25 +53,22 @@
#define BYTE_ORDER LITTLE_ENDIAN
#endif
-wchar_t *
-utf8towchar(const char *src)
+uint16_t *
+utf8toutf16(const char *src)
{
- wchar_t *dest;
+ uint16_t *dest;
size_t i = 0;
uint32_t state = 0, codepoint = 0;
if (src == NULL)
return NULL;
- /* count how many wchar_t's we need, while also checking for
+ /* count how many uint16_t's we need, while also checking for
* correctness of the input */
for (size_t j = 0; src[j]; j++) {
switch (decode(&state, &codepoint, (uint8_t) src[j])) {
case UTF8_ACCEPT:
- i++;
-#if SIZEOF_WCHAR_T == 2
- i += (codepoint > 0xFFFF);
-#endif
+ i += 1 + (codepoint > 0xFFFF);
break;
case UTF8_REJECT:
return NULL;
@@ -79,7 +76,7 @@ utf8towchar(const char *src)
break;
}
}
- dest = malloc((i + 1) * sizeof(wchar_t));
+ dest = malloc((i + 1) * sizeof(uint16_t));
if (dest == NULL)
return NULL;
/* go through the source string again, this time we can skip
@@ -88,16 +85,12 @@ utf8towchar(const char *src)
for (size_t j = 0; src[j]; j++) {
switch (decode(&state, &codepoint, (uint8_t) src[j])) {
case UTF8_ACCEPT:
-#if SIZEOF_WCHAR_T == 2
if (codepoint <= 0xFFFF) {
- dest[i++] = (wchar_t) codepoint;
+ dest[i++] = (uint16_t) codepoint;
} else {
- dest[i++] = (wchar_t) (0xD7C0 + (codepoint >>
10));
- dest[i++] = (wchar_t) (0xDC00 + (codepoint &
0x3FF));
+ dest[i++] = (uint16_t) (0xD7C0 + (codepoint >>
10));
+ dest[i++] = (uint16_t) (0xDC00 + (codepoint &
0x3FF));
}
-#else
- dest[i++] = (wchar_t) codepoint;
-#endif
break;
case UTF8_REJECT:
/* cannot happen because of first loop */
@@ -118,7 +111,7 @@ utf8towchar(const char *src)
}
char *
-wchartoutf8(const wchar_t *ws)
+utf16toutf8(const uint16_t *ws)
{
size_t len = 1;
for (size_t i = 0; ws[i]; i++) {
@@ -126,25 +119,14 @@ wchartoutf8(const wchar_t *ws)
len += 1;
else if (ws[i] <= 0x7FF)
len += 2;
- else if (
-#if SIZEOF_WCHAR_T == 2
- (ws[i] & 0xF800) != 0xD800
-#else
- ws[i] <= 0xFFFF
-#endif
- ) {
+ else if ((ws[i] & 0xF800) != 0xD800) {
assert((ws[i] & 0xF800) != 0xD800);
len += 3;
} else {
-#if SIZEOF_WCHAR_T == 2
assert((ws[i + 0] & 0xFC00) == 0xD800); /* high
surrogate */
assert((ws[i + 1] & 0xFC00) == 0xDC00); /* low
surrogate */
len += 4;
i++;
-#else
- assert(ws[i] <= 0x10FFFF);
- len += 4;
-#endif
}
}
unsigned char *us = malloc(len);
@@ -156,24 +138,14 @@ wchartoutf8(const wchar_t *ws)
else if (ws[i] <= 0x7FF) {
us[j++] = (unsigned char) (ws[i] >> 6 | 0xC0);
us[j++] = (unsigned char) ((ws[i] & 0x3F) |
0x80);
- } else if (
-#if SIZEOF_WCHAR_T == 2
- (ws[i] & 0xF800) != 0xD800
-#else
- ws[i] <= 0xFFFF
-#endif
- ) {
+ } else if ((ws[i] & 0xF800) != 0xD800) {
us[j++] = (unsigned char) (ws[i] >> 12 | 0xE0);
us[j++] = (unsigned char) (((ws[i] >> 6) &
0x3F) | 0x80);
us[j++] = (unsigned char) ((ws[i] & 0x3F) |
0x80);
} else {
uint32_t wc;
-#if SIZEOF_WCHAR_T == 2
wc = ((ws[i+0] & 0x03FF) + 0x40) << 10 |
(ws[i+1] & 0x03FF);
i++;
-#else
- wc = (uint32_t) ws[i];
-#endif
us[j++] = (unsigned char) (wc >> 18 | 0xF0);
us[j++] = (unsigned char) (((wc >> 12) & 0x3F)
| 0x80);
us[j++] = (unsigned char) (((wc >> 6) & 0x3F) |
0x80);
@@ -283,7 +255,8 @@ opendir(const char *dirname)
return NULL;
}
result->find_file_data = malloc(sizeof(WIN32_FIND_DATAW));
- result->dir_name = utf8towchar(dirname);
+ static_assert(SIZEOF_WCHAR_T == 2, "wchar_t on Windows expected to be 2 bytes");
+ result->dir_name = utf8toutf16(dirname);
if (result->find_file_data == NULL || result->dir_name == NULL) {
if (result->find_file_data)
free(result->find_file_data);
@@ -364,7 +337,7 @@ readdir(DIR *dir)
else if (!FindNextFileW(dir->find_file_handle,
(LPWIN32_FIND_DATAW) dir->find_file_data))
return NULL;
- base = wchartoutf8(basename(((LPWIN32_FIND_DATAW)
dir->find_file_data)->cFileName));
+ base = utf16toutf8(basename(((LPWIN32_FIND_DATAW)
dir->find_file_data)->cFileName));
if (base == NULL)
return NULL;
strcpy_len(dir->result.d_name, base, sizeof(dir->result.d_name));
@@ -457,7 +430,7 @@ MT_lockf(const char *filename, int mode)
inited = true; /* only time this is changed */
}
- if ((wfilename = utf8towchar(filename)) == NULL)
+ if ((wfilename = utf8toutf16(filename)) == NULL)
return -2;
ov = (OVERLAPPED) {0};
@@ -542,8 +515,8 @@ FILE *
MT_fopen(const char *filename, const char *mode)
{
wchar_t *wfilename, *wmode;
- wfilename = utf8towchar(filename);
- wmode = utf8towchar(mode);
+ wfilename = utf8toutf16(filename);
+ wmode = utf8toutf16(mode);
FILE *f = NULL;
if (wfilename != NULL && wmode != NULL && (f = _wfopen(wfilename,
wmode)) != NULL && strchr(mode, 'w') != NULL)
SetFileAttributesW(wfilename,
FILE_ATTRIBUTE_NOT_CONTENT_INDEXED);
@@ -555,7 +528,7 @@ MT_fopen(const char *filename, const cha
int
MT_open(const char *filename, int flags)
{
- wchar_t *wfilename = utf8towchar(filename);
+ wchar_t *wfilename = utf8toutf16(filename);
if (wfilename == NULL)
return -1;
int fd;
@@ -570,7 +543,7 @@ MT_open(const char *filename, int flags)
int
MT_stat(const char *pathname, struct _stat64 *st)
{
- wchar_t *wpathname = utf8towchar(pathname);
+ wchar_t *wpathname = utf8toutf16(pathname);
int ret;
if (wpathname == NULL)
return -1;
@@ -586,7 +559,7 @@ MT_stat(const char *pathname, struct _st
int
MT_rmdir(const char *pathname)
{
- wchar_t *wpathname = utf8towchar(pathname);
+ wchar_t *wpathname = utf8toutf16(pathname);
int ret;
if (wpathname == NULL)
return -1;
@@ -627,7 +600,7 @@ WMT_remove(const wchar_t *wpathname)
int
MT_remove(const char *pathname)
{
- wchar_t *wpathname = utf8towchar(pathname);
+ wchar_t *wpathname = utf8toutf16(pathname);
int ret;
if (wpathname == NULL)
return -1;
@@ -642,8 +615,8 @@ MT_rename(const char *old, const char *d
{
int ret = -1;
wchar_t *wold, *wdst;
- wold = utf8towchar(old);
- wdst = utf8towchar(dst);
+ wold = utf8toutf16(old);
+ wdst = utf8toutf16(dst);
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]