Hello, I have come across a strange situation when using a Unicode string that contains non-normalized characters. The attached script 'initcap.sql' reproduces the problem.
The attached patch can fix the issue. Regards, Juan José Santamaría Flecha
initcap.sql
Description: application/sql
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index 755ca6e..9f8becf 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -96,6 +96,7 @@
#include "utils/memutils.h"
#include "utils/numeric.h"
#include "utils/pg_locale.h"
+#include "common/unicode_norm.h"
/* ----------
* Routines type
@@ -1864,7 +1865,8 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
else
workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
- wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
+ if (!is_pg_wchar_combining(workspace[curr_char]))
+ wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
}
else
#endif
@@ -1873,7 +1875,8 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
workspace[curr_char] = towlower(workspace[curr_char]);
else
workspace[curr_char] = towupper(workspace[curr_char]);
- wasalnum = iswalnum(workspace[curr_char]);
+ if (!is_pg_wchar_combining(workspace[curr_char]))
+ wasalnum = iswalnum(workspace[curr_char]);
}
}
diff --git a/src/common/unicode_norm.c b/src/common/unicode_norm.c
index 89c5533..25b149b 100644
--- a/src/common/unicode_norm.c
+++ b/src/common/unicode_norm.c
@@ -435,3 +435,14 @@ unicode_normalize_kc(const pg_wchar *input)
return recomp_chars;
}
+
+/* Report whether 'current' has a pg_unicode_decomposition entry with a non-zero comb_class. */
+bool
+is_pg_wchar_combining(const pg_wchar current)
+{
+	/* Fetch the entry for this code point; NULL when get_code_entry finds none. */
+	pg_unicode_decomposition *entry = get_code_entry(current);
+
+	/* A missing entry or a zero comb_class both mean "not combining". */
+	return entry != NULL && entry->comb_class != 0x0;
+}
diff --git a/src/include/common/unicode_norm.h b/src/include/common/unicode_norm.h
index 99167d2..bdcf02e 100644
--- a/src/include/common/unicode_norm.h
+++ b/src/include/common/unicode_norm.h
@@ -17,5 +17,6 @@
#include "mb/pg_wchar.h"
extern pg_wchar *unicode_normalize_kc(const pg_wchar *input);
+extern bool is_pg_wchar_combining(const pg_wchar current);
#endif /* UNICODE_NORM_H */
