From c6db7527ad14138f8c4183d1d6261d07500f404b Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Mon, 6 Mar 2017 11:52:36 +0900
Subject: [PATCH 11/11] Set of fixes for SASLprep

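The following things are fixed here:
- Incorrect allocation: the recomposed-string buffer in
  utf_sasl_prepare() is now sized with sizeof(pg_wchar) instead of
  sizeof(int).
- Code borders for Hangul calculations: SBASE, LBASE, VBASE and TBASE
  are now written as UTF-8 byte sequences rather than as code points.
- recompose_code() is now called only when last_class < ch_class holds,
  instead of unconditionally for every character.
- U+FB1D (HEBREW LETTER YOD WITH HIRIQ) is added to @no_recomp_codes in
  utf_norm_generate.pl, and its entries are removed from
  utf_norm_table.h.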
---
 src/common/utf_norm.c               | 14 +++++++-------
 src/common/utf_norm_generate.pl     |  2 +-
 src/include/common/utf_norm_table.h |  6 ++----
 3 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/src/common/utf_norm.c b/src/common/utf_norm.c
index 2e7d6264fd..d91028fe2a 100644
--- a/src/common/utf_norm.c
+++ b/src/common/utf_norm.c
@@ -25,10 +25,10 @@
 #include "mb/pg_wchar.h"
 
 /* Constants for calculations wih Hangul characters */
-#define SBASE		0xAC00
-#define LBASE		0x1100
-#define VBASE		0x1161
-#define TBASE		0x11A7
+#define SBASE		0xEAB080	/* U+AC00 */
+#define LBASE		0xE18480	/* U+1100 */
+#define VBASE		0xE185A1	/* U+1161 */
+#define TBASE		0xE186A7	/* U+11A7 */
 #define LCOUNT		19
 #define VCOUNT		21
 #define TCOUNT		28
@@ -587,7 +587,7 @@ utf_sasl_prepare(const char *input)
 	 * recomposed string cannot be longer than the decomposed one, so
 	 * make the allocation of the recomposed string based on that assumption.
 	 */
-	recomp_chars = (pg_wchar *) malloc(decomp_size * sizeof(int));
+	recomp_chars = (pg_wchar *) malloc(decomp_size * sizeof(pg_wchar));
 	last_class = -1;	 /* this eliminates a special check */
 	starter_pos = 0;
 	target_pos = 1;
@@ -599,9 +599,9 @@ utf_sasl_prepare(const char *input)
 		pg_utf_decomposition *ch_entry = get_code_entry(ch);
 		int			ch_class = ch_entry == NULL ? 0 : ch_entry->class;
 		pg_wchar	composite;
-		bool		found_match = recompose_code(starter_ch, ch, &composite);
 
-		if (found_match && last_class < ch_class)
+		if (last_class < ch_class &&
+			recompose_code(starter_ch, ch, &composite))
 		{
 			recomp_chars[starter_pos] = composite;
 			starter_ch = composite;
diff --git a/src/common/utf_norm_generate.pl b/src/common/utf_norm_generate.pl
index e03316e074..b876cf3215 100644
--- a/src/common/utf_norm_generate.pl
+++ b/src/common/utf_norm_generate.pl
@@ -79,7 +79,7 @@ my @no_recomp_codes = (
 	'0FA7',  # TIBETAN SUBJOINED LETTER BHA
 	'0FAC',  # TIBETAN SUBJOINED LETTER DZHA
 	'0FB9',  # TIBETAN SUBJOINED LETTER KSSA
-	# 'FB1D' # HEBREW LETTER YOD WITH HIRIQ:  see below for $OPTYWH
+	'FB1D',  # HEBREW LETTER YOD WITH HIRIQ:
 	'FB1F',  # HEBREW LIGATURE YIDDISH YOD YOD PATAH
 	'FB2A',  # HEBREW LETTER SHIN WITH SHIN DOT
 	'FB2B',  # HEBREW LETTER SHIN WITH SIN DOT
diff --git a/src/include/common/utf_norm_table.h b/src/include/common/utf_norm_table.h
index 9fa3b7ca99..a5248c777e 100644
--- a/src/include/common/utf_norm_table.h
+++ b/src/include/common/utf_norm_table.h
@@ -24,7 +24,7 @@ typedef struct
 } pg_utf_decomposition;
 
 /* conversion table */
-static const pg_utf_decomposition UtfDecompMain[ 6452 ] =
+static const pg_utf_decomposition UtfDecompMain[ 6451 ] =
 {
 	{0xc2a0, 0x00, 1},
 	{0xc2a8, 0x00, 2},
@@ -3447,7 +3447,6 @@ static const pg_utf_decomposition UtfDecompMain[ 6452 ] =
 	{0xefac95, 0x00, 2},
 	{0xefac96, 0x00, 2},
 	{0xefac97, 0x00, 2},
-	{0xefac9d, 0x00, 2},
 	{0xefac9e, 0x1a, 0},
 	{0xefaca0, 0x00, 1},
 	{0xefaca1, 0x00, 1},
@@ -10066,7 +10065,7 @@ typedef struct
 	uint32	decomp[2];	/* size of decomposition code list */
 } pg_utf_decomposition_size_2;
 
-static const pg_utf_decomposition_size_2 UtfDecomp_2 [ 1591 ] =
+static const pg_utf_decomposition_size_2 UtfDecomp_2 [ 1590 ] =
 {
 	{0xc2a8, {0x20, 0xcc88}},
 	{0xc2af, {0x20, 0xcc84}},
@@ -11273,7 +11272,6 @@ static const pg_utf_decomposition_size_2 UtfDecomp_2 [ 1591 ] =
 	{0xefac95, {0xd5b4, 0xd5ab}},
 	{0xefac96, {0xd5be, 0xd5b6}},
 	{0xefac97, {0xd5b4, 0xd5ad}},
-	{0xefac9d, {0xd799, 0xd6b4}},
 	{0xefad8f, {0xd790, 0xd79c}},
 	{0xefafaa, {0xd8a6, 0xd8a7}},
 	{0xefafab, {0xd8a6, 0xd8a7}},
-- 
2.12.0

