From 5664c795f091f1fef4976954e776fd5c87b0d85d Mon Sep 17 00:00:00 2001
From: J Smith <dark.panda@gmail.com>
Date: Mon, 7 Nov 2011 00:46:22 -0500
Subject: [PATCH] Fix weirdness when dealing with UTF-8 in buggy libc
 implementations.

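The OS X libc inherits a bug from an older FreeBSD libc that causes it to
incorrectly treat certain characters as whitespace when using UTF-8.
Split each line on the tab character ourselves instead of relying on
sscanf() to separate the two fields.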
---
 contrib/unaccent/unaccent.c |   11 ++++++++++-
 1 files changed, 10 insertions(+), 1 deletions(-)

diff --git a/contrib/unaccent/unaccent.c b/contrib/unaccent/unaccent.c
index d9c2eac..e2d01eb 100644
--- a/contrib/unaccent/unaccent.c
+++ b/contrib/unaccent/unaccent.c
@@ -96,6 +96,7 @@ initSuffixTree(char *filename)
 		int			srclen;
 		int			trglen;
 		char	   *line = NULL;
+		char	   *tok = NULL;
 
 		skip = true;
 
@@ -108,8 +109,16 @@ initSuffixTree(char *filename)
 			 */
 			while ((line = tsearch_readline(&trst)) != NULL)
 			{
-				if (sscanf(line, "%s\t%s\n", src, trg) != 2)
+				if ((tok = strchr(line, '\t')) == NULL) {
 					continue;
+				}
+
+				sprintf(src, "%.*s", (int) (tok - line), line);
+				sprintf(trg, "%s", tok + 1);
+
+				if ((tok = strchr(trg, '\n')) != NULL) {
+					tok[0] = '\0';
+				}
 
 				srclen = strlen(src);
 				trglen = strlen(trg);
-- 
1.7.7.2

