From f3344f279b2781a63ef689fc49b0d28f7f8294a1 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@2ndquadrant.com>
Date: Sun, 18 Jul 2021 17:34:09 -0400
Subject: [PATCH v18 2/6] Unroll loop in DFA

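We know exactly how many state transitions will happen for each
valid sequence, and we know we can't possibly overshoot the input
length, so compute all transitions unconditionally and check the
state once at the end, instead of testing it after every byte.

Since intermediate states are no longer inspected, keep the unmasked
shift result in "state" and apply DFA_MASK only where the value is
used: in the next shift count and in the final comparison.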
---
 src/common/wchar.c | 41 +++++++++++++++++++++++++++++++++++------
 1 file changed, 35 insertions(+), 6 deletions(-)

diff --git a/src/common/wchar.c b/src/common/wchar.c
index aafc602bcd..0454e332cc 100644
--- a/src/common/wchar.c
+++ b/src/common/wchar.c
@@ -1890,16 +1890,45 @@ utf8_advance(const unsigned char *s)
 	uint64		state = BGN;
 	int			l = 0;
 
-	do
+	class = ByteCategory[*s++];
+	state = class >> (state & DFA_MASK);
+
+	switch(class)
 	{
-		class = ByteCategory[*s++];
-		state = (class >> state) & DFA_MASK;
-		l++;
-	} while (state > ERR);
+		case L2A:
+			l = 2;
+			class = ByteCategory[*s++];
+			state = class >> (state & DFA_MASK);
+			break;
+		case L3A:
+		case L3B:
+		case L3C:
+			l = 3;
+			class = ByteCategory[*s++];
+			state = class >> (state & DFA_MASK);
+			class = ByteCategory[*s++];
+			state = class >> (state & DFA_MASK);
+			break;
+		case L4A:
+		case L4B:
+		case L4C:
+			l = 4;
+			class = ByteCategory[*s++];
+			state = class >> (state & DFA_MASK);
+			class = ByteCategory[*s++];
+			state = class >> (state & DFA_MASK);
+			class = ByteCategory[*s++];
+			state = class >> (state & DFA_MASK);
+			break;
+		default:
+			l = 1;
+			Assert(class == NZA || class == ILL);
+	}
 
-	if (state == ERR)
+	if ((state & DFA_MASK) == ERR)
 		return -1;
 
+	Assert((state & DFA_MASK) == END);
 	Assert(l <= 4);
 	return l;
 }
-- 
2.31.1

