Such version with macros and includes can look like this: #ifdef MULTIBYTE #define NEXT_X (x+= char_lens[i-1]) #define NEXT_Y (y+= y_char_len) #define CMP (char_cmp(x, char_lens[i-1], y, y_char_len)) #else #define NEXT_X (x++) #define NEXT_Y (y++) #define CMP (*x == *y) #endif
static int levenshtein_internal(text *s, text *t, int ins_c, int del_c, int sub_c) { int m, n, d; int *prev; int *curr; int i, j; const char *x; const char *y; #ifdef MULTIBYTE int *char_lens; int y_char_len; #endif /* * We should calculate number of characters for multibyte encodings */ m = pg_mbstrlen_with_len(VARDATA_ANY(s), VARSIZE_ANY_EXHDR(s)); n = pg_mbstrlen_with_len(VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t)); /* * We can transform an empty s into t with n insertions, or a non-empty t * into an empty s with m deletions. */ if (m == 0) return n * ins_c; if (n == 0) return m * del_c; /* * For security concerns, restrict excessive CPU+RAM usage. (This * implementation uses O(m) memory and has O(mn) complexity.) */ if (m > MAX_LEVENSHTEIN_STRLEN || n > MAX_LEVENSHTEIN_STRLEN) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("argument exceeds the maximum length of %d bytes", MAX_LEVENSHTEIN_STRLEN))); /* One more cell for initialization column and row. */ ++m; ++n; /* * Instead of building an (m+1)x(n+1) array, we'll use two different * arrays of size m+1 for storing accumulated values. At each step one * represents the "previous" row and one is the "current" row of the * notional large array. * For multibyte encoding we'll also store array of lengths of * characters of first string in order to avoid great number of * pg_mblen calls. */ #ifdef MULTIBYTE prev = (int *) palloc(3 * m * sizeof(int)); curr = prev + m; char_lens = prev + 2 * m; for (i = 0, x = VARDATA_ANY(s); i < m - 1; i++) { char_lens[i] = pg_mblen(x); x += char_lens[i]; } char_lens[i] = 0; #else prev = (int *) palloc(2 * m * sizeof(int)); curr = prev + m; #endif /* Initialize the "previous" row to 0..cols */ for (i = 0; i < m; i++) prev[i] = i * del_c; /* * For multibyte encoding we should increase x and y pointers by the * results of pg_mblen function. Also we should use CHAR_CMP macros * for character comparison. */ for (y = VARDATA_ANY(t), j = 1; j < n; NEXT_Y, j++) { int *temp; #ifdef MULTIBYTE y_char_len = pg_mblen(y); #endif curr[0] = j * ins_c; for (x = VARDATA_ANY(s), i = 1; i < m; NEXT_X, i++) { int ins; int del; int sub; ins = prev[i] + ins_c; del = curr[i - 1] + del_c; sub = prev[i - 1] + (CMP ? 0 : sub_c); curr[i] = Min(ins, del); curr[i] = Min(curr[i], sub); } temp = curr; curr = prev; prev = temp; } d = prev[m - 1]; /* * Because the final value was swapped from the previous row to the * current row, that's where we'll find it. */ return d; } What do you thing about it? ---- With best regards, Alexander Korotkov.