-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

On 06/14/2013 12:08 PM, Liming Hu wrote:
>>>>>>> I have implemented the code according to Joe's
>>>>>>> suggestion, and put the code at: 
>>>>>>> https://github.com/liminghu/fuzzystrmatch/tree/fuzzystrmatchv1.1
>>>>>>>
>
>>>>>>> 
Please submit a proper patch so it can be seen on our mailing list
>>>>>> archives.
>>>>>> 
>>>>> Hi Alvaro,
>>>>> 
>>>>> I am kind of new to the Postgresql hacker community, Can
>>>>> you please help me on submit the patch?

I have not done much in the way of real review yet, but here at least
is a context diff against git master.

Joe


- -- 
Joe Conway
credativ LLC: http://www.credativ.us
Linux, PostgreSQL, and general Open Source
Training, Service, Consulting, & 24x7 Support
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.12 (GNU/Linux)
Comment: Using GnuPG with undefined - http://www.enigmail.net/

iQIcBAEBAgAGBQJRyOw2AAoJEDfy90M199hlFhEP/2o/d08Fq4CHA9iI05PxDwQM
AHF6PWS5gJToNLtiTevyEamWyzlULjX3yJ9gTAhxc+ltxE9Xuf3/bfMcJQSbJ5c9
6GSH7CWpaY1DpfAwvYiwTJ9EPBZ11u8VZaMrb1VU2DS8rioOOL3llzpk+D6/1no5
y327vlH1aOuqk3Hwu0c/WN8RAcf1HLbhafph2KruUfr/ba3uILBQZtzpyK4uCDew
OJA+cktXHv3ho7w4N//xVQs3sZ/EoLahOt/y4syd3dN+Cq/8kj/uJaVgT2QY8rDQ
HIBpYvm+b3DsYpjw2qrSVBriIupF2a0UPkLfC5cu3om8VvBilShydE6uTI+f+55p
a12NuzepwgDnpZ1jOZkkmnBslpfoZI9TEo1UDeZ8x/TSZDW72FhwRtWq9kO9CFIK
cJvG9B9E5zhyZx9V1C2S0qpvIJAj/Gns4ymvYU1lm5UUnpPSpTQoUK8ybrfnHoTE
B0DEVjqxbrk9SSJ5LI3KojAaSMUIQDcjMQFCsghR1s5/yRhpZ7xEPvcL63ARwDcv
ZeeL6r5G+nmKAfRAjGbmWi/Y1ANI0ucO5XxnfhSDb+TCQow4U6IgNvkYjN1hTNEI
//9mQHDHi5qsuGcRcgvFLLeR4eVSGewseYLOR9XELMYTam4IUClwPr6WHuMqE/aE
jvMZafqZ/1EhQ2SeqCo4
=wcGM
-----END PGP SIGNATURE-----
diff -cNr /opt/src/pgsql-git/master/contrib/fuzzystrmatch/fuzzystrmatch--1.0.sql fuzzystrmatch/fuzzystrmatch--1.0.sql
*** /opt/src/pgsql-git/master/contrib/fuzzystrmatch/fuzzystrmatch--1.0.sql	2012-02-25 20:24:23.000000000 -0600
--- fuzzystrmatch/fuzzystrmatch--1.0.sql	2013-06-11 04:09:01.000000000 -0500
***************
*** 1,4 ****
! /* contrib/fuzzystrmatch/fuzzystrmatch--1.0.sql */
  
  -- complain if script is sourced in psql, rather than via CREATE EXTENSION
  \echo Use "CREATE EXTENSION fuzzystrmatch" to load this file. \quit
--- 1,4 ----
! /* contrib/fuzzystrmatch/fuzzystrmatch--1.1.sql */
  
  -- complain if script is sourced in psql, rather than via CREATE EXTENSION
  \echo Use "CREATE EXTENSION fuzzystrmatch" to load this file. \quit
***************
*** 19,24 ****
--- 19,44 ----
  AS 'MODULE_PATHNAME','levenshtein_less_equal_with_costs'
  LANGUAGE C IMMUTABLE STRICT;
  
+ CREATE FUNCTION dameraulevenshteinnocompatible (text,text,int,int,int,int) RETURNS int
+ AS 'MODULE_PATHNAME','dameraulevenshtein_with_costs_noncompatible'
+ LANGUAGE C IMMUTABLE STRICT;
+ 
+ CREATE FUNCTION dameraulevenshtein (text,text) RETURNS int
+ AS 'MODULE_PATHNAME','dameraulevenshtein'
+ LANGUAGE C IMMUTABLE STRICT;
+ 
+ CREATE FUNCTION dameraulevenshtein (text,text,int,int,int,int) RETURNS int
+ AS 'MODULE_PATHNAME','dameraulevenshtein_with_costs'
+ LANGUAGE C IMMUTABLE STRICT;
+ 
+ CREATE FUNCTION dameraulevenshtein_less_equal (text,text,int) RETURNS int
+ AS 'MODULE_PATHNAME','dameraulevenshtein_less_equal'
+ LANGUAGE C IMMUTABLE STRICT;
+ 
+ CREATE FUNCTION dameraulevenshtein_less_equal (text,text,int,int,int,int,int) RETURNS int
+ AS 'MODULE_PATHNAME','dameraulevenshtein_less_equal_with_costs'
+ LANGUAGE C IMMUTABLE STRICT;
+ 
  CREATE FUNCTION metaphone (text,int) RETURNS text
  AS 'MODULE_PATHNAME','metaphone'
  LANGUAGE C IMMUTABLE STRICT;
diff -cNr /opt/src/pgsql-git/master/contrib/fuzzystrmatch/fuzzystrmatch--1.1.sql fuzzystrmatch/fuzzystrmatch--1.1.sql
*** /opt/src/pgsql-git/master/contrib/fuzzystrmatch/fuzzystrmatch--1.1.sql	1969-12-31 18:00:00.000000000 -0600
--- fuzzystrmatch/fuzzystrmatch--1.1.sql	2013-06-11 04:09:01.000000000 -0500
***************
*** 0 ****
--- 1,60 ----
+ /* contrib/fuzzystrmatch/fuzzystrmatch--1.1.sql */
+ 
+ -- complain if script is sourced in psql, rather than via CREATE EXTENSION
+ \echo Use "CREATE EXTENSION fuzzystrmatch" to load this file. \quit
+ 
+ CREATE FUNCTION levenshtein (text,text) RETURNS int
+ AS 'MODULE_PATHNAME','levenshtein'
+ LANGUAGE C IMMUTABLE STRICT;
+ 
+ CREATE FUNCTION levenshtein (text,text,int,int,int) RETURNS int
+ AS 'MODULE_PATHNAME','levenshtein_with_costs'
+ LANGUAGE C IMMUTABLE STRICT;
+ 
+ CREATE FUNCTION levenshtein_less_equal (text,text,int) RETURNS int
+ AS 'MODULE_PATHNAME','levenshtein_less_equal'
+ LANGUAGE C IMMUTABLE STRICT;
+ 
+ CREATE FUNCTION levenshtein_less_equal (text,text,int,int,int,int) RETURNS int
+ AS 'MODULE_PATHNAME','levenshtein_less_equal_with_costs'
+ LANGUAGE C IMMUTABLE STRICT;
+ 
+ CREATE FUNCTION dameraulevenshtein (text,text) RETURNS int
+ AS 'MODULE_PATHNAME','dameraulevenshtein'
+ LANGUAGE C IMMUTABLE STRICT;
+ 
+ CREATE FUNCTION dameraulevenshtein (text,text,int,int,int,int) RETURNS int
+ AS 'MODULE_PATHNAME','dameraulevenshtein_with_costs'
+ LANGUAGE C IMMUTABLE STRICT;
+ 
+ CREATE FUNCTION dameraulevenshtein_less_equal (text,text,int) RETURNS int
+ AS 'MODULE_PATHNAME','dameraulevenshtein_less_equal'
+ LANGUAGE C IMMUTABLE STRICT;
+ 
+ CREATE FUNCTION dameraulevenshtein_less_equal (text,text,int,int,int,int,int) RETURNS int
+ AS 'MODULE_PATHNAME','dameraulevenshtein_less_equal_with_costs'
+ LANGUAGE C IMMUTABLE STRICT;
+ 
+ CREATE FUNCTION metaphone (text,int) RETURNS text
+ AS 'MODULE_PATHNAME','metaphone'
+ LANGUAGE C IMMUTABLE STRICT;
+ 
+ CREATE FUNCTION soundex(text) RETURNS text
+ AS 'MODULE_PATHNAME', 'soundex'
+ LANGUAGE C IMMUTABLE STRICT;
+ 
+ CREATE FUNCTION text_soundex(text) RETURNS text
+ AS 'MODULE_PATHNAME', 'soundex'
+ LANGUAGE C IMMUTABLE STRICT;
+ 
+ CREATE FUNCTION difference(text,text) RETURNS int
+ AS 'MODULE_PATHNAME', 'difference'
+ LANGUAGE C IMMUTABLE STRICT;
+ 
+ CREATE FUNCTION dmetaphone (text) RETURNS text
+ AS 'MODULE_PATHNAME', 'dmetaphone'
+ LANGUAGE C IMMUTABLE STRICT;
+ 
+ CREATE FUNCTION dmetaphone_alt (text) RETURNS text
+ AS 'MODULE_PATHNAME', 'dmetaphone_alt'
+ LANGUAGE C IMMUTABLE STRICT;
diff -cNr /opt/src/pgsql-git/master/contrib/fuzzystrmatch/fuzzystrmatch.c fuzzystrmatch/fuzzystrmatch.c
*** /opt/src/pgsql-git/master/contrib/fuzzystrmatch/fuzzystrmatch.c	2013-02-03 11:09:42.000000000 -0600
--- fuzzystrmatch/fuzzystrmatch.c	2013-06-11 04:09:01.000000000 -0500
***************
*** 53,58 ****
--- 53,64 ----
  extern Datum levenshtein(PG_FUNCTION_ARGS);
  extern Datum levenshtein_less_equal_with_costs(PG_FUNCTION_ARGS);
  extern Datum levenshtein_less_equal(PG_FUNCTION_ARGS);
+ 
+ extern Datum dameraulevenshtein_with_costs(PG_FUNCTION_ARGS);
+ extern Datum dameraulevenshtein(PG_FUNCTION_ARGS);
+ extern Datum dameraulevenshtein_less_equal_with_costs(PG_FUNCTION_ARGS);
+ extern Datum dameraulevenshtein_less_equal(PG_FUNCTION_ARGS);
+ 
  extern Datum metaphone(PG_FUNCTION_ARGS);
  extern Datum soundex(PG_FUNCTION_ARGS);
  extern Datum difference(PG_FUNCTION_ARGS);
***************
*** 193,199 ****
  	int			del_c = PG_GETARG_INT32(3);
  	int			sub_c = PG_GETARG_INT32(4);
  
! 	PG_RETURN_INT32(levenshtein_internal(src, dst, ins_c, del_c, sub_c));
  }
  
  
--- 199,205 ----
  	int			del_c = PG_GETARG_INT32(3);
  	int			sub_c = PG_GETARG_INT32(4);
  
! 	PG_RETURN_INT32(levenshtein_internal(src, dst, ins_c, del_c, sub_c, 0));
  }
  
  
***************
*** 204,210 ****
  	text	   *src = PG_GETARG_TEXT_PP(0);
  	text	   *dst = PG_GETARG_TEXT_PP(1);
  
! 	PG_RETURN_INT32(levenshtein_internal(src, dst, 1, 1, 1));
  }
  
  
--- 210,216 ----
  	text	   *src = PG_GETARG_TEXT_PP(0);
  	text	   *dst = PG_GETARG_TEXT_PP(1);
  
! 	PG_RETURN_INT32(levenshtein_internal(src, dst, 1, 1, 1, 0));
  }
  
  
***************
*** 219,225 ****
  	int			sub_c = PG_GETARG_INT32(4);
  	int			max_d = PG_GETARG_INT32(5);
  
! 	PG_RETURN_INT32(levenshtein_less_equal_internal(src, dst, ins_c, del_c, sub_c, max_d));
  }
  
  
--- 225,231 ----
  	int			sub_c = PG_GETARG_INT32(4);
  	int			max_d = PG_GETARG_INT32(5);
  
! 	PG_RETURN_INT32(levenshtein_less_equal_internal(src, dst, ins_c, del_c, sub_c, 0, max_d));
  }
  
  
***************
*** 231,240 ****
  	text	   *dst = PG_GETARG_TEXT_PP(1);
  	int			max_d = PG_GETARG_INT32(2);
  
! 	PG_RETURN_INT32(levenshtein_less_equal_internal(src, dst, 1, 1, 1, max_d));
  }
  
  
  /*
   * Calculates the metaphone of an input string.
   * Returns number of characters requested
--- 237,298 ----
  	text	   *dst = PG_GETARG_TEXT_PP(1);
  	int			max_d = PG_GETARG_INT32(2);
  
! 	PG_RETURN_INT32(levenshtein_less_equal_internal(src, dst, 1, 1, 1, 0, max_d));
! }
! 
! PG_FUNCTION_INFO_V1(dameraulevenshtein_with_costs);
! Datum
! dameraulevenshtein_with_costs(PG_FUNCTION_ARGS)
! {
! 	text	   *src = PG_GETARG_TEXT_PP(0);
! 	text	   *dst = PG_GETARG_TEXT_PP(1);
! 	int			ins_c = PG_GETARG_INT32(2);
! 	int			del_c = PG_GETARG_INT32(3);
! 	int			sub_c = PG_GETARG_INT32(4);
! 	int			trans_c = PG_GETARG_INT32(5);
! 
! 	PG_RETURN_INT32(levenshtein_internal(src, dst, ins_c, del_c, sub_c, trans_c));
! }
! 
! 
! PG_FUNCTION_INFO_V1(dameraulevenshtein);
! Datum
! dameraulevenshtein(PG_FUNCTION_ARGS)
! {
! 	text	   *src = PG_GETARG_TEXT_PP(0);
! 	text	   *dst = PG_GETARG_TEXT_PP(1);
! 
! 	PG_RETURN_INT32(levenshtein_internal(src, dst, 1, 1, 1, 1));
  }
  
  
+ PG_FUNCTION_INFO_V1(dameraulevenshtein_less_equal_with_costs);
+ Datum
+ dameraulevenshtein_less_equal_with_costs(PG_FUNCTION_ARGS)
+ {
+ 	text	   *src = PG_GETARG_TEXT_PP(0);
+ 	text	   *dst = PG_GETARG_TEXT_PP(1);
+ 	int			ins_c = PG_GETARG_INT32(2);
+ 	int			del_c = PG_GETARG_INT32(3);
+ 	int			sub_c = PG_GETARG_INT32(4);
+ 	int			trans_c = PG_GETARG_INT32(5);
+ 	int			max_d = PG_GETARG_INT32(6);
+ 
+ 	PG_RETURN_INT32(levenshtein_less_equal_internal(src, dst, ins_c, del_c, sub_c, trans_c, max_d));
+ }
+ 
+ 
+ PG_FUNCTION_INFO_V1(dameraulevenshtein_less_equal);
+ Datum
+ dameraulevenshtein_less_equal(PG_FUNCTION_ARGS)
+ {
+ 	text	   *src = PG_GETARG_TEXT_PP(0);
+ 	text	   *dst = PG_GETARG_TEXT_PP(1);
+ 	int			max_d = PG_GETARG_INT32(2);
+ 
+ 	PG_RETURN_INT32(levenshtein_less_equal_internal(src, dst, 1, 1, 1, 1, max_d));
+ }
+ 
  /*
   * Calculates the metaphone of an input string.
   * Returns number of characters requested
diff -cNr /opt/src/pgsql-git/master/contrib/fuzzystrmatch/fuzzystrmatch.control fuzzystrmatch/fuzzystrmatch.control
*** /opt/src/pgsql-git/master/contrib/fuzzystrmatch/fuzzystrmatch.control	2011-02-17 11:19:41.000000000 -0600
--- fuzzystrmatch/fuzzystrmatch.control	2013-06-11 04:09:01.000000000 -0500
***************
*** 1,5 ****
  # fuzzystrmatch extension
  comment = 'determine similarities and distance between strings'
! default_version = '1.0'
  module_pathname = '$libdir/fuzzystrmatch'
  relocatable = true
--- 1,5 ----
  # fuzzystrmatch extension
  comment = 'determine similarities and distance between strings'
! default_version = '1.1'
  module_pathname = '$libdir/fuzzystrmatch'
  relocatable = true
diff -cNr /opt/src/pgsql-git/master/contrib/fuzzystrmatch/fuzzystrmatch--unpackaged--1.0.sql fuzzystrmatch/fuzzystrmatch--unpackaged--1.0.sql
*** /opt/src/pgsql-git/master/contrib/fuzzystrmatch/fuzzystrmatch--unpackaged--1.0.sql	2012-02-25 20:24:23.000000000 -0600
--- fuzzystrmatch/fuzzystrmatch--unpackaged--1.0.sql	2013-06-11 04:09:01.000000000 -0500
***************
*** 1,10 ****
! /* contrib/fuzzystrmatch/fuzzystrmatch--unpackaged--1.0.sql */
  
  -- complain if script is sourced in psql, rather than via CREATE EXTENSION
  \echo Use "CREATE EXTENSION fuzzystrmatch" to load this file. \quit
  
  ALTER EXTENSION fuzzystrmatch ADD function levenshtein(text,text);
  ALTER EXTENSION fuzzystrmatch ADD function levenshtein(text,text,integer,integer,integer);
  ALTER EXTENSION fuzzystrmatch ADD function metaphone(text,integer);
  ALTER EXTENSION fuzzystrmatch ADD function soundex(text);
  ALTER EXTENSION fuzzystrmatch ADD function text_soundex(text);
--- 1,14 ----
! /* contrib/fuzzystrmatch/fuzzystrmatch--unpackaged--1.1.sql */
  
  -- complain if script is sourced in psql, rather than via CREATE EXTENSION
  \echo Use "CREATE EXTENSION fuzzystrmatch" to load this file. \quit
  
  ALTER EXTENSION fuzzystrmatch ADD function levenshtein(text,text);
  ALTER EXTENSION fuzzystrmatch ADD function levenshtein(text,text,integer,integer,integer);
+ 
+ ALTER EXTENSION fuzzystrmatch ADD function dameraulevenshteinnoncompatible(text,text,integer,integer,integer,integer);
+ ALTER EXTENSION fuzzystrmatch ADD function dameraulevenshtein(text,text);
+ ALTER EXTENSION fuzzystrmatch ADD function dameraulevenshtein(text,text,integer,integer,integer,integer);
  ALTER EXTENSION fuzzystrmatch ADD function metaphone(text,integer);
  ALTER EXTENSION fuzzystrmatch ADD function soundex(text);
  ALTER EXTENSION fuzzystrmatch ADD function text_soundex(text);
***************
*** 21,23 ****
--- 25,39 ----
  CREATE FUNCTION levenshtein_less_equal (text,text,int,int,int,int) RETURNS int
  AS 'MODULE_PATHNAME','levenshtein_less_equal_with_costs'
  LANGUAGE C IMMUTABLE STRICT;
+ 
+ CREATE FUNCTION dameraulevenshtein_less_equal (text,text,int) RETURNS int
+ AS 'MODULE_PATHNAME','dameraulevenshtein_less_equal'
+ LANGUAGE C IMMUTABLE STRICT;
+ 
+ CREATE FUNCTION dameraulevenshtein_less_equal (text,text,int,int,int,int,int) RETURNS int
+ AS 'MODULE_PATHNAME','dameraulevenshtein_less_equal_with_costs'
+ LANGUAGE C IMMUTABLE STRICT;
+ 
+ CREATE FUNCTION dameraulevenshtein_less_equal_noncompatible (text,text,int,int,int,int,int) RETURNS int
+ AS 'MODULE_PATHNAME','dameraulevenshtein_less_equal_with_costs_noncompatible'
+ LANGUAGE C IMMUTABLE STRICT;
diff -cNr /opt/src/pgsql-git/master/contrib/fuzzystrmatch/fuzzystrmatch--unpackaged--1.1.sql fuzzystrmatch/fuzzystrmatch--unpackaged--1.1.sql
*** /opt/src/pgsql-git/master/contrib/fuzzystrmatch/fuzzystrmatch--unpackaged--1.1.sql	1969-12-31 18:00:00.000000000 -0600
--- fuzzystrmatch/fuzzystrmatch--unpackaged--1.1.sql	2013-06-11 04:09:01.000000000 -0500
***************
*** 0 ****
--- 1,33 ----
+ /* contrib/fuzzystrmatch/fuzzystrmatch--unpackaged--1.1.sql */
+ 
+ -- complain if script is sourced in psql, rather than via CREATE EXTENSION
+ \echo Use "CREATE EXTENSION fuzzystrmatch" to load this file. \quit
+ 
+ ALTER EXTENSION fuzzystrmatch ADD function levenshtein(text,text);
+ ALTER EXTENSION fuzzystrmatch ADD function levenshtein(text,text,integer,integer,integer);
+ ALTER EXTENSION fuzzystrmatch ADD function levenshtein_less_equal (text,text,int);
+ ALTER EXTENSION fuzzystrmatch ADD function levenshtein_less_equal (text,text,int,int,int,int);
+ ALTER EXTENSION fuzzystrmatch ADD function metaphone(text,integer);
+ ALTER EXTENSION fuzzystrmatch ADD function soundex(text);
+ ALTER EXTENSION fuzzystrmatch ADD function text_soundex(text);
+ ALTER EXTENSION fuzzystrmatch ADD function difference(text,text);
+ ALTER EXTENSION fuzzystrmatch ADD function dmetaphone(text);
+ ALTER EXTENSION fuzzystrmatch ADD function dmetaphone_alt(text);
+ 
+ -- these functions were not in 9.3
+ 
+ CREATE FUNCTION dameraulevenshtein (text,text) RETURNS int
+ AS 'MODULE_PATHNAME','dameraulevenshtein'
+ LANGUAGE C IMMUTABLE STRICT;
+ 
+ CREATE FUNCTION dameraulevenshtein (text,text,int,int,int,int) RETURNS int
+ AS 'MODULE_PATHNAME','dameraulevenshtein_with_costs'
+ LANGUAGE C IMMUTABLE STRICT;
+ 
+ CREATE FUNCTION dameraulevenshtein_less_equal (text,text,int) RETURNS int
+ AS 'MODULE_PATHNAME','dameraulevenshtein_less_equal'
+ LANGUAGE C IMMUTABLE STRICT;
+ 
+ CREATE FUNCTION dameraulevenshtein_less_equal (text,text,int,int,int,int,int) RETURNS int
+ AS 'MODULE_PATHNAME','dameraulevenshtein_less_equal_with_costs'
+ LANGUAGE C IMMUTABLE STRICT;
diff -cNr /opt/src/pgsql-git/master/contrib/fuzzystrmatch/levenshtein.c fuzzystrmatch/levenshtein.c
*** /opt/src/pgsql-git/master/contrib/fuzzystrmatch/levenshtein.c	2013-02-03 11:09:42.000000000 -0600
--- fuzzystrmatch/levenshtein.c	2013-06-11 04:09:01.000000000 -0500
***************
*** 16,21 ****
--- 16,27 ----
   * inspiration.
   * Configurable penalty costs extension is introduced by Volkan
   * YAZICI <volkan.yaz...@gmail.com>.
+  * Damerau levenshtein variant
+  * Liming Hu <dawnin...@gmail.com>
+  * based on description of the algorithm:
+  * http://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
+  * and:
+  * http://tomoyo.sourceforge.jp/cgi-bin/lxr/source/tools/perf/util/levenshtein.c
   */
  
  /*
***************
*** 23,41 ****
   */
  #ifdef LEVENSHTEIN_LESS_EQUAL
  static int levenshtein_less_equal_internal(text *s, text *t,
! 								int ins_c, int del_c, int sub_c, int max_d);
  #else
  static int levenshtein_internal(text *s, text *t,
! 					 int ins_c, int del_c, int sub_c);
  #endif
  
  #define MAX_LEVENSHTEIN_STRLEN		255
  
  
  /*
!  * Calculates Levenshtein distance metric between supplied strings. Generally
!  * (1, 1, 1) penalty costs suffices for common cases, but your mileage may
!  * vary.
   *
   * One way to compute Levenshtein distance is to incrementally construct
   * an (m+1)x(n+1) matrix where cell (i, j) represents the minimum number
--- 29,52 ----
   */
  #ifdef LEVENSHTEIN_LESS_EQUAL
  static int levenshtein_less_equal_internal(text *s, text *t,
! 								int ins_c, int del_c, int sub_c, int trans_c, int max_d);
! static int dameraulevenshtein_less_equal_internal(text *s, text *t,
! 								int ins_c, int del_c, int sub_c, int trans_c, int max_d);
! 
  #else
  static int levenshtein_internal(text *s, text *t,
! 					 int ins_c, int del_c, int sub_c, int trans_c);
! static int dameraulevenshtein_internal(text *s, text *t,
! 					 int ins_c, int del_c, int sub_c, int trans_c);
  #endif
  
  #define MAX_LEVENSHTEIN_STRLEN		255
  
  
  /*
!  * Calculates [Damerau ]Levenshtein distance metric between supplied strings. Generally
!  * (1, 1, 1 [, 1]) penalty costs suffices for common cases, but your mileage
!  * may vary.
   *
   * One way to compute Levenshtein distance is to incrementally construct
   * an (m+1)x(n+1) matrix where cell (i, j) represents the minimum number
***************
*** 43,53 ****
   * the first j characters of t.  The last column of the final row is the
   * answer.
   *
!  * We use that algorithm here with some modification.  In lieu of holding
!  * the entire array in memory at once, we'll just use two arrays of size
!  * m+1 for storing accumulated values. At each step one array represents
!  * the "previous" row and one is the "current" row of the notional large
!  * array.
   *
   * If max_d >= 0, we only need to provide an accurate answer when that answer
   * is less than or equal to the bound.	From any cell in the matrix, there is
--- 54,75 ----
   * the first j characters of t.  The last column of the final row is the
   * answer.
   *
!  * We use that algorithm here with some modification.  In the case of Levenshtein 
!  * distance: in lieu of holding the entire array in memory at once, we'll just use 
!  * two arrays of size m+1 for storing accumulated values. At each step one array 
!  * represents the "previous" row and one is the "current" row of the notional large
!  * array. In the case of Damerau-Levenshtein distance, to avoid a large space complexity, 
!  * only the last three rows are kept in memory (if swaps had the same or higher cost 
!  * as one deletion plus one insertion, only two rows would be needed).
!  * At any stage, "i + 1" denotes the length of the current substring of
!  * s_data that the distance is calculated for.
!  *
!  * row2 holds the current row, row1 the previous row (i.e. for the substring
!  * of s_data of length "i"), and row0 the row before that.
!  *
!  * In other words, at the start of the big loop, row2[j + 1] contains the
!  * Damerau-Levenshtein distance between the substring of s_data of length
!  * "i" and the substring of t_data of length "j + 1".
   *
   * If max_d >= 0, we only need to provide an accurate answer when that answer
   * is less than or equal to the bound.	From any cell in the matrix, there is
***************
*** 66,77 ****
  static int
  #ifdef LEVENSHTEIN_LESS_EQUAL
  levenshtein_less_equal_internal(text *s, text *t,
! 								int ins_c, int del_c, int sub_c, int max_d)
  #else
  levenshtein_internal(text *s, text *t,
! 					 int ins_c, int del_c, int sub_c)
  #endif
  {
  	int			m,
  				n,
  				s_bytes,
--- 88,112 ----
  static int
  #ifdef LEVENSHTEIN_LESS_EQUAL
  levenshtein_less_equal_internal(text *s, text *t,
! 								int ins_c, int del_c, int sub_c, int trans_c, int max_d)
  #else
  levenshtein_internal(text *s, text *t,
! 					 int ins_c, int del_c, int sub_c, int trans_c)
  #endif
  {
+ 
+ 
+ 
+ #ifdef LEVENSHTEIN_LESS_EQUAL
+ 	if (trans_c!=0) 
+ 		return dameraulevenshtein_less_equal_internal(s,t, ins_c, del_c, sub_c, trans_c, max_d);
+ #else
+ 	if (trans_c!=0) 
+ 		return dameraulevenshtein_internal(s,t, ins_c, del_c, sub_c, trans_c);
+ #endif
+ 		
+ 	
+ 
  	int			m,
  				n,
  				s_bytes,
***************
*** 275,280 ****
--- 310,316 ----
  				int			ins;
  				int			del;
  				int			sub;
+ 				int			trans;
  				int			x_char_len = s_char_len[i - 1];
  
  				/*
***************
*** 310,315 ****
--- 346,352 ----
  				int			ins;
  				int			del;
  				int			sub;
+ 				int			trans;
  
  				/* Calculate costs for insertion, deletion, and substitution. */
  				ins = prev[i] + ins_c;
***************
*** 401,403 ****
--- 438,504 ----
  	 */
  	return prev[m - 1];
  }
+ 
+ static int 
+ #ifdef LEVENSHTEIN_LESS_EQUAL
+ 	dameraulevenshtein_less_equal_internal(text *s, text *t,
+ 					 int ins_c, int del_c, int sub_c, int trans_c, int max_d)
+ #else
+ 	dameraulevenshtein_internal(text *s, text *t,
+ 					 int ins_c, int del_c, int sub_c, int trans_c)
+ #endif
+ {
+ 
+ 	/* Extract a pointer to the actual character data. */
+ 	const char *s_data = VARDATA_ANY(s);
+ 	const char *t_data = VARDATA_ANY(t);
+ 
+ 	/* Determine length of each string in bytes and characters. */
+ 	int s_bytes = VARSIZE_ANY_EXHDR(s);
+ 	int t_bytes = VARSIZE_ANY_EXHDR(t);
+         /* returns the length (counted in wchars) of a multibyte string
+          * (not necessarily NULL terminated)
+          */
+ 	int length1 = pg_mbstrlen_with_len(s_data, s_bytes);
+ 	int length2 = pg_mbstrlen_with_len(t_data, t_bytes);
+ 
+         int *row0 = (int*) palloc(sizeof(int) * (length2 + 1)); /*the row before that.*/
+         int *row1 = (int*) palloc(sizeof(int) * (length2 + 1)); /*the previous row, for the substring of s_data of length i*/
+         int *row2 = (int*) palloc(sizeof(int) * (length2 + 1)); /*current row.*/
+         int i, j;
+ 	int distance = 0;
+  
+         for (j = 0; j <= length2; j++) /*length2+1*/
+         	row1[j] = j * ins_c;   
+ 
+         for (i = 0; i < length1; i++) { /*length1:           determines the partial minimum-cost paths.*/
+         	int *dummy;
+  
+                 row2[0] = (i + 1) * del_c;
+                 for (j = 0; j < length2; j++) { /*length2*/
+                 	
+                         row2[j + 1] = row1[j] + sub_c * (s_data[i] != t_data[j]); /* substitution */
+                         
+                         if (i > 0 && j > 0 && s_data[i - 1] == t_data[j] &&
+                                               s_data[i]     == t_data[j - 1] &&   /* transposition */
+                             row2[j + 1] > row0[j - 1] + trans_c)
+                         	row2[j + 1] = row0[j - 1] + trans_c;
+                         
+                         if (row2[j + 1] > row1[j + 1] + del_c)                    /* deletion */
+                                 row2[j + 1] = row1[j + 1] + del_c;
+                         
+                         if (row2[j + 1] > row2[j] + ins_c)                        /* insertion */
+                                 row2[j + 1] = row2[j] + ins_c;
+                 }
+  
+                 dummy = row0;
+                 row0 = row1;
+                 row1 = row2;
+                 row2 = dummy;
+         }
+  
+         distance = row1[length2];
+         return distance;  
+   }
+ 
+ 
diff -cNr /opt/src/pgsql-git/master/contrib/fuzzystrmatch/Makefile fuzzystrmatch/Makefile
*** /opt/src/pgsql-git/master/contrib/fuzzystrmatch/Makefile	2011-05-06 01:09:32.000000000 -0500
--- fuzzystrmatch/Makefile	2013-06-11 04:09:01.000000000 -0500
***************
*** 4,10 ****
  OBJS = fuzzystrmatch.o dmetaphone.o
  
  EXTENSION = fuzzystrmatch
! DATA = fuzzystrmatch--1.0.sql fuzzystrmatch--unpackaged--1.0.sql
  
  ifdef USE_PGXS
  PG_CONFIG = pg_config
--- 4,10 ----
  OBJS = fuzzystrmatch.o dmetaphone.o
  
  EXTENSION = fuzzystrmatch
! DATA = fuzzystrmatch--1.1.sql fuzzystrmatch--unpackaged--1.1.sql
  
  ifdef USE_PGXS
  PG_CONFIG = pg_config
diff -cNr /opt/src/pgsql-git/master/contrib/fuzzystrmatch/README.md fuzzystrmatch/README.md
*** /opt/src/pgsql-git/master/contrib/fuzzystrmatch/README.md	1969-12-31 18:00:00.000000000 -0600
--- fuzzystrmatch/README.md	2013-06-11 04:09:01.000000000 -0500
***************
*** 0 ****
--- 1,4 ----
+ fuzzystrmatch
+ =============
+ 
+ add Damerau-Levenshtein algorithm to fuzzystrmatch in PostgreSql

Attachment: Levenshtein-Damerau.diff.sig
Description: PGP signature

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to