Changeset: ca1500dcb7c4 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/ca1500dcb7c4
Modified Files:
        clients/Tests/MAL-signatures-hge.test
        clients/Tests/MAL-signatures.test
        monetdb5/modules/mal/txtsim.c
Branch: txtsim
Log Message:

Add comments back and approve the signatures


diffs (truncated from 310 to 300 lines):

diff --git a/clients/Tests/MAL-signatures-hge.test 
b/clients/Tests/MAL-signatures-hge.test
--- a/clients/Tests/MAL-signatures-hge.test
+++ b/clients/Tests/MAL-signatures-hge.test
@@ -35507,17 +35507,17 @@ battxtsim
 maxlevenshtein
 pattern battxtsim.maxlevenshtein(X_0:bat[:str], X_1:bat[:str], 
X_2:int):bat[:bit] 
 BATTXTSIMmaxlevenshtein;
-(empty)
+Same as maxlevenshtein but for BATS
 battxtsim
 maxlevenshtein
 pattern battxtsim.maxlevenshtein(X_0:bat[:str], X_1:bat[:str], X_2:int, 
X_3:int, X_4:int):bat[:bit] 
 BATTXTSIMmaxlevenshtein;
-(empty)
+Same as maxlevenshtein but for BATS
 battxtsim
 similarity
 command battxtsim.similarity(X_0:bat[:str], X_1:bat[:str]):bat[:dbl] 
 fstrcmp0_impl_bulk;
-(empty)
+(Deprecated) Normalized edit distance between two strings
 baturl
 extractURLHost
 command baturl.extractURLHost(X_0:bat[:str], X_1:bit):bat[:str] 
@@ -51007,87 +51007,87 @@ txtsim
 dameraulevenshtein
 pattern txtsim.dameraulevenshtein(X_0:str, X_1:str):int 
 TXTSIMdameraulevenshtein;
-(empty)
+Calculates Damerau-Levenshtein distance between two strings, operation costs 
(ins/del = 1, replacement = 1, transposition = 2)
 txtsim
 dameraulevenshtein
 pattern txtsim.dameraulevenshtein(X_0:str, X_1:str, X_2:int, X_3:int, 
X_4:int):int 
 TXTSIMdameraulevenshtein;
-(empty)
+Calculates Damerau-Levenshtein distance between two strings, variable 
operation costs (ins/del, replacement, transposition)
 txtsim
 editdistance
 command txtsim.editdistance(X_0:str, X_1:str):int 
 TXTSIMdameraulevenshtein1;
-(empty)
+Alias for Damerau-Levenshtein(str,str), insdel cost = 1, replace cost = 1 and 
transpose = 2
 txtsim
 editdistance2
 command txtsim.editdistance2(X_0:str, X_1:str):int 
 TXTSIMdameraulevenshtein2;
-(empty)
+Alias for Damerau-Levenshtein(str,str), insdel cost = 1, replace cost = 1 and 
transpose = 1
 txtsim
 jaro_winkler_similarity
 command txtsim.jaro_winkler_similarity(X_0:str, X_1:str):dbl 
 jaro_winkler_similarity;
-(empty)
+Calculate Jaro Winkler similarity
 txtsim
 levenshtein
 pattern txtsim.levenshtein(X_0:str, X_1:str):int 
 TXTSIMlevenshtein;
-(empty)
+Calculates Levenshtein distance between two strings, operation costs (ins/del 
= 1, replacement = 1)
 txtsim
 levenshtein
 pattern txtsim.levenshtein(X_0:str, X_1:str, X_2:int, X_3:int):int 
 TXTSIMlevenshtein;
-(empty)
+Calculates Levenshtein distance between two strings, variable operation costs 
(ins/del, replacement)
 txtsim
 levenshtein
 pattern txtsim.levenshtein(X_0:str, X_1:str, X_2:int, X_3:int, X_4:int):int 
 TXTSIMlevenshtein;
-(empty)
+(Backwards compatibility purposes) Calculates Damerau-Levenshtein distance 
between two strings, variable operation costs (ins/del, replacement, 
transposition)
 txtsim
 maxlevenshtein
 pattern txtsim.maxlevenshtein(X_0:str, X_1:str, X_2:int):int 
 TXTSIMmaxlevenshtein;
-(empty)
+Levenshtein distance with basic costs but up to a MAX
 txtsim
 maxlevenshtein
 pattern txtsim.maxlevenshtein(X_0:str, X_1:str, X_2:int, X_3:int, X_4:int):int 
 TXTSIMmaxlevenshtein;
-(empty)
+Levenshtein distance with variable costs but up to a MAX
 txtsim
 qgramnormalize
 command txtsim.qgramnormalize(X_0:str):str 
 qgram_normalize;
-(empty)
+'Normalizes' strings (eg. toUpper and replaces non-alphanumerics with one space
 txtsim
 qgramselfjoin
 command txtsim.qgramselfjoin(X_0:bat[:oid], X_1:bat[:oid], X_2:bat[:int], 
X_3:bat[:int], X_4:flt, X_5:int) (X_6:bat[:int], X_7:bat[:int]) 
 qgram_selfjoin;
-(empty)
+QGram self-join on ordered(!) qgram tables and sub-ordered q-gram positions
 txtsim
 similarity
 command txtsim.similarity(X_0:str, X_1:str):dbl 
 fstrcmp0_impl;
-(empty)
+(Deprecated) Normalized edit distance between two strings
 txtsim
 similarity
 command txtsim.similarity(X_0:str, X_1:str, X_2:dbl):dbl 
 fstrcmp_impl;
-(empty)
+(Deprecated) Normalized edit distance between two strings
 txtsim
 soundex
 command txtsim.soundex(X_0:str):str 
 soundex;
-(empty)
+Soundex function for phonetic matching
 txtsim
 str2qgrams
 command txtsim.str2qgrams(X_0:str):bat[:str] 
 str_2_qgrams;
-(empty)
+Break the string into 4-grams
 txtsim
 stringdiff
 command txtsim.stringdiff(X_0:str, X_1:str):int 
 stringdiff;
-(empty)
+Calculate the soundexed editdistance
 url
 extractURLHost
 command url.extractURLHost(X_0:str, X_1:bit):str 
diff --git a/clients/Tests/MAL-signatures.test 
b/clients/Tests/MAL-signatures.test
--- a/clients/Tests/MAL-signatures.test
+++ b/clients/Tests/MAL-signatures.test
@@ -26557,17 +26557,17 @@ battxtsim
 maxlevenshtein
 pattern battxtsim.maxlevenshtein(X_0:bat[:str], X_1:bat[:str], 
X_2:int):bat[:bit] 
 BATTXTSIMmaxlevenshtein;
-(empty)
+Same as maxlevenshtein but for BATS
 battxtsim
 maxlevenshtein
 pattern battxtsim.maxlevenshtein(X_0:bat[:str], X_1:bat[:str], X_2:int, 
X_3:int, X_4:int):bat[:bit] 
 BATTXTSIMmaxlevenshtein;
-(empty)
+Same as maxlevenshtein but for BATS
 battxtsim
 similarity
 command battxtsim.similarity(X_0:bat[:str], X_1:bat[:str]):bat[:dbl] 
 fstrcmp0_impl_bulk;
-(empty)
+(Deprecated) Normalized edit distance between two strings
 baturl
 extractURLHost
 command baturl.extractURLHost(X_0:bat[:str], X_1:bit):bat[:str] 
@@ -39332,87 +39332,87 @@ txtsim
 dameraulevenshtein
 pattern txtsim.dameraulevenshtein(X_0:str, X_1:str):int 
 TXTSIMdameraulevenshtein;
-(empty)
+Calculates Damerau-Levenshtein distance between two strings, operation costs 
(ins/del = 1, replacement = 1, transposition = 2)
 txtsim
 dameraulevenshtein
 pattern txtsim.dameraulevenshtein(X_0:str, X_1:str, X_2:int, X_3:int, 
X_4:int):int 
 TXTSIMdameraulevenshtein;
-(empty)
+Calculates Damerau-Levenshtein distance between two strings, variable 
operation costs (ins/del, replacement, transposition)
 txtsim
 editdistance
 command txtsim.editdistance(X_0:str, X_1:str):int 
 TXTSIMdameraulevenshtein1;
-(empty)
+Alias for Damerau-Levenshtein(str,str), insdel cost = 1, replace cost = 1 and 
transpose = 2
 txtsim
 editdistance2
 command txtsim.editdistance2(X_0:str, X_1:str):int 
 TXTSIMdameraulevenshtein2;
-(empty)
+Alias for Damerau-Levenshtein(str,str), insdel cost = 1, replace cost = 1 and 
transpose = 1
 txtsim
 jaro_winkler_similarity
 command txtsim.jaro_winkler_similarity(X_0:str, X_1:str):dbl 
 jaro_winkler_similarity;
-(empty)
+Calculate Jaro Winkler similarity
 txtsim
 levenshtein
 pattern txtsim.levenshtein(X_0:str, X_1:str):int 
 TXTSIMlevenshtein;
-(empty)
+Calculates Levenshtein distance between two strings, operation costs (ins/del 
= 1, replacement = 1)
 txtsim
 levenshtein
 pattern txtsim.levenshtein(X_0:str, X_1:str, X_2:int, X_3:int):int 
 TXTSIMlevenshtein;
-(empty)
+Calculates Levenshtein distance between two strings, variable operation costs 
(ins/del, replacement)
 txtsim
 levenshtein
 pattern txtsim.levenshtein(X_0:str, X_1:str, X_2:int, X_3:int, X_4:int):int 
 TXTSIMlevenshtein;
-(empty)
+(Backwards compatibility purposes) Calculates Damerau-Levenshtein distance 
between two strings, variable operation costs (ins/del, replacement, 
transposition)
 txtsim
 maxlevenshtein
 pattern txtsim.maxlevenshtein(X_0:str, X_1:str, X_2:int):int 
 TXTSIMmaxlevenshtein;
-(empty)
+Levenshtein distance with basic costs but up to a MAX
 txtsim
 maxlevenshtein
 pattern txtsim.maxlevenshtein(X_0:str, X_1:str, X_2:int, X_3:int, X_4:int):int 
 TXTSIMmaxlevenshtein;
-(empty)
+Levenshtein distance with variable costs but up to a MAX
 txtsim
 qgramnormalize
 command txtsim.qgramnormalize(X_0:str):str 
 qgram_normalize;
-(empty)
+'Normalizes' strings (eg. toUpper and replaces non-alphanumerics with one space
 txtsim
 qgramselfjoin
 command txtsim.qgramselfjoin(X_0:bat[:oid], X_1:bat[:oid], X_2:bat[:int], 
X_3:bat[:int], X_4:flt, X_5:int) (X_6:bat[:int], X_7:bat[:int]) 
 qgram_selfjoin;
-(empty)
+QGram self-join on ordered(!) qgram tables and sub-ordered q-gram positions
 txtsim
 similarity
 command txtsim.similarity(X_0:str, X_1:str):dbl 
 fstrcmp0_impl;
-(empty)
+"(Deprecated) Normalized edit distance between two strings"
 txtsim
 similarity
 command txtsim.similarity(X_0:str, X_1:str, X_2:dbl):dbl 
 fstrcmp_impl;
-(empty)
+(Deprecated) Normalized edit distance between two strings
 txtsim
 soundex
 command txtsim.soundex(X_0:str):str 
 soundex;
-(empty)
+Soundex function for phonetic matching
 txtsim
 str2qgrams
 command txtsim.str2qgrams(X_0:str):bat[:str] 
 str_2_qgrams;
-(empty)
+Break the string into 4-grams
 txtsim
 stringdiff
 command txtsim.stringdiff(X_0:str, X_1:str):int 
 stringdiff;
-(empty)
+Calculate the soundexed editdistance
 url
 extractURLHost
 command url.extractURLHost(X_0:str, X_1:bit):str 
diff --git a/monetdb5/modules/mal/txtsim.c b/monetdb5/modules/mal/txtsim.c
--- a/monetdb5/modules/mal/txtsim.c
+++ b/monetdb5/modules/mal/txtsim.c
@@ -1459,28 +1459,28 @@ fstrcmp0_impl_bulk(bat *res, bat *string
 
 #include "mel.h"
 mel_func txtsim_init_funcs[] = {
-       pattern("txtsim", "dameraulevenshtein", TXTSIMdameraulevenshtein, 
false, "", args(1,3,arg("",int),arg("x",str),arg("y",str))),
-       pattern("txtsim", "dameraulevenshtein", TXTSIMdameraulevenshtein, 
false, "", 
args(1,6,arg("",int),arg("x",str),arg("y",str),arg("insdel_cost",int),arg("replace_cost",int),arg("transpose_cost",int))),
-       command("txtsim", "editdistance", TXTSIMdameraulevenshtein1, false, "", 
args(1,3, arg("",int),arg("s",str),arg("t",str))),
-       command("txtsim", "editdistance2", TXTSIMdameraulevenshtein2, false, 
"", args(1,3, arg("",int),arg("s",str),arg("t",str))),
-       pattern("txtsim", "levenshtein", TXTSIMlevenshtein, false, "", 
args(1,3,arg("",int),arg("s",str),arg("t",str))),
-       pattern("txtsim", "levenshtein", TXTSIMlevenshtein, false, "", 
args(1,5,arg("",int),arg("x",str),arg("y",str),arg("insdel_cost",int),arg("replace_cost",int))),
-       pattern("txtsim", "levenshtein", TXTSIMlevenshtein, false, "", 
args(1,6,arg("",int),arg("x",str),arg("y",str),arg("insdel_cost",int),arg("replace_cost",int),arg("transpose_cost",int))),
-       pattern("txtsim", "maxlevenshtein", TXTSIMmaxlevenshtein, false, "", 
args(1, 4, arg("",int), arg("l",str),arg("r",str),arg("k",int))),
-       pattern("txtsim", "maxlevenshtein", TXTSIMmaxlevenshtein, false, "", 
args(1, 6, arg("",int), 
arg("l",str),arg("r",str),arg("k",int),arg("insdel_cost",int),arg("replace_cost",int))),
-       pattern("battxtsim", "maxlevenshtein", BATTXTSIMmaxlevenshtein, false, 
"", args(1, 4, batarg("",bit), batarg("l",str),batarg("r",str),arg("k",int))),
-       pattern("battxtsim", "maxlevenshtein", BATTXTSIMmaxlevenshtein, false, 
"", args(1, 6, batarg("",bit), 
batarg("l",str),batarg("r",str),arg("k",int),arg("insdel_cost",int),arg("replace_cost",int))),
+       pattern("txtsim", "dameraulevenshtein", TXTSIMdameraulevenshtein, 
false, "Calculates Damerau-Levenshtein distance between two strings, operation 
costs (ins/del = 1, replacement = 1, transposition = 2)", 
args(1,3,arg("",int),arg("x",str),arg("y",str))),
+       pattern("txtsim", "dameraulevenshtein", TXTSIMdameraulevenshtein, 
false, "Calculates Damerau-Levenshtein distance between two strings, variable 
operation costs (ins/del, replacement, transposition)", 
args(1,6,arg("",int),arg("x",str),arg("y",str),arg("insdel_cost",int),arg("replace_cost",int),arg("transpose_cost",int))),
+       command("txtsim", "editdistance", TXTSIMdameraulevenshtein1, false, 
"Alias for Damerau-Levenshtein(str,str), insdel cost = 1, replace cost = 1 and 
transpose = 2", args(1,3, arg("",int),arg("s",str),arg("t",str))),
+       command("txtsim", "editdistance2", TXTSIMdameraulevenshtein2, false, 
"Alias for Damerau-Levenshtein(str,str), insdel cost = 1, replace cost = 1 and 
transpose = 1", args(1,3, arg("",int),arg("s",str),arg("t",str))),
+       pattern("txtsim", "levenshtein", TXTSIMlevenshtein, false, "Calculates 
Levenshtein distance between two strings, operation costs (ins/del = 1, 
replacement = 1)", args(1,3,arg("",int),arg("s",str),arg("t",str))),
+       pattern("txtsim", "levenshtein", TXTSIMlevenshtein, false, "Calculates 
Levenshtein distance between two strings, variable operation costs (ins/del, 
replacement)", 
args(1,5,arg("",int),arg("x",str),arg("y",str),arg("insdel_cost",int),arg("replace_cost",int))),
+       pattern("txtsim", "levenshtein", TXTSIMlevenshtein, false, "(Backwards 
compatibility purposes) Calculates Damerau-Levenshtein distance between two 
strings, variable operation costs (ins/del, replacement, transposition)", 
args(1,6,arg("",int),arg("x",str),arg("y",str),arg("insdel_cost",int),arg("replace_cost",int),arg("transpose_cost",int))),
+       pattern("txtsim", "maxlevenshtein", TXTSIMmaxlevenshtein, false, 
"Levenshtein distance with basic costs but up to a MAX", args(1, 4, 
arg("",int), arg("l",str),arg("r",str),arg("k",int))),
+       pattern("txtsim", "maxlevenshtein", TXTSIMmaxlevenshtein, false, 
"Levenshtein distance with variable costs but up to a MAX", args(1, 6, 
arg("",int), 
arg("l",str),arg("r",str),arg("k",int),arg("insdel_cost",int),arg("replace_cost",int))),
+       pattern("battxtsim", "maxlevenshtein", BATTXTSIMmaxlevenshtein, false, 
"Same as maxlevenshtein but for BATS", args(1, 4, batarg("",bit), 
batarg("l",str),batarg("r",str),arg("k",int))),
+       pattern("battxtsim", "maxlevenshtein", BATTXTSIMmaxlevenshtein, false, 
"Same as maxlevenshtein but for BATS", args(1, 6, batarg("",bit), 
batarg("l",str),batarg("r",str),arg("k",int),arg("insdel_cost",int),arg("replace_cost",int))),
        /* command("battxtsim", "maxlevenshteinselect", 
TXTSIMmaxlevenshteinselect, false, "", args(1,6, 
batarg("",oid),batarg("b",str),batarg("s",oid),arg("anti",bit))), */
        /* command("battxtsim", "maxlevenshteinjoin", TXTSIMmaxlevenshteinjoin, 
false, "", args(2,10, 
batarg("",oid),batarg("",oid),batarg("l",str),batarg("r",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng),arg("anti",bit))),
 */
-       command("txtsim", "soundex", soundex, false, "", args(1,2, 
arg("",str),arg("name",str))),
-       command("txtsim", "stringdiff", stringdiff, false, "", args(1,3, 
arg("",int),arg("s1",str),arg("s2",str))),
-       command("txtsim", "qgramnormalize", qgram_normalize, false, "", 
args(1,2, arg("",str),arg("input",str))),
-       command("txtsim", "qgramselfjoin", qgram_selfjoin, false, "", args(2,8, 
batarg("",int),batarg("",int),batarg("qgram",oid),batarg("id",oid),batarg("pos",int),batarg("len",int),arg("c",flt),arg("k",int))),
-       command("txtsim", "str2qgrams", str_2_qgrams, false, "", args(1,2, 
batarg("",str),arg("s",str))),
-       command("txtsim", "jaro_winkler_similarity", jaro_winkler_similarity, 
false, "", args(1,3, arg("",dbl),arg("x",str),arg("y",str))),
-       command("txtsim", "similarity", fstrcmp_impl, false, "", args(1,4, 
arg("",dbl),arg("string1",str),arg("string2",str),arg("minimum",dbl))),
-       command("txtsim", "similarity", fstrcmp0_impl, false, "", args(1,3, 
arg("",dbl),arg("string1",str),arg("string2",str))),
-       command("battxtsim", "similarity", fstrcmp0_impl_bulk, false, "", 
args(1,3, batarg("",dbl),batarg("string1",str),batarg("string2",str))),
+       command("txtsim", "soundex", soundex, false, "Soundex function for 
phonetic matching", args(1,2, arg("",str),arg("name",str))),
+       command("txtsim", "stringdiff", stringdiff, false, "Calculate the 
soundexed editdistance", args(1,3, arg("",int),arg("s1",str),arg("s2",str))),
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to