Update of /cvsroot/monetdb/MonetDB5/src/modules/mal
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv15444

Modified Files:
        txtsim.mx 
Log Message:
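Fixes to the M5 interface: strings are always passed as str *, and the
implementation functions now return str (MAL_SUCCEED or an exception)
instead of int.
Fixed stringdiff: it is now a command implemented in C (stringdiff_impl)
instead of a MAL function.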


Index: txtsim.mx
===================================================================
RCS file: /cvsroot/monetdb/MonetDB5/src/modules/mal/txtsim.mx,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- txtsim.mx   10 Dec 2007 21:23:59 -0000      1.1
+++ txtsim.mx   18 Dec 2007 22:34:17 -0000      1.2
@@ -55,6 +55,10 @@
 address soundex_impl
 comment "Soundex function for phonetic matching";
 
+command stringdiff(s1:str, s2:str) :int 
+address stringdiff_impl
+comment "calculate the soundexed editdistance";
+
 command qgramnormalize(input:str): str 
 address CMDqgramnormalize
 comment "'Normalizes' strings (eg. toUpper and replaces non-alphanumerics with 
one space";
@@ -90,21 +94,24 @@
 #define txtsim_export extern
 #endif
 
-txtsim_export int levenshtein_impl(int *result, str s, str t, int 
*insdel_cost, int *replace_cost, int *transpose_cost);
-txtsim_export int levenshteinbasic_impl(int *result, str s, str t);
-txtsim_export int levenshteinbasic2_impl(int *result, str s, str t);
-txtsim_export int fstrcmp_impl(dbl *ret, str string1, str string2, dbl 
*minimum);
-txtsim_export int fstrcmp0_impl(dbl *ret, str string1, str string2);
-txtsim_export int soundex_impl(str *res, str Name);
-txtsim_export int CMDqgramnormalize(str *res, str input);
-txtsim_export int CMDqgramselfjoin(BAT **res, BAT *qgram, BAT *id, BAT *pos, 
BAT *len, flt *c, int *k);
+txtsim_export str levenshtein_impl(int *result, str *s, str *t, int 
*insdel_cost, int *replace_cost, int *transpose_cost);
+txtsim_export str levenshteinbasic_impl(int *result, str *s, str *t);
+txtsim_export str levenshteinbasic2_impl(int *result, str *s, str *t);
+txtsim_export str fstrcmp_impl(dbl *ret, str *string1, str *string2, dbl 
*minimum);
+txtsim_export str fstrcmp0_impl(dbl *ret, str *string1, str *string2);
+txtsim_export str soundex_impl(str *res, str *Name);
+txtsim_export str stringdiff_impl(int *res, str *s1, str*s2);
+txtsim_export str CMDqgramnormalize(str *res, str *input);
+txtsim_export str CMDqgramselfjoin(BAT **res, BAT *qgram, BAT *id, BAT *pos, 
BAT *len, flt *c, int *k);
 
 #endif /*_TXTSIM_H*/
 
 
 
 @c
+#include "mal_config.h"
 #include "txtsim.h"
+#include "mal_exception.h"
 
 
 #define RETURN_NIL_IF(b,t) \
@@ -114,7 +121,7 @@
           } else {\
              memcpy(res, ATOMnilptr(t), ATOMsize(t));\
           }\
-          return GDK_SUCCEED; \
+          return MAL_SUCCEED; \
        }
 
 /* =========================================================================
@@ -169,9 +176,11 @@
 /******************************
  * Compute Levenshtein distance
  *****************************/
-int
-levenshtein_impl(int *result, str s, str t, int *insdel_cost, int 
*replace_cost, int *transpose_cost)
+str
+levenshtein_impl(int *result, str *S, str *T, int *insdel_cost, int 
*replace_cost, int *transpose_cost)
 {
+       char *s = *S;
+       char *t = *T;
        int *d;                 /* pointer to matrix */
        int n;                  /* length of s */
        int m;                  /* length of t */
@@ -191,10 +200,12 @@
        n = (int) strlen(s);    /* 64bit: assume strings are less than 2 GB */
        m = (int) strlen(t);
        if (n == 0) {
-               return m;
+               *result = m;
+               return MAL_SUCCEED;
        }
        if (m == 0) {
-               return n;
+               *result = n;
+               return MAL_SUCCEED;
        }
        sz = (n + 1) * (m + 1) * sizeof(int);
        d = (int *) GDKmalloc(sz);
@@ -250,19 +261,19 @@
        /* Step 7 */
        *result = levenshtein_GetAt(d, n, m, n);
        GDKfree(d);
-       return GDK_SUCCEED;
+       return MAL_SUCCEED;
 }
 
-int
-levenshteinbasic_impl(int *result, str s, str t)
+str
+levenshteinbasic_impl(int *result, str *s, str *t)
 {
        int insdel = 1, replace = 1, transpose = 2;
 
        return levenshtein_impl(result, s, t, &insdel, &replace, &transpose);
 }
 
-int
-levenshteinbasic2_impl(int *result, str s, str t)
+str
+levenshteinbasic2_impl(int *result, str *s, str *t)
 {
        int insdel = 1, replace = 1, transpose = 1;
 
@@ -332,19 +343,32 @@
 }
 
 
-int
-soundex_impl(str *res, str Name)
+str
+soundex_impl(str *res, str *Name)
 {
-       RETURN_NIL_IF(strNil(Name), TYPE_str);
+       RETURN_NIL_IF(strNil(*Name), TYPE_str);
 
        *res = (str) GDKmalloc(sizeof(char) * (SoundexLen + 1));
 
        /* calculate Key for Name */
-       soundex_code(Name, *res);
+       soundex_code(*Name, *res);
 
-       return GDK_SUCCEED;
+       return MAL_SUCCEED;
 }
 
+str
+stringdiff_impl(int *res, str *s1, str *s2)
+{
+       str r = MAL_SUCCEED;
+       char *S1 = NULL, *S2 = NULL;
+
+       soundex_impl(&S1, s1);
+       soundex_impl(&S2, s2);
+       r = levenshteinbasic_impl(res, &S1, &S2);
+       GDKfree(S1);
+       GDKfree(S2);
+       return r;
+}
 
 /******************************
  * QGRAMNORMALIZE
@@ -357,9 +381,10 @@
  * qgramnormalize(" '' t ' est").print(); --> [ "T EST" ]
  *
  *****************************/
-int
-CMDqgramnormalize(str *res, str input)
+str
+CMDqgramnormalize(str *res, str *Input)
 {
+       char *input = *Input;
        int i, j = 0;
        char c, last = ' ';
 
@@ -381,8 +406,7 @@
        while (j > 0 && (*res)[--j] == ' ')
                (*res)[j] = 0;
 
-       return GDK_SUCCEED;
-
+       return MAL_SUCCEED;
 }
 
 /* =========================================================================
@@ -855,9 +879,11 @@
        strings are identical, and a number in between if they are
        similar.  */
 
-int
-fstrcmp_impl(dbl *ret, str string1, str string2, dbl *minimum)
+str
+fstrcmp_impl(dbl *ret, str *S1, str *S2, dbl *minimum)
 {
+       char *string1 = *S1;
+       char *string2 = *S2;
        int i;
 
        size_t fdiag_len;
@@ -873,11 +899,11 @@
        /* short-circuit obvious comparisons */
        if (string[0].data_length == 0 && string[1].data_length == 0) {
                *ret = 1.0;
-               return GDK_SUCCEED;
+               return MAL_SUCCEED;
        }
        if (string[0].data_length == 0 || string[1].data_length == 0) {
                *ret = 0.0;
-               return GDK_SUCCEED;
+               return MAL_SUCCEED;
        }
 
        /* Set TOO_EXPENSIVE to be approximate square root of input size,
@@ -914,11 +940,11 @@
        *ret = ((double)
                (string[0].data_length + string[1].data_length - 
string[1].edit_count - string[0].edit_count)
                / (string[0].data_length + string[1].data_length));
-       return GDK_SUCCEED;
+       return MAL_SUCCEED;
 }
 
-int
-fstrcmp0_impl(dbl *ret, str string1, str string2)
+str
+fstrcmp0_impl(dbl *ret, str *string1, str *string2)
 {
        double min = 0.0;
 
@@ -928,19 +954,17 @@
 
 /* ============ Q-GRAM SELF JOIN ============== */
 
-int
+str
 CMDqgramselfjoin(BAT **res, BAT *qgram, BAT *id, BAT *pos, BAT *len, flt *c, 
int *k)
 {
        size_t n = BATcount(qgram);
        unsigned int i, j;
        BAT *bn;
 
-       oid *qbuf;
-       int *ibuf, *pbuf, *lbuf;
-       qbuf = (oid *) Tloc(qgram,  BUNfirst(qgram));
-       ibuf = (int *) Tloc(id, BUNfirst(id));
-       pbuf = (int *) Tloc(pos, BUNfirst(pos));
-       lbuf = (int *) Tloc(len, BUNfirst(len));
+       oid *qbuf = (oid *) Tloc(qgram, BUNfirst(qgram));
+       int *ibuf = (int *) Tloc(id, BUNfirst(id));
+       int *pbuf = (int *) Tloc(pos, BUNfirst(pos));
+       int *lbuf = (int *) Tloc(len, BUNfirst(len));
 
        ERRORcheck((qgram->ttype != TYPE_oid), "CMDqgramselfjoin: tail of BAT 
qgram must be oid.\n");
        ERRORcheck((id->ttype != TYPE_int), "CMDqgramselfjoin: tail of BAT id 
must be int.\n");
@@ -955,10 +979,10 @@
        ERRORcheck((ALIGNsynced(qgram, len) == 0), "CMDqgramselfjoin: qgram and 
len are not synced");
 
        ERRORcheck((Tsize(qgram) != ATOMsize(qgram->ttype)), "CMDqgramselfjoin: 
qgram is not a true void bat");
-       ERRORcheck((Tsize(qgram) != ATOMsize(id->ttype)), "CMDqgramselfjoin: id 
is not a true void bat");
+       ERRORcheck((Tsize(id) != ATOMsize(id->ttype)), "CMDqgramselfjoin: id is 
not a true void bat");
 
        ERRORcheck((Tsize(pos) != ATOMsize(pos->ttype)), "CMDqgramselfjoin: pos 
is not a true void bat");
-       ERRORcheck((Tsize(qgram) != ATOMsize(len->ttype)), "CMDqgramselfjoin: 
len is not a true void bat");
+       ERRORcheck((Tsize(len) != ATOMsize(len->ttype)), "CMDqgramselfjoin: len 
is not a true void bat");
 
        *res = bn = BATnew(TYPE_int, TYPE_int, n);
 
@@ -972,30 +996,16 @@
 
        bn->hsorted = bn->tsorted = 0;
 
-       return GDK_SUCCEED;
+       return MAL_SUCCEED;
       bunins_failed:
-       GDKerror("CMDqgramselfjoin: could not realloc\n");
        BBPreclaim(bn);
-       return GDK_FAIL;
+       throw(MAL, "txtsim.qgramselfjoin", "could not realloc\n");
 }
 
 @mal
 
 #mil implementation
 
-#proc stringdiff( str s1, str s1 ) : int {
-#      var sd1 := soundex(s1);
-#      var sd2 := soundex(s2);
-#      return editdistance(sd1,sd2);
-#}
-
-#mal implementation
-function stringdiff( s1:str, s2:str):int;
-       sd1 := soundex(s1);
-       sd2 := soundex(s2);
-       return editdistance(sd1,sd2);
-end stringdiff;
-
 #mil implementation
 #proc str2qgrams(str s) : bat[oid,str]
 #{


-------------------------------------------------------------------------
SF.Net email is sponsored by:
Check out the new SourceForge.net Marketplace.
It's the best place to buy or sell services
for just about anything Open Source.
http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace
_______________________________________________
Monetdb-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-checkins

Reply via email to