Changeset: e752aa525361 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=e752aa525361
Modified Files:
        gdk/gdk_strimps.c
Branch: string_imprints
Log Message:

Handle ignored bytes correctly


diffs (35 lines):

diff --git a/gdk/gdk_strimps.c b/gdk/gdk_strimps.c
--- a/gdk/gdk_strimps.c
+++ b/gdk/gdk_strimps.c
@@ -93,7 +93,7 @@ GDKstrimp_ndigrams(BAT *b, size_t *n)
 gdk_return
 GDKstrimp_makehistogram(BAT *b, uint64_t *hist, size_t hist_size, size_t *count)
 {
-       lng t0;
+       lng t0=0;
        size_t hi;
        BUN i;
        BATiter bi;
@@ -111,7 +111,21 @@ GDKstrimp_makehistogram(BAT *b, uint64_t
                s = (char *)BUNtvar(bi, i);
                if (!strNil(s)) {
                        for(ptr = s; *ptr != 0 && *(ptr + 1) != 0; ptr++) {
-                               if (isNotIgnored(*ptr) && isNotIgnored(*(ptr+1))) {
+                               if (isIgnored(*(ptr+1))) {
+                                       /* Skip this and the next pair
+                                        * if the next char is ignored.
+                                        */
+                                       ptr++;
+                               }
+                               else if (isIgnored(*ptr)) {
+                                       /* Skip this pair if the current
+                                        * char is ignored. This should
+                                        * only happen at the beginning
+                                        * of a string.
+                                        */
+                                       ;
+                               }
+                               else {
                                        hi = pairToIndex(*(ptr), *(ptr+1));
                                        assert(hi < hist_size);
                                        if (hist[hi] == 0)
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to