Hello community,

here is the log from the commit of package python-jellyfish for 
openSUSE:Factory checked in at 2019-09-13 14:59:36
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-jellyfish (Old)
 and      /work/SRC/openSUSE:Factory/.python-jellyfish.new.7948 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Package is "python-jellyfish"

Fri Sep 13 14:59:36 2019 rev:4 rq:730189 version:0.7.2

Changes:
--------
--- /work/SRC/openSUSE:Factory/python-jellyfish/python-jellyfish.changes        2019-04-04 12:07:49.893416265 +0200
+++ /work/SRC/openSUSE:Factory/.python-jellyfish.new.7948/python-jellyfish.changes      2019-09-13 14:59:41.349280721 +0200
@@ -1,0 +2,8 @@
+Wed Sep 11 14:35:56 UTC 2019 - Tomáš Chvátal <[email protected]>
+
+- Update to 0.7.2:
+  * fix CJellyfish damerau_levenshtein w/ unicode, thanks to immerrr
+  * fix final H in NYSIIS
+  * fix issue w/ trailing W in metaphone
+
+-------------------------------------------------------------------

Old:
----
  jellyfish-0.7.1.tar.gz

New:
----
  jellyfish-0.7.2.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ python-jellyfish.spec ++++++
--- /var/tmp/diff_new_pack.RfcMtD/_old  2019-09-13 14:59:42.129280754 +0200
+++ /var/tmp/diff_new_pack.RfcMtD/_new  2019-09-13 14:59:42.133280754 +0200
@@ -19,7 +19,7 @@
 %{?!python_module:%define python_module() python-%{**} python3-%{**}}
 %define skip_python2 1
 Name:           python-jellyfish
-Version:        0.7.1
+Version:        0.7.2
 Release:        0
 Summary:        A library for doing approximate and phonetic matching of strings
 License:        BSD-2-Clause
@@ -56,6 +56,7 @@
 %python_expand %fdupes %{buildroot}%{$python_sitearch}
 
 %check
+export PYTHONDONTWRITEBYTECODE=1
 %pytest %{buildroot}%{$python_sitearch}/jellyfish/test.py
 
 %files %{python_files}

++++++ jellyfish-0.7.1.tar.gz -> jellyfish-0.7.2.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/jellyfish-0.7.1/PKG-INFO new/jellyfish-0.7.2/PKG-INFO
--- old/jellyfish-0.7.1/PKG-INFO        2019-01-11 05:35:53.000000000 +0100
+++ new/jellyfish-0.7.2/PKG-INFO        2019-06-05 17:04:54.000000000 +0200
@@ -1,6 +1,6 @@
 Metadata-Version: 1.2
 Name: jellyfish
-Version: 0.7.1
+Version: 0.7.2
 Summary: a library for doing approximate and phonetic matching of strings.
 Home-page: http://github.com/jamesturk/jellyfish
 License: UNKNOWN
@@ -8,8 +8,8 @@
         jellyfish
         =========
         
-        .. image:: https://travis-ci.org/jamesturk/jellyfish.svg?branch=master
-            :target: https://travis-ci.org/jamesturk/jellyfish
+        .. image:: https://travis-ci.com/jamesturk/jellyfish.svg?branch=master
+            :target: https://travis-ci.com/jamesturk/jellyfish
         
         .. image:: https://coveralls.io/repos/jamesturk/jellyfish/badge.png?branch=master
             :target: https://coveralls.io/r/jamesturk/jellyfish
@@ -97,5 +97,6 @@
 Classifier: Programming Language :: Python :: 3.4
 Classifier: Programming Language :: Python :: 3.5
 Classifier: Programming Language :: Python :: 3.6
+Classifier: Programming Language :: Python :: 3.7
 Classifier: Topic :: Text Processing :: Linguistic
 Requires-Python: >3.4
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/jellyfish-0.7.1/README.rst 
new/jellyfish-0.7.2/README.rst
--- old/jellyfish-0.7.1/README.rst      2019-01-11 05:30:23.000000000 +0100
+++ new/jellyfish-0.7.2/README.rst      2019-06-05 02:01:29.000000000 +0200
@@ -2,8 +2,8 @@
 jellyfish
 =========
 
-.. image:: https://travis-ci.org/jamesturk/jellyfish.svg?branch=master
-    :target: https://travis-ci.org/jamesturk/jellyfish
+.. image:: https://travis-ci.com/jamesturk/jellyfish.svg?branch=master
+    :target: https://travis-ci.com/jamesturk/jellyfish
 
 .. image:: https://coveralls.io/repos/jamesturk/jellyfish/badge.png?branch=master
     :target: https://coveralls.io/r/jamesturk/jellyfish
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/jellyfish-0.7.1/cjellyfish/damerau_levenshtein.c 
new/jellyfish-0.7.2/cjellyfish/damerau_levenshtein.c
--- old/jellyfish-0.7.1/cjellyfish/damerau_levenshtein.c        2019-01-11 05:32:26.000000000 +0100
+++ new/jellyfish-0.7.2/cjellyfish/damerau_levenshtein.c        2019-06-05 01:39:11.000000000 +0200
@@ -3,6 +3,139 @@
 #include <stdio.h>
 #include <wchar.h>
 
+/*
+
+  Trie is a nested search tree, where each node's key is broken down into parts
+  and looking up a certain key means a sequence of lookups in small associative
+  arrays. They are usually used for strings, where a word "foo" would be
+  basically looked up as trie["f"]["o"]["o"].
+
+  In this case, the tries split the incoming integer into segments, omitting
+  upper zero ones, and they are looked up as follows:
+
+  I.e. for segments of 1 byte,
+
+  - for key = 0x11, the result is d->values[0x11]
+  - for key = 0x1122  -  d->child_nodes[0x11]->values[0x22]
+  - for key = 0x112233  -  d->child_nodes[0x11]->child_nodes[0x22]->values[0x33]
+
+  And so on.
+
+  Child nodes are created on demand, when a value should be stored in them.
+
+  If no value is stored in the trie for a certain key, the lookup returns 0.
+
+*/
+
+
+#define TRIE_VALUES_PER_LEVEL 256
+/* Each level takes one byte from dictionary key, hence max levels is: */
+#define TRIE_MAX_LEVELS sizeof(size_t)
+
+struct trie {
+    size_t* values;
+    struct trie** child_nodes;
+};
+
+
+struct trie* trie_create(void)
+{
+    return calloc(1, sizeof(struct trie));
+}
+
+
+void trie_destroy(struct trie* d)
+{
+    size_t i;
+    if (!d) {
+        return;
+    }
+    free(d->values);
+    if (d->child_nodes) {
+        for (i = 0; i < TRIE_VALUES_PER_LEVEL; ++i) {
+            trie_destroy(d->child_nodes[i]);
+        }
+    }
+    free(d->child_nodes);
+    free(d);
+}
+
+
+size_t trie_get(struct trie* d, size_t key)
+{
+    size_t level_keys[TRIE_MAX_LEVELS];
+    size_t level_pos = 0;
+
+    size_t cur_remainder = key;
+    size_t cur_key;
+    while (1) {
+        level_keys[level_pos] = cur_remainder % TRIE_VALUES_PER_LEVEL;
+        cur_remainder /= TRIE_VALUES_PER_LEVEL;
+        if (!cur_remainder) {
+            break;
+        }
+        ++level_pos;
+    }
+
+    while (level_pos) {
+        cur_key = level_keys[level_pos];
+        if (!d->child_nodes || !d->child_nodes[cur_key]) {
+            return 0;
+        }
+        d = d->child_nodes[cur_key];
+        --level_pos;
+    }
+    if (!d->values) {
+        return 0;
+    }
+    return d->values[level_keys[0]];
+}
+
+
+int trie_set(struct trie* d, size_t key, size_t val)
+{
+    size_t level_keys[TRIE_MAX_LEVELS];
+    size_t level_pos = 0;
+
+    size_t cur_remainder = key;
+    size_t cur_key;
+    while (1) {
+        level_keys[level_pos] = cur_remainder % TRIE_VALUES_PER_LEVEL;
+        cur_remainder /= TRIE_VALUES_PER_LEVEL;
+        if (!cur_remainder) {
+            break;
+        }
+        ++level_pos;
+    }
+
+    while (level_pos) {
+        cur_key = level_keys[level_pos];
+        if (!d->child_nodes) {
+            d->child_nodes = calloc(TRIE_VALUES_PER_LEVEL, sizeof(struct trie*));
+            if (!d->child_nodes) {
+                return 0;
+            }
+        }
+        if (!d->child_nodes[cur_key]) {
+            d->child_nodes[cur_key] = trie_create();
+            if (!d->child_nodes[cur_key]){
+                return 0;
+            }
+        }
+        d = d->child_nodes[cur_key];
+        --level_pos;
+    }
+
+    if (!d->values) {
+        d->values = calloc(TRIE_VALUES_PER_LEVEL, sizeof(size_t));
+        if (!d->values) {
+            return 0;
+        }
+    }
+    d->values[level_keys[0]] = val;
+    return 1;
+}
+
 
 int damerau_levenshtein_distance(const JFISH_UNICODE *s1, const JFISH_UNICODE *s2, size_t len1, size_t len2)
 {
@@ -12,21 +145,19 @@
     size_t i, j, i1, j1;
     size_t db;
     size_t d1, d2, d3, d4, result;
-    size_t da_idx;
     unsigned short cost;
 
     size_t *dist = NULL;
 
-    const size_t len_da = 256;
-    size_t *da = calloc(len_da, sizeof(size_t));
+    struct trie* da = trie_create();
     if (!da) {
         return -1;
     }
 
     dist = malloc((len1 + 2) * cols * sizeof(size_t));
     if (!dist) {
-        free(da);
-        return -1;
+        result = -1;
+        goto cleanup_da;
     }
 
     dist[0] = infinite;
@@ -44,13 +175,7 @@
     for (i = 1; i <= len1; i++) {
         db = 0;
         for (j = 1; j <= len2; j++) {
-            da_idx = (JFISH_UNICODE)s2[j-1];
-            if (da_idx >= len_da) {
-                free(dist);
-                free(da);
-                return -2;
-            }
-            i1 = da[da_idx];
+            i1 = trie_get(da, s2[j-1]);
             j1 = db;
 
             if (s1[i - 1] == s2[j - 1]) {
@@ -68,19 +193,20 @@
             dist[((i+1)*cols) + j + 1] = MIN(MIN(d1, d2), MIN(d3, d4));
         }
 
-        da_idx = (JFISH_UNICODE)s1[i-1];
-        if (da_idx >= len_da) {
-            free(dist);
-            free(da);
-            return -2;
-        }
-        da[da_idx] = i;
+        if (!trie_set(da, s1[i-1], i)) {
+            result = -1;
+            goto cleanup;
+        };
     }
 
     result = dist[((len1+1) * cols) + len2 + 1];
 
+
+ cleanup:
     free(dist);
-    free(da);
+
+ cleanup_da:
+    trie_destroy(da);
 
     return result;
 }
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/jellyfish-0.7.1/cjellyfish/jellyfishmodule.c 
new/jellyfish-0.7.2/cjellyfish/jellyfishmodule.c
--- old/jellyfish-0.7.1/cjellyfish/jellyfishmodule.c    2019-01-11 
05:32:26.000000000 +0100
+++ new/jellyfish-0.7.2/cjellyfish/jellyfishmodule.c    2019-06-05 
01:39:11.000000000 +0200
@@ -16,8 +16,6 @@
 #define INLINE inline
 #endif
 
-#define UNSUPPORTED_CODEPOINT "Encountered unsupported code point in string."
-
 
 /* Returns a new reference to a PyString (python < 3) or
  * PyBytes (python >= 3.0).
@@ -144,11 +142,6 @@
         PyErr_NoMemory();
         return NULL;
     }
-    else if (result == -2) {
-        PyErr_SetString(PyExc_ValueError, UNSUPPORTED_CODEPOINT);
-        return NULL;
-    }
-
     return Py_BuildValue("i", result);
 }
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/jellyfish-0.7.1/cjellyfish/metaphone.c 
new/jellyfish-0.7.2/cjellyfish/metaphone.c
--- old/jellyfish-0.7.1/cjellyfish/metaphone.c  2019-01-11 05:32:26.000000000 
+0100
+++ new/jellyfish-0.7.2/cjellyfish/metaphone.c  2019-06-05 16:54:46.000000000 
+0200
@@ -160,8 +160,10 @@
         case 'w':
             if (s == str && next == 'h') {
                 next = tolower(*(++s + 1));
-            }
-            if (ISVOWEL(next) || next == 0) {
+                if (ISVOWEL(next) || next == 0) {
+                    *r++ = 'W';
+                }
+            } else if (ISVOWEL(next)) {
                 *r++ = 'W';
             }
             break;
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/jellyfish-0.7.1/docs/changelog.rst 
new/jellyfish-0.7.2/docs/changelog.rst
--- old/jellyfish-0.7.1/docs/changelog.rst      2019-01-11 05:35:33.000000000 
+0100
+++ new/jellyfish-0.7.2/docs/changelog.rst      2019-06-05 02:01:51.000000000 
+0200
@@ -1,6 +1,11 @@
 Changelog
 =========
 
+0.7.2 - WIP
+-----------------------
+* fix CJellyfish damerau_levenshtein w/ unicode, thanks to immerrr
+* fix final H in NYSIIS
+
 0.7.1 - 10 January 2019
 -----------------------
 * restrict install to Python >= 3.4
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/jellyfish-0.7.1/docs/conf.py 
new/jellyfish-0.7.2/docs/conf.py
--- old/jellyfish-0.7.1/docs/conf.py    2019-01-11 05:35:39.000000000 +0100
+++ new/jellyfish-0.7.2/docs/conf.py    2019-06-05 02:01:29.000000000 +0200
@@ -54,7 +54,7 @@
 # The short X.Y version.
 version = '0.7'
 # The full version, including alpha/beta/rc tags.
-release = '0.7.1'
+release = '0.7.2'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/jellyfish-0.7.1/jellyfish/_jellyfish.py 
new/jellyfish-0.7.2/jellyfish/_jellyfish.py
--- old/jellyfish-0.7.1/jellyfish/_jellyfish.py 2019-01-11 05:30:23.000000000 
+0100
+++ new/jellyfish-0.7.2/jellyfish/_jellyfish.py 2019-06-05 16:54:26.000000000 
+0200
@@ -278,7 +278,7 @@
         elif ch == 'P' and i+1 < len(s) and s[i+1] == 'H':
             ch = 'F'
             i += 1
-        elif ch == 'H' and (s[i-1] not in 'AEIOU' or (i+1 < len(s) and s[i+1] not in 'AEIOU')):
+        elif ch == 'H' and (s[i-1] not in 'AEIOU' or (i+1 < len(s) and s[i+1] not in 'AEIOU') or (i+1 == len(s))):
             if s[i-1] in 'AEIOU':
                 ch = 'A'
             else:
@@ -466,7 +466,7 @@
                 i += 1
                 if nextnext in 'aeiou' or nextnext == '*****':
                     result.append('w')
-            elif next in 'aeiou' or next == '*****':
+            elif next in 'aeiou':
                 result.append('w')
         elif c == 'x':
             if i == 0:
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/jellyfish-0.7.1/jellyfish/test.py 
new/jellyfish-0.7.2/jellyfish/test.py
--- old/jellyfish-0.7.1/jellyfish/test.py       2019-01-11 05:30:23.000000000 
+0100
+++ new/jellyfish-0.7.2/jellyfish/test.py       2019-06-05 02:01:29.000000000 
+0200
@@ -109,15 +109,13 @@
         assert [[jf.match_rating_comparison(h1, h2) for h1 in sha1s] for h2 in 
sha1s]
 
     def test_damerau_levenshtein_unicode_segfault():
-        # unfortunate difference in behavior between Py & C versions
+        # test that unicode works in C & Python versions now
         from jellyfish.cjellyfish import damerau_levenshtein_distance as c_dl
         from jellyfish._jellyfish import damerau_levenshtein_distance as py_dl
         s1 = u'mylifeoutdoors'
         s2 = u'нахлыст'
-        with pytest.raises(ValueError):
-            c_dl(s1, s2)
-        with pytest.raises(ValueError):
-            c_dl(s2, s1)
+        assert c_dl(s1, s2) == 14
+        assert c_dl(s2, s1) == 14
 
         assert py_dl(s1, s2) == 14
         assert py_dl(s2, s1) == 14
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/jellyfish-0.7.1/jellyfish.egg-info/PKG-INFO 
new/jellyfish-0.7.2/jellyfish.egg-info/PKG-INFO
--- old/jellyfish-0.7.1/jellyfish.egg-info/PKG-INFO     2019-01-11 
05:35:52.000000000 +0100
+++ new/jellyfish-0.7.2/jellyfish.egg-info/PKG-INFO     2019-06-05 
17:04:54.000000000 +0200
@@ -1,6 +1,6 @@
 Metadata-Version: 1.2
 Name: jellyfish
-Version: 0.7.1
+Version: 0.7.2
 Summary: a library for doing approximate and phonetic matching of strings.
 Home-page: http://github.com/jamesturk/jellyfish
 License: UNKNOWN
@@ -8,8 +8,8 @@
         jellyfish
         =========
         
-        .. image:: https://travis-ci.org/jamesturk/jellyfish.svg?branch=master
-            :target: https://travis-ci.org/jamesturk/jellyfish
+        .. image:: https://travis-ci.com/jamesturk/jellyfish.svg?branch=master
+            :target: https://travis-ci.com/jamesturk/jellyfish
         
         .. image:: 
https://coveralls.io/repos/jamesturk/jellyfish/badge.png?branch=master
             :target: https://coveralls.io/r/jamesturk/jellyfish
@@ -97,5 +97,6 @@
 Classifier: Programming Language :: Python :: 3.4
 Classifier: Programming Language :: Python :: 3.5
 Classifier: Programming Language :: Python :: 3.6
+Classifier: Programming Language :: Python :: 3.7
 Classifier: Topic :: Text Processing :: Linguistic
 Requires-Python: >3.4
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/jellyfish-0.7.1/setup.py new/jellyfish-0.7.2/setup.py
--- old/jellyfish-0.7.1/setup.py        2019-01-11 05:35:08.000000000 +0100
+++ new/jellyfish-0.7.2/setup.py        2019-06-05 02:01:29.000000000 +0200
@@ -95,7 +95,7 @@
         long_description = readme.read()
 
     setup(name="jellyfish",
-          version="0.7.1",
+          version="0.7.2",
           python_requires=">3.4",
           platforms=["any"],
           description=("a library for doing approximate and "
@@ -110,6 +110,7 @@
                        "Programming Language :: Python :: 3.4",
                        "Programming Language :: Python :: 3.5",
                        "Programming Language :: Python :: 3.6",
+                       "Programming Language :: Python :: 3.7",
                        "Topic :: Text Processing :: Linguistic"],
           **kw)
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/jellyfish-0.7.1/testdata/metaphone.csv 
new/jellyfish-0.7.2/testdata/metaphone.csv
--- old/jellyfish-0.7.1/testdata/metaphone.csv  2019-01-11 05:32:26.000000000 
+0100
+++ new/jellyfish-0.7.2/testdata/metaphone.csv  2019-06-05 01:59:15.000000000 
+0200
@@ -27,3 +27,4 @@
 Utah,UT
 WH,W
 walt,WLT
+ANDREW,ANTR
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/jellyfish-0.7.1/testdata/nysiis.csv 
new/jellyfish-0.7.2/testdata/nysiis.csv
--- old/jellyfish-0.7.1/testdata/nysiis.csv     2019-01-11 05:32:26.000000000 
+0100
+++ new/jellyfish-0.7.2/testdata/nysiis.csv     2019-06-05 01:59:15.000000000 
+0200
@@ -32,3 +32,4 @@
 M,M
 E,E
 PFEISTER,FASTAR
+SARAH,SAR


Reply via email to