commit python-padatious for openSUSE:Factory

Source-Sync Wed, 05 Oct 2022 22:42:55 -0700

Script 'mail_helper' called by obssrc
Hello community,

here is the log from the commit of package python-padatious for openSUSE:Factory checked in at 2022-10-06 07:42:32
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-padatious (Old)
 and      /work/SRC/openSUSE:Factory/.python-padatious.new.2275 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


Package is "python-padatious"

Thu Oct  6 07:42:32 2022 rev:9 rq:1008240 version:0.4.8

Changes:
--------
--- /work/SRC/openSUSE:Factory/python-padatious/python-padatious.changes  2020-05-26 17:17:29.751739093 +0200
+++ /work/SRC/openSUSE:Factory/.python-padatious.new.2275/python-padatious.changes  2022-10-06 07:42:43.708757350 +0200
@@ -1,0 +2,6 @@
+Tue Oct  4 22:31:49 UTC 2022 - Yogalakshmi Arunachalam <[email protected]>
+
+- version update to 0.4.8 
+  * fix and tests for issue #23 (#23)
+
+-------------------------------------------------------------------

Old:
----
  v0.4.7.tar.gz

New:
----
  v0.4.8.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ python-padatious.spec ++++++
--- /var/tmp/diff_new_pack.Cydpjh/_old  2022-10-06 07:42:44.784759745 +0200
+++ /var/tmp/diff_new_pack.Cydpjh/_new  2022-10-06 07:42:44.788759754 +0200
@@ -1,7 +1,7 @@
 #
 # spec file for package python-padatious
 #
-# Copyright (c) 2020 SUSE LLC
+# Copyright (c) 2022 SUSE LLC
 #
 # All modifications and additions to the file contributed by third parties
 # remain the property of their copyright owners, unless otherwise agreed
@@ -19,7 +19,7 @@
 %define skip_python2 1
 %{?!python_module:%define python_module() python-%{**} python3-%{**}}
 Name:           python-padatious
-Version:        0.4.7
+Version:        0.4.8
 Release:        0
 Summary:        A neural network intent parser
 License:        Apache-2.0
@@ -35,7 +35,7 @@
 Requires:       python-setuptools
 Requires:       python-xxhash
 Requires(post): update-alternatives
-Requires(postun): update-alternatives
+Requires(postun):update-alternatives
 BuildArch:      noarch
 # SECTION test requirements
 BuildRequires:  %{python_module fann2}

++++++ v0.4.7.tar.gz -> v0.4.8.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/padatious-0.4.7/README.md new/padatious-0.4.8/README.md
--- old/padatious-0.4.7/README.md       2019-03-30 08:05:14.000000000 +0100
+++ new/padatious-0.4.8/README.md       2020-05-25 11:49:26.000000000 +0200
@@ -1,8 +1,13 @@
-# Padatious #
+[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE.md)
 
[![CLA](https://img.shields.io/badge/CLA%3F-Required-blue.svg)](https://mycroft.ai/cla)
 
[![Team](https://img.shields.io/badge/Team-Mycroft_Core-violetblue.svg)](https://github.com/MycroftAI/contributors/blob/master/team/Mycroft%20Core.md)
 ![Status](https://img.shields.io/badge/-Production_ready-green.svg)
 
-An efficient and agile neural network intent parser
+[![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](http://makeapullrequest.com)
+[![Join chat](https://img.shields.io/badge/Mattermost-join_chat-brightgreen.svg)](https://chat.mycroft.ai/community/channels/machine-learning)
 
-### Features ###
+# Padatious
+
+An efficient and agile neural network intent parser. Padatious is a core component of [Mycroft AI](https://mycroft.ai). 
+
+## Features
 
  - Intents are easy to create
  - Requires a relatively small amount of data
@@ -10,11 +15,38 @@
  - Easily extract entities (ie. Find the nearest *gas station* -> `place: gas station`)
  - Fast training with a modular approach to neural networks
 
-### API Example ###
+## Getting Started
+
+### Installing
+
+Padatious requires the following native packages to be installed:
+
+ - [`FANN`][fann] (with dev headers)
+ - Python development headers
+ - `pip3`
+ - `swig`
+
+Ubuntu:
+
+```
+sudo apt-get install libfann-dev python3-dev python3-pip swig
+```
+
+Next, install Padatious via `pip3`:
+
+```
+pip3 install padatious
+```
+Padatious also works in Python 2 if you are unable to upgrade.
+
+
+[fann]:https://github.com/libfann/fann
+
+### Example
 
 Here's a simple example of how to use Padatious:
 
-**program.py**:
+#### program.py
 ```Python
 from padatious import IntentContainer
 
@@ -36,27 +68,6 @@
 python3 program.py
 ```
 
-### Installing ###
-
-Padatious requires the following native packages to be installed:
-
- - [`FANN`][fann] (with dev headers)
- - Python development headers
- - `pip3`
- - `swig`
-
-Ubuntu:
-
-```
-sudo apt-get install libfann-dev python3-dev python3-pip swig
-```
-
-Next, install Padatious via `pip3`:
-
-```
-pip3 install padatious
-```
-Padatious also works in Python 2 if you are unable to upgrade.
-
+## Learn More
 
-[fann]:https://github.com/libfann/fann
+Further documentation can be found at https://mycroft.ai/documentation/padatious/
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/padatious-0.4.7/padatious/intent_container.py new/padatious-0.4.8/padatious/intent_container.py
--- old/padatious-0.4.7/padatious/intent_container.py   2019-03-30 08:05:14.000000000 +0100
+++ new/padatious-0.4.8/padatious/intent_container.py   2020-05-25 11:49:26.000000000 +0200
@@ -68,8 +68,51 @@
         self.train_thread = None
         self.serialized_args = []
 
+    def instantiate_from_disk(self):
+        """
+        Instantiates the necessary (internal) data structures when loading persisted model from disk.
+        This is done via injecting entities and intents back from cached file versions.
+        """
+
+        entity_traindata = {}
+        intent_traindata = {}
+
+        # workaround: load training data for both entities and intents since
+        # padaos regex needs it for (re)compilation until TODO is cleared
+        for f in os.listdir(self.cache_dir):
+            if f.endswith('.entity'):
+                entity_name = f[0:f.find('.entity')]
+                with open(os.path.join(self.cache_dir, f), 'r') as d:
+                    entity_traindata[entity_name] = [line.strip()
+                                                     for line in d]
+
+            elif f.endswith('.intent'):
+                intent_name = f[0:f.find('.intent')]
+                with open(os.path.join(self.cache_dir, f), 'r') as d:
+                    intent_traindata[intent_name] = [line.strip()
+                                                     for line in d]
+
+        # TODO: padaos.compile (regex compilation) is redone when loading: find
+        # a way to persist regex, as well!
+        for f in os.listdir(self.cache_dir):
+
+            if f.startswith('{') and f.endswith('}.hash'):
+                entity_name = f[1:f.find('}.hash')]
+                self.add_entity(
+                    name=entity_name,
+                    lines=entity_traindata[entity_name],
+                    reload_cache=False,
+                    must_train=False)
+            elif not f.startswith('{') and f.endswith('.hash'):
+                intent_name = f[0:f.find('.hash')]
+                self.add_intent(
+                    name=intent_name,
+                    lines=intent_traindata[intent_name],
+                    reload_cache=False,
+                    must_train=False)
+
     @_save_args
-    def add_intent(self, name, lines, reload_cache=False):
+    def add_intent(self, name, lines, reload_cache=False, must_train=True):
         """
         Creates a new intent, optionally checking the cache first
 
@@ -78,18 +121,18 @@
             lines (list<str>): All the sentences that should activate the 
intent
             reload_cache: Whether to ignore cached intent if exists
         """
-        self.intents.add(name, lines, reload_cache)
+        self.intents.add(name, lines, reload_cache, must_train)
         self.padaos.add_intent(name, lines)
-        self.must_train = True
+        self.must_train = must_train
 
     @_save_args
-    def add_entity(self, name, lines, reload_cache=False):
+    def add_entity(self, name, lines, reload_cache=False, must_train=True):
         """
         Adds an entity that matches the given lines.
 
         Example:
             self.add_intent('weather', ['will it rain on {weekday}?'])
-            self.add_entity('{weekday}', ['monday', 'tuesday', 'wednesday'])  # ...
+            self.add_entity('weekday', ['monday', 'tuesday', 'wednesday'])  # ...
 
         Args:
             name (str): The name of the entity
@@ -97,12 +140,21 @@
             reload_cache (bool): Whether to refresh all of cache
         """
         Entity.verify_name(name)
-        self.entities.add(Entity.wrap_name(name), lines, reload_cache)
+        self.entities.add(
+            Entity.wrap_name(name),
+            lines,
+            reload_cache,
+            must_train)
         self.padaos.add_entity(name, lines)
-        self.must_train = True
+        self.must_train = must_train
 
     @_save_args
-    def load_entity(self, name, file_name, reload_cache=False):
+    def load_entity(
+            self,
+            name,
+            file_name,
+            reload_cache=False,
+            must_train=True):
         """
        Loads an entity, optionally checking the cache first
 
@@ -115,7 +167,7 @@
         self.entities.load(Entity.wrap_name(name), file_name, reload_cache)
         with open(file_name) as f:
             self.padaos.add_entity(name, f.read().split('\n'))
-        self.must_train = True
+        self.must_train = must_train
 
     @_save_args
     def load_file(self, *args, **kwargs):
@@ -123,7 +175,12 @@
         self.load_intent(*args, **kwargs)
 
     @_save_args
-    def load_intent(self, name, file_name, reload_cache=False):
+    def load_intent(
+            self,
+            name,
+            file_name,
+            reload_cache=False,
+            must_train=True):
         """
         Loads an intent, optionally checking the cache first
 
@@ -135,7 +192,7 @@
         self.intents.load(name, file_name, reload_cache)
         with open(file_name) as f:
             self.padaos.add_intent(name, f.read().split('\n'))
-        self.must_train = True
+        self.must_train = must_train
 
     @_save_args
     def remove_intent(self, name):
@@ -151,8 +208,16 @@
         self.padaos.remove_entity(name)
 
     def _train(self, *args, **kwargs):
-        t1 = Thread(target=self.intents.train, args=args, kwargs=kwargs, daemon=True)
-        t2 = Thread(target=self.entities.train, args=args, kwargs=kwargs, daemon=True)
+        t1 = Thread(
+            target=self.intents.train,
+            args=args,
+            kwargs=kwargs,
+            daemon=True)
+        t2 = Thread(
+            target=self.entities.train,
+            args=args,
+            kwargs=kwargs,
+            daemon=True)
         t1.start()
         t2.start()
         t1.join()
@@ -203,7 +268,9 @@
             '-k', json.dumps(kwargs),
         ])
         if ret == 2:
-            raise TypeError('Invalid train arguments: {} {}'.format(args, 
kwargs))
+            raise TypeError(
+                'Invalid train arguments: {} {}'.format(
+                    args, kwargs))
         data = self.serialized_args
         self.clear()
         self.apply_training_args(data)
@@ -214,7 +281,8 @@
         elif ret == 10:  # timeout
             return False
         else:
-            raise ValueError('Training failed and returned code: {}'.format(ret))
+            raise ValueError(
+                'Training failed and returned code: {}'.format(ret))
 
     def calc_intents(self, query):
         """
@@ -235,7 +303,8 @@
         sent = tokenize(query)
         for perfect_match in self.padaos.calc_intents(query):
             name = perfect_match['name']
-            intents[name] = MatchData(name, sent, 
matches=perfect_match['entities'], conf=1.0)
+            intents[name] = MatchData(
+                name, sent, matches=perfect_match['entities'], conf=1.0)
         return list(intents.values())
 
     def calc_intent(self, query):
@@ -252,8 +321,10 @@
         if len(matches) == 0:
             return MatchData('', '')
         best_match = max(matches, key=lambda x: x.conf)
-        best_matches = (match for match in matches if match.conf == best_match.conf)
-        return min(best_matches, key=lambda x: sum(map(len, x.matches.values())))
+        best_matches = (
+            match for match in matches if match.conf == best_match.conf)
+        return min(best_matches, key=lambda x: sum(
+            map(len, x.matches.values())))
 
     def get_training_args(self):
         return self.serialized_args
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/padatious-0.4.7/padatious/match_data.py 
new/padatious-0.4.8/padatious/match_data.py
--- old/padatious-0.4.7/padatious/match_data.py 2019-03-30 08:05:14.000000000 
+0100
+++ new/padatious-0.4.8/padatious/match_data.py 2020-05-25 11:49:26.000000000 
+0200
@@ -42,11 +42,40 @@
     def __repr__(self):
         return repr(self.__dict__)
 
+    @staticmethod
+    def handle_apostrophes(old_sentence):
+        """
+        Attempts to handle utterances with apostrophes in them
+        """
+        new_sentence = ''
+        apostrophe_present = False
+        for word in old_sentence:
+            if word == "'":
+                apostrophe_present = True
+                new_sentence += word
+            else:
+                # If the apostrophe is present we don't want to add
+                # a whitespace after the apostrophe
+                if apostrophe_present:
+                    # If the word after the apostrophe is longer than a character long assume that
+                    # the previous word is an "s" + apostrophe instead of "word + apostrophe
+                    if len(word) > 1:
+                        new_sentence += " " + word
+                    else:
+                        new_sentence += word
+                        apostrophe_present = False
+                else:
+                    if len(new_sentence) > 0:
+                        new_sentence += " " + word
+                    else:
+                        new_sentence = word
+        return new_sentence
     # Converts parameters from lists of tokens to one combined string
     def detokenize(self):
-        self.sent = ' '.join(self.sent)
+        self.sent = self.handle_apostrophes(self.sent)
+
         new_matches = {}
         for token, sent in self.matches.items():
             new_token = token.replace('{', '').replace('}', '')
-            new_matches[new_token] = ' '.join(sent)
+            new_matches[new_token] = self.handle_apostrophes(sent)
         self.matches = new_matches
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/padatious-0.4.7/padatious/training_manager.py 
new/padatious-0.4.8/padatious/training_manager.py
--- old/padatious-0.4.7/padatious/training_manager.py   2019-03-30 
08:05:14.000000000 +0100
+++ new/padatious-0.4.8/padatious/training_manager.py   2020-05-25 
11:49:26.000000000 +0200
@@ -37,6 +37,7 @@
         cls (Type[Trainable]): Class to wrap
         cache_dir (str): Place to store cache files
     """
+
     def __init__(self, cls, cache_dir):
         self.cls = cls
         self.cache = cache_dir
@@ -45,19 +46,32 @@
 
         self.train_data = TrainData()
 
-    def add(self, name, lines, reload_cache=False):
-        hash_fn = join(self.cache, name + '.hash')
-        old_hsh = None
-        if isfile(hash_fn):
-            with open(hash_fn, 'rb') as g:
-                old_hsh = g.read()
-        min_ver = splitext(padatious.__version__)[0]
-        new_hsh = lines_hash([min_ver] + lines)
-        if reload_cache or old_hsh != new_hsh:
-            self.objects_to_train.append(self.cls(name=name, hsh=new_hsh))
+    def add(self, name, lines, reload_cache=False, must_train=True):
+
+                # special case: load persisted (aka. cached) resource (i.e.
+                # entity or intent) from file into memory data structures
+        if not must_train:
+            self.objects.append(
+                self.cls.from_file(
+                    name=name,
+                    folder=self.cache))
+            # general case: load resource (entity or intent) to training queue
+            # or if no change occurred to memory data structures
         else:
-            self.objects.append(self.cls.from_file(name=name, 
folder=self.cache))
-        self.train_data.add_lines(name, lines)
+            hash_fn = join(self.cache, name + '.hash')
+            old_hsh = None
+            if isfile(hash_fn):
+                with open(hash_fn, 'rb') as g:
+                    old_hsh = g.read()
+            min_ver = splitext(padatious.__version__)[0]
+            new_hsh = lines_hash([min_ver] + lines)
+            if reload_cache or old_hsh != new_hsh:
+                self.objects_to_train.append(self.cls(name=name, hsh=new_hsh))
+            else:
+                self.objects.append(
+                    self.cls.from_file(
+                        name=name, folder=self.cache))
+            self.train_data.add_lines(name, lines)
 
     def load(self, name, file_name, reload_cache=False):
         with open(file_name) as f:
@@ -65,13 +79,16 @@
 
     def remove(self, name):
         self.objects = [i for i in self.objects if i.name != name]
-        self.objects_to_train = [i for i in self.objects_to_train if i.name != 
name]
+        self.objects_to_train = [
+            i for i in self.objects_to_train if i.name != name]
         self.train_data.remove_lines(name)
 
     def train(self, debug=True, single_thread=False, timeout=20):
         train = partial(
-            _train_and_save, cache=self.cache, data=self.train_data, 
print_updates=debug
-        )
+            _train_and_save,
+            cache=self.cache,
+            data=self.train_data,
+            print_updates=debug)
 
         if single_thread:
             for i in self.objects_to_train:
@@ -91,7 +108,10 @@
         # Load saved objects from disk
         for obj in self.objects_to_train:
             try:
-                self.objects.append(self.cls.from_file(name=obj.name, 
folder=self.cache))
+                self.objects.append(
+                    self.cls.from_file(
+                        name=obj.name,
+                        folder=self.cache))
             except IOError:
                 if debug:
                     print('Took too long to train', obj.name)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/padatious-0.4.7/tests/test_container.py 
new/padatious-0.4.8/tests/test_container.py
--- old/padatious-0.4.7/tests/test_container.py 2019-03-30 08:05:14.000000000 
+0100
+++ new/padatious-0.4.8/tests/test_container.py 2020-05-25 11:49:26.000000000 
+0200
@@ -24,8 +24,14 @@
 
 
 class TestIntentContainer:
-    test_lines = ['this is a test', 'another test']
-    other_lines = ['something else', 'this is a different thing']
+    test_lines = ['this is a test\n', 'another test\n']
+    other_lines = ['something else\n', 'this is a different thing\n']
+    test_lines_with_entities = ['this is a {test}\n', 'another {test}\n']
+    other_lines_with_entities = [
+        'something {other}\n',
+        'this is a {other} thing\n']
+    test_entities = ['test\n', 'assessment\n']
+    other_entities = ['else\n', 'different\n']
 
     def setup(self):
         self.cont = IntentContainer('temp')
@@ -59,12 +65,55 @@
         test(False, False)
         test(True, True)
 
+    def _write_train_data(self):
+
+        if not isdir('temp'):
+            mkdir('temp')
+
+        fn1 = join('temp', 'test.intent')
+        with open(fn1, 'w') as f:
+            f.writelines(self.test_lines_with_entities)
+
+        fn2 = join('temp', 'other.intent')
+        with open(fn2, 'w') as f:
+            f.writelines(self.other_lines_with_entities)
+
+        fn1 = join('temp', 'test.entity')
+        with open(fn1, 'w') as f:
+            f.writelines(self.test_entities)
+
+        fn2 = join('temp', 'other.entity')
+        with open(fn2, 'w') as f:
+            f.writelines(self.other_entities)
+
+    def test_instantiate_from_disk(self):
+        # train and cache (i.e. persist)
+        self.setup()
+        self.test_add_intent()
+        self.cont.add_entity('test', self.test_entities)
+        self.cont.add_entity('other', self.other_entities)
+        self.cont.train()
+        self._write_train_data()
+
+        # instantiate from disk (load cached files)
+        self.setup()
+        self.cont.instantiate_from_disk()
+
+        assert len(self.cont.intents.train_data.sent_lists) == 0
+        assert len(self.cont.intents.objects_to_train) == 0
+        assert len(self.cont.intents.objects) == 2
+
+        result = self.cont.calc_intent('something different')
+        assert result.matches['other'] == 'different'
+
     def _create_large_intent(self, depth):
         if depth == 0:
             return '(a|b|)'
         return '{0} {0}'.format(self._create_large_intent(depth - 1))
 
-    @pytest.mark.skipif(not os.environ.get('RUN_LONG'), reason="Takes a long 
time")
+    @pytest.mark.skipif(
+        not os.environ.get('RUN_LONG'),
+        reason="Takes a long time")
     def test_train_timeout(self):
         self.cont.add_intent('a', [
             ' '.join(random.choice('abcdefghijklmnopqrstuvwxyz') for _ in 
range(5))
@@ -116,7 +165,9 @@
         self.cont.train(False)
 
         intents = self.cont.calc_intents('this is another test')
-        assert (intents[0].conf > intents[1].conf) == (intents[0].name == 
'test')
+        assert (
+            intents[0].conf > intents[1].conf) == (
+            intents[0].name == 'test')
         assert self.cont.calc_intent('this is another test').name == 'test'
 
     def test_empty(self):
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/padatious-0.4.7/tests/test_match_data.py 
new/padatious-0.4.8/tests/test_match_data.py
--- old/padatious-0.4.7/tests/test_match_data.py        2019-03-30 
08:05:14.000000000 +0100
+++ new/padatious-0.4.8/tests/test_match_data.py        2020-05-25 
11:49:26.000000000 +0200
@@ -18,6 +18,8 @@
 class TestMatchData:
     def setup(self):
         self.match = MatchData('name', ['one', 'two'], {'{word}': ['value', 
'tokens']}, 0.5)
+        self.sentence = ["it", "'", "s", "a", "new", "sentence"]
+        self.sentence2 = ["the", "parents", "'", "house"]
 
     def test_detokenize(self):
         self.match.detokenize()
@@ -25,3 +27,9 @@
 
         correct_match = MatchData('name', 'one two', {'word': 'value tokens'}, 
0.5)
         assert self.match.__dict__ == correct_match.__dict__
+
+    def test_handle_apostrophes(self):
+        joined_sentence = self.match.handle_apostrophes(self.sentence)
+        joined_sentence2 = self.match.handle_apostrophes(self.sentence2)
+        assert joined_sentence == "it's a new sentence"
+        assert joined_sentence2 == "the parents' house"

commit python-padatious for openSUSE:Factory

Reply via email to