Script 'mail_helper' called by obssrc
Hello community,
here is the log from the commit of package python-padatious for
openSUSE:Factory checked in at 2022-10-06 07:42:32
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-padatious (Old)
and /work/SRC/openSUSE:Factory/.python-padatious.new.2275 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-padatious"
Thu Oct 6 07:42:32 2022 rev:9 rq:1008240 version:0.4.8
Changes:
--------
--- /work/SRC/openSUSE:Factory/python-padatious/python-padatious.changes 2020-05-26 17:17:29.751739093 +0200
+++ /work/SRC/openSUSE:Factory/.python-padatious.new.2275/python-padatious.changes 2022-10-06 07:42:43.708757350 +0200
@@ -1,0 +2,6 @@
+Tue Oct 4 22:31:49 UTC 2022 - Yogalakshmi Arunachalam <[email protected]>
+
+- version update to 0.4.8
+ * fix and tests for issue #23 (#23)
+
+-------------------------------------------------------------------
Old:
----
v0.4.7.tar.gz
New:
----
v0.4.8.tar.gz
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Other differences:
------------------
++++++ python-padatious.spec ++++++
--- /var/tmp/diff_new_pack.Cydpjh/_old 2022-10-06 07:42:44.784759745 +0200
+++ /var/tmp/diff_new_pack.Cydpjh/_new 2022-10-06 07:42:44.788759754 +0200
@@ -1,7 +1,7 @@
#
# spec file for package python-padatious
#
-# Copyright (c) 2020 SUSE LLC
+# Copyright (c) 2022 SUSE LLC
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
@@ -19,7 +19,7 @@
%define skip_python2 1
%{?!python_module:%define python_module() python-%{**} python3-%{**}}
Name: python-padatious
-Version: 0.4.7
+Version: 0.4.8
Release: 0
Summary: A neural network intent parser
License: Apache-2.0
@@ -35,7 +35,7 @@
Requires: python-setuptools
Requires: python-xxhash
Requires(post): update-alternatives
-Requires(postun): update-alternatives
+Requires(postun):update-alternatives
BuildArch: noarch
# SECTION test requirements
BuildRequires: %{python_module fann2}
++++++ v0.4.7.tar.gz -> v0.4.8.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/padatious-0.4.7/README.md
new/padatious-0.4.8/README.md
--- old/padatious-0.4.7/README.md 2019-03-30 08:05:14.000000000 +0100
+++ new/padatious-0.4.8/README.md 2020-05-25 11:49:26.000000000 +0200
@@ -1,8 +1,13 @@
-# Padatious #
+[](LICENSE.md)
[](https://mycroft.ai/cla)
[](https://github.com/MycroftAI/contributors/blob/master/team/Mycroft%20Core.md)

-An efficient and agile neural network intent parser
+[](http://makeapullrequest.com)
+[](https://chat.mycroft.ai/community/channels/machine-learning)
-### Features ###
+# Padatious
+
+An efficient and agile neural network intent parser. Padatious is a core component of [Mycroft AI](https://mycroft.ai).
+
+## Features
- Intents are easy to create
- Requires a relatively small amount of data
@@ -10,11 +15,38 @@
- Easily extract entities (ie. Find the nearest *gas station* -> `place: gas
station`)
- Fast training with a modular approach to neural networks
-### API Example ###
+## Getting Started
+
+### Installing
+
+Padatious requires the following native packages to be installed:
+
+ - [`FANN`][fann] (with dev headers)
+ - Python development headers
+ - `pip3`
+ - `swig`
+
+Ubuntu:
+
+```
+sudo apt-get install libfann-dev python3-dev python3-pip swig
+```
+
+Next, install Padatious via `pip3`:
+
+```
+pip3 install padatious
+```
+Padatious also works in Python 2 if you are unable to upgrade.
+
+
+[fann]:https://github.com/libfann/fann
+
+### Example
Here's a simple example of how to use Padatious:
-**program.py**:
+#### program.py
```Python
from padatious import IntentContainer
@@ -36,27 +68,6 @@
python3 program.py
```
-### Installing ###
-
-Padatious requires the following native packages to be installed:
-
- - [`FANN`][fann] (with dev headers)
- - Python development headers
- - `pip3`
- - `swig`
-
-Ubuntu:
-
-```
-sudo apt-get install libfann-dev python3-dev python3-pip swig
-```
-
-Next, install Padatious via `pip3`:
-
-```
-pip3 install padatious
-```
-Padatious also works in Python 2 if you are unable to upgrade.
-
+## Learn More
-[fann]:https://github.com/libfann/fann
+Further documentation can be found at https://mycroft.ai/documentation/padatious/
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/padatious-0.4.7/padatious/intent_container.py
new/padatious-0.4.8/padatious/intent_container.py
--- old/padatious-0.4.7/padatious/intent_container.py 2019-03-30
08:05:14.000000000 +0100
+++ new/padatious-0.4.8/padatious/intent_container.py 2020-05-25
11:49:26.000000000 +0200
@@ -68,8 +68,51 @@
self.train_thread = None
self.serialized_args = []
+ def instantiate_from_disk(self):
+ """
+ Instantiates the necessary (internal) data structures when loading persisted model from disk.
+ This is done via injecting entities and intents back from cached file versions.
+ """
+
+ entity_traindata = {}
+ intent_traindata = {}
+
+ # workaround: load training data for both entities and intents since
+ # padaos regex needs it for (re)compilation until TODO is cleared
+ for f in os.listdir(self.cache_dir):
+ if f.endswith('.entity'):
+ entity_name = f[0:f.find('.entity')]
+ with open(os.path.join(self.cache_dir, f), 'r') as d:
+ entity_traindata[entity_name] = [line.strip()
+ for line in d]
+
+ elif f.endswith('.intent'):
+ intent_name = f[0:f.find('.intent')]
+ with open(os.path.join(self.cache_dir, f), 'r') as d:
+ intent_traindata[intent_name] = [line.strip()
+ for line in d]
+
+ # TODO: padaos.compile (regex compilation) is redone when loading: find
+ # a way to persist regex, as well!
+ for f in os.listdir(self.cache_dir):
+
+ if f.startswith('{') and f.endswith('}.hash'):
+ entity_name = f[1:f.find('}.hash')]
+ self.add_entity(
+ name=entity_name,
+ lines=entity_traindata[entity_name],
+ reload_cache=False,
+ must_train=False)
+ elif not f.startswith('{') and f.endswith('.hash'):
+ intent_name = f[0:f.find('.hash')]
+ self.add_intent(
+ name=intent_name,
+ lines=intent_traindata[intent_name],
+ reload_cache=False,
+ must_train=False)
+
@_save_args
- def add_intent(self, name, lines, reload_cache=False):
+ def add_intent(self, name, lines, reload_cache=False, must_train=True):
"""
Creates a new intent, optionally checking the cache first
@@ -78,18 +121,18 @@
lines (list<str>): All the sentences that should activate the
intent
reload_cache: Whether to ignore cached intent if exists
"""
- self.intents.add(name, lines, reload_cache)
+ self.intents.add(name, lines, reload_cache, must_train)
self.padaos.add_intent(name, lines)
- self.must_train = True
+ self.must_train = must_train
@_save_args
- def add_entity(self, name, lines, reload_cache=False):
+ def add_entity(self, name, lines, reload_cache=False, must_train=True):
"""
Adds an entity that matches the given lines.
Example:
self.add_intent('weather', ['will it rain on {weekday}?'])
- self.add_entity('{weekday}', ['monday', 'tuesday', 'wednesday'])
# ...
+ self.add_entity('weekday', ['monday', 'tuesday', 'wednesday']) # ...
Args:
name (str): The name of the entity
@@ -97,12 +140,21 @@
reload_cache (bool): Whether to refresh all of cache
"""
Entity.verify_name(name)
- self.entities.add(Entity.wrap_name(name), lines, reload_cache)
+ self.entities.add(
+ Entity.wrap_name(name),
+ lines,
+ reload_cache,
+ must_train)
self.padaos.add_entity(name, lines)
- self.must_train = True
+ self.must_train = must_train
@_save_args
- def load_entity(self, name, file_name, reload_cache=False):
+ def load_entity(
+ self,
+ name,
+ file_name,
+ reload_cache=False,
+ must_train=True):
"""
Loads an entity, optionally checking the cache first
@@ -115,7 +167,7 @@
self.entities.load(Entity.wrap_name(name), file_name, reload_cache)
with open(file_name) as f:
self.padaos.add_entity(name, f.read().split('\n'))
- self.must_train = True
+ self.must_train = must_train
@_save_args
def load_file(self, *args, **kwargs):
@@ -123,7 +175,12 @@
self.load_intent(*args, **kwargs)
@_save_args
- def load_intent(self, name, file_name, reload_cache=False):
+ def load_intent(
+ self,
+ name,
+ file_name,
+ reload_cache=False,
+ must_train=True):
"""
Loads an intent, optionally checking the cache first
@@ -135,7 +192,7 @@
self.intents.load(name, file_name, reload_cache)
with open(file_name) as f:
self.padaos.add_intent(name, f.read().split('\n'))
- self.must_train = True
+ self.must_train = must_train
@_save_args
def remove_intent(self, name):
@@ -151,8 +208,16 @@
self.padaos.remove_entity(name)
def _train(self, *args, **kwargs):
- t1 = Thread(target=self.intents.train, args=args, kwargs=kwargs,
daemon=True)
- t2 = Thread(target=self.entities.train, args=args, kwargs=kwargs,
daemon=True)
+ t1 = Thread(
+ target=self.intents.train,
+ args=args,
+ kwargs=kwargs,
+ daemon=True)
+ t2 = Thread(
+ target=self.entities.train,
+ args=args,
+ kwargs=kwargs,
+ daemon=True)
t1.start()
t2.start()
t1.join()
@@ -203,7 +268,9 @@
'-k', json.dumps(kwargs),
])
if ret == 2:
- raise TypeError('Invalid train arguments: {} {}'.format(args,
kwargs))
+ raise TypeError(
+ 'Invalid train arguments: {} {}'.format(
+ args, kwargs))
data = self.serialized_args
self.clear()
self.apply_training_args(data)
@@ -214,7 +281,8 @@
elif ret == 10: # timeout
return False
else:
- raise ValueError('Training failed and returned code:
{}'.format(ret))
+ raise ValueError(
+ 'Training failed and returned code: {}'.format(ret))
def calc_intents(self, query):
"""
@@ -235,7 +303,8 @@
sent = tokenize(query)
for perfect_match in self.padaos.calc_intents(query):
name = perfect_match['name']
- intents[name] = MatchData(name, sent,
matches=perfect_match['entities'], conf=1.0)
+ intents[name] = MatchData(
+ name, sent, matches=perfect_match['entities'], conf=1.0)
return list(intents.values())
def calc_intent(self, query):
@@ -252,8 +321,10 @@
if len(matches) == 0:
return MatchData('', '')
best_match = max(matches, key=lambda x: x.conf)
- best_matches = (match for match in matches if match.conf ==
best_match.conf)
- return min(best_matches, key=lambda x: sum(map(len,
x.matches.values())))
+ best_matches = (
+ match for match in matches if match.conf == best_match.conf)
+ return min(best_matches, key=lambda x: sum(
+ map(len, x.matches.values())))
def get_training_args(self):
return self.serialized_args
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/padatious-0.4.7/padatious/match_data.py
new/padatious-0.4.8/padatious/match_data.py
--- old/padatious-0.4.7/padatious/match_data.py 2019-03-30 08:05:14.000000000
+0100
+++ new/padatious-0.4.8/padatious/match_data.py 2020-05-25 11:49:26.000000000
+0200
@@ -42,11 +42,40 @@
def __repr__(self):
return repr(self.__dict__)
+ @staticmethod
+ def handle_apostrophes(old_sentence):
+ """
+ Attempts to handle utterances with apostrophes in them
+ """
+ new_sentence = ''
+ apostrophe_present = False
+ for word in old_sentence:
+ if word == "'":
+ apostrophe_present = True
+ new_sentence += word
+ else:
+ # If the apostrophe is present we don't want to add
+ # a whitespace after the apostrophe
+ if apostrophe_present:
+ # If the word after the apostrophe is longer than a character long assume that
+ # the previous word is an "s" + apostrophe instead of "word + apostrophe
+ if len(word) > 1:
+ new_sentence += " " + word
+ else:
+ new_sentence += word
+ apostrophe_present = False
+ else:
+ if len(new_sentence) > 0:
+ new_sentence += " " + word
+ else:
+ new_sentence = word
+ return new_sentence
# Converts parameters from lists of tokens to one combined string
def detokenize(self):
- self.sent = ' '.join(self.sent)
+ self.sent = self.handle_apostrophes(self.sent)
+
new_matches = {}
for token, sent in self.matches.items():
new_token = token.replace('{', '').replace('}', '')
- new_matches[new_token] = ' '.join(sent)
+ new_matches[new_token] = self.handle_apostrophes(sent)
self.matches = new_matches
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/padatious-0.4.7/padatious/training_manager.py
new/padatious-0.4.8/padatious/training_manager.py
--- old/padatious-0.4.7/padatious/training_manager.py 2019-03-30
08:05:14.000000000 +0100
+++ new/padatious-0.4.8/padatious/training_manager.py 2020-05-25
11:49:26.000000000 +0200
@@ -37,6 +37,7 @@
cls (Type[Trainable]): Class to wrap
cache_dir (str): Place to store cache files
"""
+
def __init__(self, cls, cache_dir):
self.cls = cls
self.cache = cache_dir
@@ -45,19 +46,32 @@
self.train_data = TrainData()
- def add(self, name, lines, reload_cache=False):
- hash_fn = join(self.cache, name + '.hash')
- old_hsh = None
- if isfile(hash_fn):
- with open(hash_fn, 'rb') as g:
- old_hsh = g.read()
- min_ver = splitext(padatious.__version__)[0]
- new_hsh = lines_hash([min_ver] + lines)
- if reload_cache or old_hsh != new_hsh:
- self.objects_to_train.append(self.cls(name=name, hsh=new_hsh))
+ def add(self, name, lines, reload_cache=False, must_train=True):
+
+ # special case: load persisted (aka. cached) resource (i.e.
+ # entity or intent) from file into memory data structures
+ if not must_train:
+ self.objects.append(
+ self.cls.from_file(
+ name=name,
+ folder=self.cache))
+ # general case: load resource (entity or intent) to training queue
+ # or if no change occurred to memory data structures
else:
- self.objects.append(self.cls.from_file(name=name,
folder=self.cache))
- self.train_data.add_lines(name, lines)
+ hash_fn = join(self.cache, name + '.hash')
+ old_hsh = None
+ if isfile(hash_fn):
+ with open(hash_fn, 'rb') as g:
+ old_hsh = g.read()
+ min_ver = splitext(padatious.__version__)[0]
+ new_hsh = lines_hash([min_ver] + lines)
+ if reload_cache or old_hsh != new_hsh:
+ self.objects_to_train.append(self.cls(name=name, hsh=new_hsh))
+ else:
+ self.objects.append(
+ self.cls.from_file(
+ name=name, folder=self.cache))
+ self.train_data.add_lines(name, lines)
def load(self, name, file_name, reload_cache=False):
with open(file_name) as f:
@@ -65,13 +79,16 @@
def remove(self, name):
self.objects = [i for i in self.objects if i.name != name]
- self.objects_to_train = [i for i in self.objects_to_train if i.name !=
name]
+ self.objects_to_train = [
+ i for i in self.objects_to_train if i.name != name]
self.train_data.remove_lines(name)
def train(self, debug=True, single_thread=False, timeout=20):
train = partial(
- _train_and_save, cache=self.cache, data=self.train_data,
print_updates=debug
- )
+ _train_and_save,
+ cache=self.cache,
+ data=self.train_data,
+ print_updates=debug)
if single_thread:
for i in self.objects_to_train:
@@ -91,7 +108,10 @@
# Load saved objects from disk
for obj in self.objects_to_train:
try:
- self.objects.append(self.cls.from_file(name=obj.name,
folder=self.cache))
+ self.objects.append(
+ self.cls.from_file(
+ name=obj.name,
+ folder=self.cache))
except IOError:
if debug:
print('Took too long to train', obj.name)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/padatious-0.4.7/tests/test_container.py
new/padatious-0.4.8/tests/test_container.py
--- old/padatious-0.4.7/tests/test_container.py 2019-03-30 08:05:14.000000000
+0100
+++ new/padatious-0.4.8/tests/test_container.py 2020-05-25 11:49:26.000000000
+0200
@@ -24,8 +24,14 @@
class TestIntentContainer:
- test_lines = ['this is a test', 'another test']
- other_lines = ['something else', 'this is a different thing']
+ test_lines = ['this is a test\n', 'another test\n']
+ other_lines = ['something else\n', 'this is a different thing\n']
+ test_lines_with_entities = ['this is a {test}\n', 'another {test}\n']
+ other_lines_with_entities = [
+ 'something {other}\n',
+ 'this is a {other} thing\n']
+ test_entities = ['test\n', 'assessment\n']
+ other_entities = ['else\n', 'different\n']
def setup(self):
self.cont = IntentContainer('temp')
@@ -59,12 +65,55 @@
test(False, False)
test(True, True)
+ def _write_train_data(self):
+
+ if not isdir('temp'):
+ mkdir('temp')
+
+ fn1 = join('temp', 'test.intent')
+ with open(fn1, 'w') as f:
+ f.writelines(self.test_lines_with_entities)
+
+ fn2 = join('temp', 'other.intent')
+ with open(fn2, 'w') as f:
+ f.writelines(self.other_lines_with_entities)
+
+ fn1 = join('temp', 'test.entity')
+ with open(fn1, 'w') as f:
+ f.writelines(self.test_entities)
+
+ fn2 = join('temp', 'other.entity')
+ with open(fn2, 'w') as f:
+ f.writelines(self.other_entities)
+
+ def test_instantiate_from_disk(self):
+ # train and cache (i.e. persist)
+ self.setup()
+ self.test_add_intent()
+ self.cont.add_entity('test', self.test_entities)
+ self.cont.add_entity('other', self.other_entities)
+ self.cont.train()
+ self._write_train_data()
+
+ # instantiate from disk (load cached files)
+ self.setup()
+ self.cont.instantiate_from_disk()
+
+ assert len(self.cont.intents.train_data.sent_lists) == 0
+ assert len(self.cont.intents.objects_to_train) == 0
+ assert len(self.cont.intents.objects) == 2
+
+ result = self.cont.calc_intent('something different')
+ assert result.matches['other'] == 'different'
+
def _create_large_intent(self, depth):
if depth == 0:
return '(a|b|)'
return '{0} {0}'.format(self._create_large_intent(depth - 1))
- @pytest.mark.skipif(not os.environ.get('RUN_LONG'), reason="Takes a long
time")
+ @pytest.mark.skipif(
+ not os.environ.get('RUN_LONG'),
+ reason="Takes a long time")
def test_train_timeout(self):
self.cont.add_intent('a', [
' '.join(random.choice('abcdefghijklmnopqrstuvwxyz') for _ in
range(5))
@@ -116,7 +165,9 @@
self.cont.train(False)
intents = self.cont.calc_intents('this is another test')
- assert (intents[0].conf > intents[1].conf) == (intents[0].name ==
'test')
+ assert (
+ intents[0].conf > intents[1].conf) == (
+ intents[0].name == 'test')
assert self.cont.calc_intent('this is another test').name == 'test'
def test_empty(self):
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/padatious-0.4.7/tests/test_match_data.py
new/padatious-0.4.8/tests/test_match_data.py
--- old/padatious-0.4.7/tests/test_match_data.py 2019-03-30
08:05:14.000000000 +0100
+++ new/padatious-0.4.8/tests/test_match_data.py 2020-05-25
11:49:26.000000000 +0200
@@ -18,6 +18,8 @@
class TestMatchData:
def setup(self):
self.match = MatchData('name', ['one', 'two'], {'{word}': ['value',
'tokens']}, 0.5)
+ self.sentence = ["it", "'", "s", "a", "new", "sentence"]
+ self.sentence2 = ["the", "parents", "'", "house"]
def test_detokenize(self):
self.match.detokenize()
@@ -25,3 +27,9 @@
correct_match = MatchData('name', 'one two', {'word': 'value tokens'},
0.5)
assert self.match.__dict__ == correct_match.__dict__
+
+ def test_handle_apostrophes(self):
+ joined_sentence = self.match.handle_apostrophes(self.sentence)
+ joined_sentence2 = self.match.handle_apostrophes(self.sentence2)
+ assert joined_sentence == "it's a new sentence"
+ assert joined_sentence2 == "the parents' house"