Hello community, here is the log from the commit of package python-featureflow for openSUSE:Factory checked in at 2019-01-08 12:25:59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-featureflow (Old) and /work/SRC/openSUSE:Factory/.python-featureflow.new.28833 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-featureflow" Tue Jan 8 12:25:59 2019 rev:2 rq:660787 version:2.12.1 Changes: -------- --- /work/SRC/openSUSE:Factory/python-featureflow/python-featureflow.changes 2018-11-08 09:49:29.573028908 +0100 +++ /work/SRC/openSUSE:Factory/.python-featureflow.new.28833/python-featureflow.changes 2019-01-08 12:28:21.532280010 +0100 @@ -1,0 +2,9 @@ +Sat Dec 22 03:41:42 UTC 2018 - Todd R <[email protected]> + +- Update to 2.12.1 + * non-buggy pandoc usage + * support for numpy 1.15 +- Rebase fix_certifi_dependency.patch +- Remove upstream-included fix_numpy_recarrays.patch + +------------------------------------------------------------------- Old: ---- LICENSE.txt featureflow-2.9.0.tar.gz fix_numpy_recarrays.patch New: ---- featureflow-2.12.1.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-featureflow.spec ++++++ --- /var/tmp/diff_new_pack.EDnkPV/_old 2019-01-08 12:28:21.924279582 +0100 +++ /var/tmp/diff_new_pack.EDnkPV/_new 2019-01-08 12:28:21.924279582 +0100 @@ -12,23 +12,21 @@ # license that conforms to the Open Source Definition (Version 1.9) # published by the Open Source Initiative. 
-# Please submit bugfixes or comments via http://bugs.opensuse.org/ +# Please submit bugfixes or comments via https://bugs.opensuse.org/ +# %{?!python_module:%define python_module() python-%{**} python3-%{**}} Name: python-featureflow -Version: 2.9.0 +Version: 2.12.1 Release: 0 -License: MIT Summary: A python library for building feature extraction pipelines -Url: https://github.com/JohnVinyard/featureflow +License: MIT Group: Development/Languages/Python +Url: https://github.com/JohnVinyard/featureflow Source0: https://files.pythonhosted.org/packages/source/f/featureflow/featureflow-%{version}.tar.gz -Source10: https://raw.githubusercontent.com/JohnVinyard/featureflow/%{version}/LICENSE.txt # PATCH-FIX-OPENSUSE fix_certifi_dependency.patch -- loosen certifi version dependency Patch0: fix_certifi_dependency.patch -# PATCH-FIX-UPSTREAM fix_numpy_recarrays.patch -- Fix handling of numpy recarrays -- https://github.com/JohnVinyard/featureflow/issues/7 -Patch1: fix_numpy_recarrays.patch BuildRequires: %{python_module setuptools} BuildRequires: fdupes BuildRequires: python-rpm-macros @@ -59,9 +57,7 @@ %prep %setup -q -n featureflow-%{version} -cp %{SOURCE10} . 
%patch0 -p1 -%patch1 -p1 %build %python_build ++++++ featureflow-2.9.0.tar.gz -> featureflow-2.12.1.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/featureflow-2.9.0/LICENSE.txt new/featureflow-2.12.1/LICENSE.txt --- old/featureflow-2.9.0/LICENSE.txt 1970-01-01 01:00:00.000000000 +0100 +++ new/featureflow-2.12.1/LICENSE.txt 2016-03-08 03:09:52.000000000 +0100 @@ -0,0 +1,8 @@ +The MIT License (MIT) +Copyright (c) 2016 John Vinyard + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
\ No newline at end of file diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/featureflow-2.9.0/MANIFEST.in new/featureflow-2.12.1/MANIFEST.in --- old/featureflow-2.9.0/MANIFEST.in 2016-03-08 03:04:01.000000000 +0100 +++ new/featureflow-2.12.1/MANIFEST.in 2018-11-06 02:02:50.000000000 +0100 @@ -1,2 +1,6 @@ include README.md -include MANIFEST.in \ No newline at end of file +include MANIFEST.in +include LICENSE.txt +include requirements.txt + +recursive-include examples *.py diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/featureflow-2.9.0/PKG-INFO new/featureflow-2.12.1/PKG-INFO --- old/featureflow-2.9.0/PKG-INFO 2018-06-26 16:21:10.000000000 +0200 +++ new/featureflow-2.12.1/PKG-INFO 2018-11-07 03:27:31.000000000 +0100 @@ -1,12 +1,12 @@ Metadata-Version: 1.1 Name: featureflow -Version: 2.9.0 +Version: 2.12.1 Summary: UNKNOWN Home-page: https://github.com/JohnVinyard/featureflow Author: John Vinyard Author-email: [email protected] License: UNKNOWN -Download-URL: https://github.com/jvinyard/featureflow/tarball/2.9.0 +Download-URL: https://github.com/jvinyard/featureflow/tarball/2.12.1 Description-Content-Type: UNKNOWN Description: |Build Status| |Coverage Status| |PyPI| diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/featureflow-2.9.0/examples/wordcount.py new/featureflow-2.12.1/examples/wordcount.py --- old/featureflow-2.9.0/examples/wordcount.py 1970-01-01 01:00:00.000000000 +0100 +++ new/featureflow-2.12.1/examples/wordcount.py 2018-06-26 15:55:58.000000000 +0200 @@ -0,0 +1,141 @@ +from __future__ import print_function +import featureflow as ff +import argparse +from collections import Counter +import re +import hashlib + + +class Tokenizer(ff.Node): + """ + Tokenize a stream of text into individual, normalized (lowercase) + words/tokens + """ + def __init__(self, needs=None): + super(Tokenizer, self).__init__(needs=needs) + 
self._cache = '' + self._pattern = re.compile('(?P<word>[a-zA-Z]+)\W+') + + def _enqueue(self, data, pusher): + self._cache += data + + def _dequeue(self): + matches = list(self._pattern.finditer(self._cache)) + if not matches: + raise ff.NotEnoughData() + last_boundary = matches[-1].end() + self._cache = self._cache[last_boundary:] + return matches + + def _process(self, data): + yield map(lambda x: x.groupdict()['word'].lower(), data) + + +class WordCount(ff.Aggregator, ff.Node): + """ + Keep track of token frequency + """ + def __init__(self, needs=None): + super(WordCount, self).__init__(needs=needs) + self._cache = Counter() + + def _enqueue(self, data, pusher): + self._cache.update(data) + + +class CheckSum(ff.Aggregator, ff.Node): + """ + Compute the checksum of a text stream + """ + def __init__(self, needs=None): + super(CheckSum, self).__init__(needs=needs) + self._cache = hashlib.sha256() + + def _enqueue(self, data, pusher): + self._cache.update(data) + + def _process(self, data): + yield data.hexdigest() + + [email protected]_in_memory_settings +class Document(ff.BaseModel): + """ + Define the processing graph needed to extract document-level features, + whether, and how those features should be persisted. + """ + raw = ff.ByteStreamFeature( + ff.ByteStream, + chunksize=128, + store=True) + + checksum = ff.JSONFeature( + CheckSum, + needs=raw, + store=True) + + tokens = ff.Feature( + Tokenizer, + needs=raw, + store=False) + + counts = ff.JSONFeature( + WordCount, + needs=tokens, + store=True) + + [email protected]_in_memory_settings +class Corpus(ff.BaseModel): + """ + Define the processing graph needed to extract corpus-level features, + whether, and how those features should be persisted. 
+ """ + docs = ff.Feature( + lambda doc_cls: (doc.counts for doc in doc_cls), + store=False) + + total_counts = ff.JSONFeature( + WordCount, + needs=docs, + store=True) + + +def process_urls(urls): + for url in urls: + Document.process(raw=url) + + +def summarize_document(doc): + return 'doc {_id} with checksum {cs} contains "the" {n} times'.format( + _id=doc._id, + cs=doc.checksum, + n=doc.counts.get('the', 0)) + + +def process_corpus(document_cls): + corpus_id = Corpus.process(docs=document_cls) + return Corpus(corpus_id) + + +def summarize_corpus(corpus): + return 'The entire text corpus contains "the" {n} times'.format( + n=corpus.total_counts.get("the", 0)) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument( + '--url', + help='specify one or more urls of text files to ingest', + required=True, + action='append') + args = parser.parse_args() + + process_urls(args.url) + + for doc in Document: + print(summarize_document(doc)) + + corpus = process_corpus(Document) + print(summarize_corpus(corpus)) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/featureflow-2.9.0/featureflow/__init__.py new/featureflow-2.12.1/featureflow/__init__.py --- old/featureflow-2.9.0/featureflow/__init__.py 2018-06-26 16:19:22.000000000 +0200 +++ new/featureflow-2.12.1/featureflow/__init__.py 2018-11-07 03:23:27.000000000 +0100 @@ -1,4 +1,4 @@ -__version__ = '2.9.0' +__version__ = '2.12.1' from model import BaseModel, ModelExistsError diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/featureflow-2.9.0/featureflow/nmpy.py new/featureflow-2.12.1/featureflow/nmpy.py --- old/featureflow-2.9.0/featureflow/nmpy.py 2018-03-01 20:04:43.000000000 +0100 +++ new/featureflow-2.12.1/featureflow/nmpy.py 2018-11-07 02:45:14.000000000 +0100 @@ -77,8 +77,29 @@ def __init__(self, needs=None): super(PackedNumpyEncoder, self).__init__(needs=needs) + def _pack_recarray(self, recarr): 
+ fields = recarr.dtype.fields + + packed_data = dict() + new_dtype = [] + + for name in fields.iterkeys(): + view = recarr[name].copy().view(np.uint8) \ + .reshape(recarr.shape + (-1,)) + packed_data[name] = view + new_dtype.append((name, np.uint8, view.shape[1:])) + + packed_recarray = np.recarray(recarr.shape, dtype=new_dtype) + + for name, value in packed_data.iteritems(): + packed_recarray[name] = value + return packed_recarray + def _prepare_data(self, data): - return np.packbits(data.astype(np.uint8), axis=-1) + try: + return np.packbits(data.astype(np.uint8), axis=-1) + except ValueError: + return self._pack_recarray(data) def _np_from_buffer(b, shape, dtype): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/featureflow-2.9.0/featureflow.egg-info/PKG-INFO new/featureflow-2.12.1/featureflow.egg-info/PKG-INFO --- old/featureflow-2.9.0/featureflow.egg-info/PKG-INFO 2018-06-26 16:21:10.000000000 +0200 +++ new/featureflow-2.12.1/featureflow.egg-info/PKG-INFO 2018-11-07 03:27:31.000000000 +0100 @@ -1,12 +1,12 @@ Metadata-Version: 1.1 Name: featureflow -Version: 2.9.0 +Version: 2.12.1 Summary: UNKNOWN Home-page: https://github.com/JohnVinyard/featureflow Author: John Vinyard Author-email: [email protected] License: UNKNOWN -Download-URL: https://github.com/jvinyard/featureflow/tarball/2.9.0 +Download-URL: https://github.com/jvinyard/featureflow/tarball/2.12.1 Description-Content-Type: UNKNOWN Description: |Build Status| |Coverage Status| |PyPI| diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/featureflow-2.9.0/featureflow.egg-info/SOURCES.txt new/featureflow-2.12.1/featureflow.egg-info/SOURCES.txt --- old/featureflow-2.9.0/featureflow.egg-info/SOURCES.txt 2018-06-26 16:21:10.000000000 +0200 +++ new/featureflow-2.12.1/featureflow.egg-info/SOURCES.txt 2018-11-07 03:27:31.000000000 +0100 @@ -1,7 +1,10 @@ +LICENSE.txt MANIFEST.in README.md +requirements.txt setup.cfg setup.py 
+examples/wordcount.py featureflow/__init__.py featureflow/bytestream.py featureflow/data.py diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/featureflow-2.9.0/featureflow.egg-info/requires.txt new/featureflow-2.12.1/featureflow.egg-info/requires.txt --- old/featureflow-2.9.0/featureflow.egg-info/requires.txt 2018-06-26 16:21:10.000000000 +0200 +++ new/featureflow-2.12.1/featureflow.egg-info/requires.txt 2018-11-07 03:27:31.000000000 +0100 @@ -5,3 +5,6 @@ requests lmdb redis + +[numpy] +numpy==1.15.3 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/featureflow-2.9.0/requirements.txt new/featureflow-2.12.1/requirements.txt --- old/featureflow-2.9.0/requirements.txt 1970-01-01 01:00:00.000000000 +0100 +++ new/featureflow-2.12.1/requirements.txt 2017-05-13 04:26:57.000000000 +0200 @@ -0,0 +1,5 @@ +redis +nose +unittest2 +requests +lmdb \ No newline at end of file diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/featureflow-2.9.0/setup.py new/featureflow-2.12.1/setup.py --- old/featureflow-2.9.0/setup.py 2017-10-17 22:17:09.000000000 +0200 +++ new/featureflow-2.12.1/setup.py 2018-11-07 03:23:13.000000000 +0100 @@ -1,37 +1,41 @@ from setuptools import setup import re +import subprocess try: - import pypandoc - long_description = pypandoc.convert('README.md', 'rst') -except(IOError, ImportError): + long_description = subprocess.check_output( + 'pandoc --to rst README.md', shell=True) +except(IOError, ImportError, subprocess.CalledProcessError): long_description = open('README.md').read() with open('featureflow/__init__.py', 'r') as fd: version = re.search( - r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]', - fd.read(), - re.MULTILINE).group(1) + r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]', + fd.read(), + re.MULTILINE).group(1) -download_url = 'https://github.com/jvinyard/featureflow/tarball/{version}'\ +download_url = 
'https://github.com/jvinyard/featureflow/tarball/{version}' \ .format(**locals()) setup( - name='featureflow', - version=version, - url='https://github.com/JohnVinyard/featureflow', - author='John Vinyard', - author_email='[email protected]', - long_description=long_description, - packages=['featureflow'], - download_url=download_url, - install_requires=[ - 'dill', - 'nose', - 'unittest2', - 'certifi==2017.7.27.1', - 'requests', - 'lmdb', - 'redis' - ] + name='featureflow', + version=version, + url='https://github.com/JohnVinyard/featureflow', + author='John Vinyard', + author_email='[email protected]', + long_description=long_description, + packages=['featureflow'], + download_url=download_url, + install_requires=[ + 'dill', + 'nose', + 'unittest2', + 'certifi==2017.7.27.1', + 'requests', + 'lmdb', + 'redis' + ], + extras_require={ + 'numpy': ['numpy==1.15.3'] + } ) ++++++ fix_certifi_dependency.patch ++++++ --- /var/tmp/diff_new_pack.EDnkPV/_old 2019-01-08 12:28:21.996279503 +0100 +++ /var/tmp/diff_new_pack.EDnkPV/_new 2019-01-08 12:28:21.996279503 +0100 @@ -9,12 +9,12 @@ --- a/setup.py +++ b/setup.py -@@ -29,7 +29,7 @@ - 'dill', - 'nose', - 'unittest2', -- 'certifi==2017.7.27.1', -+ 'certifi>=2017.7.27.1', - 'requests', - 'lmdb', - 'redis' +@@ -30,7 +30,7 @@ + 'dill', + 'nose', + 'unittest2', +- 'certifi==2017.7.27.1', ++ 'certifi>=2017.7.27.1', + 'requests', + 'lmdb', + 'redis'
