Hello community, here is the log from the commit of package python-annoy for openSUSE:Factory checked in at 2020-01-17 16:06:21 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-annoy (Old) and /work/SRC/openSUSE:Factory/.python-annoy.new.26092 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-annoy" Fri Jan 17 16:06:21 2020 rev:8 rq:765071 version:1.16.3 Changes: -------- --- /work/SRC/openSUSE:Factory/python-annoy/python-annoy.changes 2019-10-02 12:00:53.398725018 +0200 +++ /work/SRC/openSUSE:Factory/.python-annoy.new.26092/python-annoy.changes 2020-01-17 16:07:19.944495260 +0100 @@ -1,0 +2,9 @@ +Thu Jan 16 17:16:30 UTC 2020 - Marketa Calabkova <[email protected]> + +- update to 1.16.3 + * some error message improvements + * doing more stack allocations instead of heap + * fixes to a few tests that were failing on other platforms +- Rebase patch reproducible.patch + +------------------------------------------------------------------- Old: ---- v1.16.0.tar.gz New: ---- v1.16.3.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-annoy.spec ++++++ --- /var/tmp/diff_new_pack.B8h3qf/_old 2020-01-17 16:07:21.092495803 +0100 +++ /var/tmp/diff_new_pack.B8h3qf/_new 2020-01-17 16:07:21.096495805 +0100 @@ -1,7 +1,7 @@ # # spec file for package python-annoy # -# Copyright (c) 2019 SUSE LINUX GmbH, Nuernberg, Germany. +# Copyright (c) 2020 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -18,12 +18,12 @@ %{?!python_module:%define python_module() python-%{**} python3-%{**}} Name: python-annoy -Version: 1.16.0 +Version: 1.16.3 Release: 0 Summary: Approximation of Nearest Neighbors License: Apache-2.0 Group: Development/Languages/Python -Url: https://github.com/spotify/annoy +URL: https://github.com/spotify/annoy Source: https://github.com/spotify/annoy/archive/v%{version}.tar.gz # PATCH-FIX-OPENSUSE boo#1100677 Patch0: reproducible.patch ++++++ reproducible.patch ++++++ --- /var/tmp/diff_new_pack.B8h3qf/_old 2020-01-17 16:07:21.112495812 +0100 +++ /var/tmp/diff_new_pack.B8h3qf/_new 2020-01-17 16:07:21.112495812 +0100 @@ -3,22 +3,14 @@ https://bugzilla.opensuse.org/show_bug.cgi?id=1100677 -Index: annoy-1.16.0/setup.py +Index: annoy-1.16.3/setup.py =================================================================== ---- annoy-1.16.0.orig/setup.py -+++ annoy-1.16.0/setup.py -@@ -36,29 +36,10 @@ with codecs.open('README.rst', encoding= - long_description = readme_note + fobj.read() - - # Various platform-dependent extras -+cputune = [] - extra_compile_args = [] +--- annoy-1.16.3.orig/setup.py ++++ annoy-1.16.3/setup.py +@@ -38,22 +38,6 @@ with codecs.open('README.rst', encoding= + extra_compile_args = ['-D_CRT_SECURE_NO_WARNINGS'] extra_link_args = [] --if os.environ.get('TRAVIS') == 'true': -- # Resolving some annoying issue -- extra_compile_args += ['-mno-avx'] -- -# Not all CPUs have march as a tuning parameter -cputune = ['-march=native',] -if platform.machine() == 'ppc64le': @@ -35,6 +27,6 @@ - extra_compile_args += ['-std=c++11', '-mmacosx-version-min=10.9'] - extra_link_args += ['-stdlib=libc++', '-mmacosx-version-min=10.9'] - - setup(name='annoy', - version='1.16.0', - description='Approximate Nearest Neighbors in C++/Python optimized for memory usage and loading/saving to disk.', + # Manual configuration, you're on your own here. + manual_compiler_args = os.environ.get('ANNOY_COMPILER_ARGS', None) + if manual_compiler_args: ++++++ v1.16.0.tar.gz -> v1.16.3.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.16.0/.gitignore new/annoy-1.16.3/.gitignore --- old/annoy-1.16.0/.gitignore 2019-07-09 04:15:02.000000000 +0200 +++ new/annoy-1.16.3/.gitignore 2019-12-26 22:11:45.000000000 +0100 @@ -12,8 +12,8 @@ *.idea # testing -blah.ann -t.ann -x.tree -test.tree -foo.idx +*.ann +*.tree +*.annoy +*.idx +*.hdf5 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.16.0/.travis.yml new/annoy-1.16.3/.travis.yml --- old/annoy-1.16.0/.travis.yml 2019-07-09 04:15:02.000000000 +0200 +++ new/annoy-1.16.3/.travis.yml 2019-12-26 22:11:45.000000000 +0100 @@ -9,7 +9,10 @@ env: - TOXENV=py36 - python: 3.7 - dist: xenial + env: + - TOXENV=py37 + - os: osx + language: generic env: - TOXENV=py37 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.16.0/README.rst new/annoy-1.16.3/README.rst --- old/annoy-1.16.0/README.rst 2019-07-09 04:15:02.000000000 +0200 +++ new/annoy-1.16.3/README.rst 2019-12-26 22:11:45.000000000 +0100 @@ -57,9 +57,9 @@ import random f = 40 - t = AnnoyIndex(f) # Length of item vector that will be indexed - for i in xrange(1000): - v = [random.gauss(0, 1) for z in xrange(f)] + t = AnnoyIndex(f, 'angular') # Length of item vector that will be indexed + for i in range(1000): + v = [random.gauss(0, 1) for z in range(f)] t.add_item(i, v) t.build(10) # 10 trees @@ -67,7 +67,7 @@ # ... - u = AnnoyIndex(f) + u = AnnoyIndex(f, 'angular') u.load('test.ann') # super fast, will just mmap the file print(u.get_nns_by_item(0, 1000)) # will find the 1000 nearest neighbors @@ -76,7 +76,7 @@ Full Python API --------------- -* ``AnnoyIndex(f, metric='angular')`` returns a new index that's read-write and stores vector of ``f`` dimensions. Metric can be ``"angular"``, ``"euclidean"``, ``"manhattan"``, ``"hamming"``, or ``"dot"``. +* ``AnnoyIndex(f, metric)`` returns a new index that's read-write and stores vector of ``f`` dimensions. Metric can be ``"angular"``, ``"euclidean"``, ``"manhattan"``, ``"hamming"``, or ``"dot"``. * ``a.add_item(i, v)`` adds item ``i`` (any nonnegative integer) with vector ``v``. Note that it will allocate memory for ``max(i)+1`` items. * ``a.build(n_trees)`` builds a forest of ``n_trees`` trees. More trees gives higher precision when querying. After calling ``build``, no more items can be added. * ``a.save(fn, prefault=False)`` saves the index to disk and loads it (see next function). After saving, no more items can be added. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.16.0/RELEASE.md new/annoy-1.16.3/RELEASE.md --- old/annoy-1.16.0/RELEASE.md 2019-07-09 04:15:02.000000000 +0200 +++ new/annoy-1.16.3/RELEASE.md 2019-12-26 22:11:45.000000000 +0100 @@ -3,11 +3,11 @@ 1. Make sure you're on master. `git checkout master && git fetch && git reset --hard origin/master` 1. Update `setup.py` to the newest version, `git add setup.py && git commit -m "version 1.2.3"` -1. `python setup.py sdist` +1. `python setup.py sdist bdist_wheel` 1. `git tag -a v1.2.3 -m "version 1.2.3"` 1. `git push --tags origin master` to push the last version to Github 1. Go to https://github.com/spotify/annoy/releases and click "Draft a new release" -1. `twine upload dist/*` +1. `twine upload dist/annoy-1.2.3*` TODO ---- diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.16.0/setup.py new/annoy-1.16.3/setup.py --- old/annoy-1.16.0/setup.py 2019-07-09 04:15:02.000000000 +0200 +++ new/annoy-1.16.3/setup.py 2019-12-26 22:11:45.000000000 +0100 @@ -19,7 +19,6 @@ import codecs import os import platform -import sys readme_note = """\ .. note:: @@ -36,13 +35,9 @@ long_description = readme_note + fobj.read() # Various platform-dependent extras -extra_compile_args = [] +extra_compile_args = ['-D_CRT_SECURE_NO_WARNINGS'] extra_link_args = [] -if os.environ.get('TRAVIS') == 'true': - # Resolving some annoying issue - extra_compile_args += ['-mno-avx'] - # Not all CPUs have march as a tuning parameter cputune = ['-march=native',] if platform.machine() == 'ppc64le': @@ -59,17 +54,25 @@ extra_compile_args += ['-std=c++11', '-mmacosx-version-min=10.9'] extra_link_args += ['-stdlib=libc++', '-mmacosx-version-min=10.9'] +# Manual configuration, you're on your own here. +manual_compiler_args = os.environ.get('ANNOY_COMPILER_ARGS', None) +if manual_compiler_args: + extra_compile_args = manual_compiler_args.split(',') +manual_linker_args = os.environ.get('ANNOY_LINKER_ARGS', None) +if manual_linker_args: + extra_link_args = manual_linker_args.split(',') + setup(name='annoy', - version='1.16.0', + version='1.16.3', description='Approximate Nearest Neighbors in C++/Python optimized for memory usage and loading/saving to disk.', packages=['annoy'], ext_modules=[ - Extension( - 'annoy.annoylib', ['src/annoymodule.cc'], - depends=['src/annoylib.h', 'src/kissrandom.h', 'src/mman.h'], - extra_compile_args=extra_compile_args, - extra_link_args=extra_link_args, - ) + Extension( + 'annoy.annoylib', ['src/annoymodule.cc'], + depends=['src/annoylib.h', 'src/kissrandom.h', 'src/mman.h'], + extra_compile_args=extra_compile_args, + extra_link_args=extra_link_args, + ) ], long_description=long_description, author='Erik Bernhardsson', @@ -87,5 +90,6 @@ 'Programming Language :: Python :: 3.6', ], keywords='nns, approximate nearest neighbor search', - setup_requires=['nose>=1.0'] - ) + setup_requires=['nose>=1.0'], + tests_require=['numpy', 'h5py'] + ) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.16.0/src/annoylib.h new/annoy-1.16.3/src/annoylib.h --- old/annoy-1.16.0/src/annoylib.h 2019-07-09 04:15:02.000000000 +0200 +++ new/annoy-1.16.3/src/annoylib.h 2019-12-26 22:11:45.000000000 +0100 @@ -31,11 +31,16 @@ typedef unsigned char uint8_t; typedef signed __int32 int32_t; typedef unsigned __int64 uint64_t; +typedef signed __int64 int64_t; #else #include <stdint.h> #endif + #if defined(_MSC_VER) || defined(__MINGW32__) + // a bit hacky, but override some definitions to support 64 bit + #define off_t int64_t + #define lseek_getsize(fd) _lseeki64(fd, 0, SEEK_END) #ifndef NOMINMAX #define NOMINMAX #endif @@ -43,6 +48,7 @@ #include <windows.h> #else #include <sys/mman.h> + #define lseek_getsize(fd) lseek(fd, 0, SEEK_END) #endif #include <cerrno> @@ -66,6 +72,22 @@ #define showUpdate(...) { __ERROR_PRINTER_OVERRIDE__( __VA_ARGS__ ); } #endif +void set_error_from_errno(char **error, const char* msg) { + showUpdate("%s: %s (%d)\n", msg, strerror(errno), errno); + if (error) { + *error = (char *)malloc(256); // TODO: win doesn't support snprintf + sprintf(*error, "%s: %s (%d)", msg, strerror(errno), errno); + } +} + +void set_error_from_string(char **error, const char* msg) { + showUpdate("%s\n", msg); + if (error) { + *error = (char *)malloc(strlen(msg) + 1); + strcpy(*error, msg); + } +} + #ifndef _MSC_VER #define popcount __builtin_popcountll @@ -74,12 +96,14 @@ #define popcount cole_popcount #endif -#ifndef NO_MANUAL_VECTORIZATION -#if defined(__AVX512F__) +#if !defined(NO_MANUAL_VECTORIZATION) && defined(__GNUC__) && (__GNUC__ >6) && defined(__AVX512F__) // See #402 +#pragma message "Using 512-bit AVX instructions" #define USE_AVX512 -#elif defined(__AVX__) && defined (__SSE__) && defined(__SSE2__) && defined(__SSE3__) +#elif !defined(NO_MANUAL_VECTORIZATION) && defined(__AVX__) && defined (__SSE__) && defined(__SSE2__) && defined(__SSE3__) +#pragma message "Using 128-bit AVX instructions" #define USE_AVX -#endif +#else +#pragma message "Using no AVX instructions" #endif #if defined(USE_AVX) || defined(USE_AVX512) @@ -348,7 +372,7 @@ size_t k = random.index(count); T di = ic * Distance::distance(p, nodes[k], f), dj = jc * Distance::distance(q, nodes[k], f); - T norm = cosine ? get_norm(nodes[k]->v, f) : 1.0; + T norm = cosine ? get_norm(nodes[k]->v, f) : 1; if (!(norm > T(0))) { continue; } @@ -440,18 +464,16 @@ if (dot != 0) return (dot > 0); else - return random.flip(); + return (bool)random.flip(); } template<typename S, typename T, typename Random> static inline void create_split(const vector<Node<S, T>*>& nodes, int f, size_t s, Random& random, Node<S, T>* n) { - Node<S, T>* p = (Node<S, T>*)malloc(s); // TODO: avoid - Node<S, T>* q = (Node<S, T>*)malloc(s); // TODO: avoid + Node<S, T>* p = (Node<S, T>*)alloca(s); + Node<S, T>* q = (Node<S, T>*)alloca(s); two_means<T, Random, Angular, Node<S, T> >(nodes, f, random, true, p, q); for (int z = 0; z < f; z++) n->v[z] = p->v[z] - q->v[z]; Base::normalize<T, Node<S, T> >(n, f); - free(p); - free(q); } template<typename T> static inline T normalized_distance(T distance) { @@ -517,8 +539,8 @@ template<typename S, typename T, typename Random> static inline void create_split(const vector<Node<S, T>*>& nodes, int f, size_t s, Random& random, Node<S, T>* n) { - Node<S, T>* p = (Node<S, T>*)malloc(s); // TODO: avoid - Node<S, T>* q = (Node<S, T>*)malloc(s); // TODO: avoid + Node<S, T>* p = (Node<S, T>*)alloca(s); + Node<S, T>* q = (Node<S, T>*)alloca(s); DotProduct::zero_value(p); DotProduct::zero_value(q); two_means<T, Random, DotProduct, Node<S, T> >(nodes, f, random, true, p, q); @@ -526,8 +548,6 @@ n->v[z] = p->v[z] - q->v[z]; n->dot_factor = p->dot_factor - q->dot_factor; DotProduct::normalize<T, Node<S, T> >(n, f); - free(p); - free(q); } template<typename T, typename Node> @@ -551,7 +571,7 @@ if (dot != 0) return (dot > 0); else - return random.flip(); + return (bool)random.flip(); } template<typename T> @@ -708,7 +728,7 @@ if (dot != 0) return (dot > 0); else - return random.flip(); + return (bool)random.flip(); } template<typename T> static inline T pq_distance(T distance, T margin, int child_nr) { @@ -730,8 +750,8 @@ } template<typename S, typename T, typename Random> static inline void create_split(const vector<Node<S, T>*>& nodes, int f, size_t s, Random& random, Node<S, T>* n) { - Node<S, T>* p = (Node<S, T>*)malloc(s); // TODO: avoid - Node<S, T>* q = (Node<S, T>*)malloc(s); // TODO: avoid + Node<S, T>* p = (Node<S, T>*)alloca(s); + Node<S, T>* q = (Node<S, T>*)alloca(s); two_means<T, Random, Euclidean, Node<S, T> >(nodes, f, random, false, p, q); for (int z = 0; z < f; z++) @@ -740,8 +760,6 @@ n->a = 0.0; for (int z = 0; z < f; z++) n->a += -n->v[z] * (p->v[z] + q->v[z]) / 2; - free(p); - free(q); } template<typename T> static inline T normalized_distance(T distance) { @@ -763,8 +781,8 @@ } template<typename S, typename T, typename Random> static inline void create_split(const vector<Node<S, T>*>& nodes, int f, size_t s, Random& random, Node<S, T>* n) { - Node<S, T>* p = (Node<S, T>*)malloc(s); // TODO: avoid - Node<S, T>* q = (Node<S, T>*)malloc(s); // TODO: avoid + Node<S, T>* p = (Node<S, T>*)alloca(s); + Node<S, T>* q = (Node<S, T>*)alloca(s); two_means<T, Random, Manhattan, Node<S, T> >(nodes, f, random, false, p, q); for (int z = 0; z < f; z++) @@ -773,8 +791,6 @@ n->a = 0.0; for (int z = 0; z < f; z++) n->a += -n->v[z] * (p->v[z] + q->v[z]) / 2; - free(p); - free(q); } template<typename T> static inline T normalized_distance(T distance) { @@ -791,6 +807,7 @@ template<typename S, typename T> class AnnoyIndexInterface { public: + // Note that the methods with an **error argument will allocate memory and write the pointer to that string if error is non-NULL virtual ~AnnoyIndexInterface() {}; virtual bool add_item(S item, const T* w, char** error=NULL) = 0; virtual bool build(int q, char** error=NULL) = 0; @@ -836,11 +853,13 @@ bool _verbose; int _fd; bool _on_disk; + bool _built; public: - AnnoyIndex(int f) : _f(f), _random() { + AnnoyIndex(int f) : _f(f), _random() { _s = offsetof(Node, v) + _f * sizeof(T); // Size of each node _verbose = false; + _built = false; _K = (S) (((size_t) (_s - offsetof(Node, children))) / sizeof(S)); // Max number of descendants to fit into node reinitialize(); // Reset everything } @@ -859,8 +878,7 @@ template<typename W> bool add_item_impl(S item, const W& w, char** error=NULL) { if (_loaded) { - showUpdate("You can't add an item to a loaded index\n"); - if (error) *error = (char *)"You can't add an item to a loaded index"; + set_error_from_string(error, "You can't add an item to a loaded index"); return false; } _allocate_size(item + 1); @@ -887,15 +905,13 @@ _on_disk = true; _fd = open(file, O_RDWR | O_CREAT | O_TRUNC, (int) 0600); if (_fd == -1) { - showUpdate("Error: file descriptor is -1\n"); - if (error) *error = strerror(errno); + set_error_from_errno(error, "Unable to open"); _fd = 0; return false; } _nodes_size = 1; if (ftruncate(_fd, _s * _nodes_size) == -1) { - showUpdate("Error truncating file: %s\n", strerror(errno)); - if (error) *error = strerror(errno); + set_error_from_errno(error, "Unable to truncate"); return false; } #ifdef MAP_POPULATE @@ -908,8 +924,12 @@ bool build(int q, char** error=NULL) { if (_loaded) { - showUpdate("You can't build a loaded index\n"); - if (error) *error = (char *)"You can't build a loaded index"; + set_error_from_string(error, "You can't build a loaded index"); + return false; + } + + if (_built) { + set_error_from_string(error, "You can't build a built index"); return false; } @@ -944,30 +964,34 @@ if (_on_disk) { _nodes = remap_memory(_nodes, _fd, _s * _nodes_size, _s * _n_nodes); if (ftruncate(_fd, _s * _n_nodes)) { - // TODO: this probably creates an index in a corrupt state... not sure what to do - showUpdate("Error truncating file: %s\n", strerror(errno)); - if (error) *error = strerror(errno); - return false; + // TODO: this probably creates an index in a corrupt state... not sure what to do + set_error_from_errno(error, "Unable to truncate"); + return false; } _nodes_size = _n_nodes; } + _built = true; return true; } bool unbuild(char** error=NULL) { if (_loaded) { - showUpdate("You can't unbuild a loaded index\n"); - if (error) *error = (char *)"You can't unbuild a loaded index"; + set_error_from_string(error, "You can't unbuild a loaded index"); return false; } _roots.clear(); _n_nodes = _n_items; + _built = false; return true; } bool save(const char* filename, bool prefault=false, char** error=NULL) { + if (!_built) { + set_error_from_string(error, "You can't save an index that hasn't been built"); + return false; + } if (_on_disk) { return true; } else { @@ -976,20 +1000,17 @@ FILE *f = fopen(filename, "wb"); if (f == NULL) { - showUpdate("Unable to open: %s\n", strerror(errno)); - if (error) *error = strerror(errno); + set_error_from_errno(error, "Unable to open"); return false; } if (fwrite(_nodes, _s, _n_nodes, f) != (size_t) _n_nodes) { - showUpdate("Unable to write: %s\n", strerror(errno)); - if (error) *error = strerror(errno); + set_error_from_errno(error, "Unable to write"); return false; } if (fclose(f) == EOF) { - showUpdate("Unable to close: %s\n", strerror(errno)); - if (error) *error = strerror(errno); + set_error_from_errno(error, "Unable to close"); return false; } @@ -1030,15 +1051,23 @@ bool load(const char* filename, bool prefault=false, char** error=NULL) { _fd = open(filename, O_RDONLY, (int)0400); if (_fd == -1) { - showUpdate("Error: file descriptor is -1\n"); - if (error) *error = strerror(errno); + set_error_from_errno(error, "Unable to open"); _fd = 0; return false; } - off_t size = lseek(_fd, 0, SEEK_END); - if (size <= 0) { - showUpdate("Warning: index size %zu\n", (size_t)size); + off_t size = lseek_getsize(_fd); + if (size == -1) { + set_error_from_errno(error, "Unable to get size"); + return false; + } else if (size == 0) { + set_error_from_errno(error, "Size of file is zero"); + return false; + } else if (size % _s) { + // Something is fishy with this index! + set_error_from_errno(error, "Index size is not a multiple of vector size"); + return false; } + int flags = MAP_SHARED; if (prefault) { #ifdef MAP_POPULATE @@ -1048,12 +1077,6 @@ #endif } _nodes = (Node*)mmap(0, size, PROT_READ, flags, _fd, 0); - if (size % _s) { - // Something is fishy with this index! - showUpdate("Error: index size %zu is not a multiple of vector size %zu\n", (size_t)size, _s); - if (error) *error = (char *)"Index size is not a multiple of vector size"; - return false; - } _n_nodes = (S)(size / _s); // Find the roots by scanning the end of the file and taking the nodes with most descendants @@ -1072,6 +1095,7 @@ if (_roots.size() > 1 && _get(_roots.front())->children[0] == _get(_roots.back())->children[0]) _roots.pop_back(); _loaded = true; + _built = true; _n_items = m; if (_verbose) showUpdate("found %lu roots with degree %d\n", _roots.size(), m); return true; @@ -1096,7 +1120,7 @@ } S get_n_trees() const { - return _roots.size(); + return (S)_roots.size(); } void verbose(bool v) { @@ -1121,7 +1145,8 @@ void *old = _nodes; if (_on_disk) { - ftruncate(_fd, _s * new_nodes_size); + int rc = ftruncate(_fd, _s * new_nodes_size); + if (_verbose && rc) showUpdate("File truncation error\n"); _nodes = remap_memory(_nodes, _fd, _s * _nodes_size, _s * new_nodes_size); } else { _nodes = realloc(_nodes, _s * new_nodes_size); @@ -1170,7 +1195,7 @@ } vector<S> children_indices[2]; - Node* m = (Node*)malloc(_s); // TODO: avoid + Node* m = (Node*)alloca(_s); D::create_split(children, _f, _s, _random, m); for (size_t i = 0; i < indices.size(); i++) { @@ -1197,7 +1222,7 @@ // Set the vector to 0.0 for (int z = 0; z < _f; z++) - m->v[z] = 0.0; + m->v[z] = 0; for (size_t i = 0; i < indices.size(); i++) { S j = indices[i]; @@ -1217,13 +1242,12 @@ _allocate_size(_n_nodes + 1); S item = _n_nodes++; memcpy(_get(item), m, _s); - free(m); return item; } void _get_all_nns(const T* v, size_t n, size_t search_k, vector<S>* result, vector<T>* distances) const { - Node* v_node = (Node *)malloc(_s); // TODO: avoid + Node* v_node = (Node *)alloca(_s); D::template zero_value<Node>(v_node); memcpy(v_node->v, v, sizeof(T) * _f); D::init_node(v_node, _f); @@ -1259,7 +1283,7 @@ // Get distances for all items // To avoid calculating distance multiple times for any items, sort by id - sort(nns.begin(), nns.end()); + std::sort(nns.begin(), nns.end()); vector<pair<T, S> > nns_dist; S last = -1; for (size_t i = 0; i < nns.size(); i++) { @@ -1279,7 +1303,6 @@ distances->push_back(D::normalized_distance(nns_dist[i].first)); result->push_back(nns_dist[i].second); } - free(v_node); } }; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.16.0/src/annoymodule.cc new/annoy-1.16.3/src/annoymodule.cc --- old/annoy-1.16.0/src/annoymodule.cc 2019-07-09 04:15:02.000000000 +0200 +++ new/annoy-1.16.3/src/annoymodule.cc 2019-12-26 22:11:45.000000000 +0100 @@ -185,6 +185,7 @@ if (!self->ptr->load(filename, prefault, &error)) { PyErr_SetString(PyExc_IOError, error); + free(error); return NULL; } Py_RETURN_TRUE; @@ -203,6 +204,7 @@ if (!self->ptr->save(filename, prefault, &error)) { PyErr_SetString(PyExc_IOError, error); + free(error); return NULL; } Py_RETURN_TRUE; @@ -361,6 +363,7 @@ char* error; if (!self->ptr->add_item(item, &w[0], &error)) { PyErr_SetString(PyExc_Exception, error); + free(error); return NULL; } @@ -378,6 +381,7 @@ if (!self->ptr->on_disk_build(filename, &error)) { PyErr_SetString(PyExc_IOError, error); + free(error); return NULL; } Py_RETURN_TRUE; @@ -399,6 +403,7 @@ Py_END_ALLOW_THREADS; if (!res) { PyErr_SetString(PyExc_Exception, error); + free(error); return NULL; } @@ -414,6 +419,7 @@ char* error; if (!self->ptr->unbuild(&error)) { PyErr_SetString(PyExc_Exception, error); + free(error); return NULL; } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.16.0/src/mman.h new/annoy-1.16.3/src/mman.h --- old/annoy-1.16.0/src/mman.h 2019-07-09 04:15:02.000000000 +0200 +++ new/annoy-1.16.3/src/mman.h 2019-12-26 22:11:45.000000000 +0100 @@ -209,7 +209,8 @@ return -1; } -int ftruncate(int fd, unsigned int size) { +#if !defined(__MINGW32__) +inline int ftruncate(int fd, unsigned int size) { if (fd < 0) { errno = EBADF; return -1; @@ -232,5 +233,6 @@ return 0; } +#endif #endif diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.16.0/test/angular_index_test.py new/annoy-1.16.3/test/angular_index_test.py --- old/annoy-1.16.0/test/angular_index_test.py 2019-07-09 04:15:02.000000000 +0200 +++ new/annoy-1.16.3/test/angular_index_test.py 2019-12-26 22:11:45.000000000 +0100 @@ -175,7 +175,7 @@ i.add_item(j, numpy.random.normal(size=f)) i.build(10) indices, dists = i.get_nns_by_item(0, 100000, include_distances=True) - self.assertTrue(max(dists) < 2.0) + self.assertLessEqual(max(dists), 2.0) self.assertAlmostEqual(min(dists), 0.0) def test_distance_consistency(self): @@ -193,9 +193,9 @@ u_norm = numpy.array(u) * numpy.dot(u, u)**-0.5 v_norm = numpy.array(v) * numpy.dot(v, v)**-0.5 # cos = numpy.clip(1 - cosine(u, v), -1, 1) # scipy returns 1 - cos - self.assertAlmostEqual(dist, numpy.dot(u_norm - v_norm, u_norm - v_norm) ** 0.5) + self.assertAlmostEqual(dist ** 2, numpy.dot(u_norm - v_norm, u_norm - v_norm)) # self.assertAlmostEqual(dist, (2*(1 - cos))**0.5) - self.assertAlmostEqual(dist, sum([(x-y)**2 for x, y in zip(u_norm, v_norm)])**0.5) + self.assertAlmostEqual(dist ** 2, sum([(x-y)**2 for x, y in zip(u_norm, v_norm)])) def test_only_one_item(self): # reported to annoy-user by Kireet Reddy @@ -223,5 +223,7 @@ a.add_item(0, [1, 0, 0]) a.build(10) a.save('1.ann') - self.assertEquals(a.get_nns_by_vector([1, 0, 0], 3, include_distances=True), ([0], [0.0])) + indices, dists = a.get_nns_by_vector([1, 0, 0], 3, include_distances=True) + self.assertEquals(indices, [0]) + self.assertAlmostEqual(dists[0] ** 2, 0.0) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.16.0/test/annoy_test.lua new/annoy-1.16.3/test/annoy_test.lua --- old/annoy-1.16.0/test/annoy_test.lua 2019-07-09 04:15:02.000000000 +0200 +++ new/annoy-1.16.3/test/annoy_test.lua 2019-12-26 22:11:45.000000000 +0100 @@ -496,16 +496,6 @@ assert.same(u, y) end) - it("save_without_build", function() - -- Issue #61 - local i = AnnoyIndex(10) - i:add_item(1000, randomVector(10, 0, 1)) - i:save('x.tree') - local j = AnnoyIndex(10) - j:load('x.tree') - j:build(10) - end) - it("on_disk_build", function() local f = 2 local i = AnnoyIndex(f, 'euclidean') diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.16.0/test/dot_index_test.py new/annoy-1.16.3/test/dot_index_test.py --- old/annoy-1.16.0/test/dot_index_test.py 2019-07-09 04:15:02.000000000 +0200 +++ new/annoy-1.16.3/test/dot_index_test.py 2019-12-26 22:11:45.000000000 +0100 @@ -155,4 +155,4 @@ i.get_item_vector(a), i.get_item_vector(b) )) - self.assertEqual(dist, i.get_distance(a, b)) + self.assertAlmostEqual(dist, i.get_distance(a, b)) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.16.0/test/index_test.py new/annoy-1.16.3/test/index_test.py --- old/annoy-1.16.0/test/index_test.py 2019-07-09 04:15:02.000000000 +0200 +++ new/annoy-1.16.3/test/index_test.py 2019-12-26 22:11:45.000000000 +0100 @@ -50,8 +50,11 @@ def test_save_twice(self): # Issue #100 t = AnnoyIndex(10, 'angular') - t.save("t.ann") - t.save("t.ann") + for i in range(100): + t.add_item(i, [random.gauss(0, 1) for z in range(10)]) + t.build(10) + t.save('t1.ann') + t.save('t2.ann') def test_load_save(self): # Issue #61 @@ -78,13 +81,11 @@ self.assertEqual(u, y) def test_save_without_build(self): - # Issue #61 - i = AnnoyIndex(10, 'angular') - i.add_item(1000, [random.gauss(0, 1) for z in range(10)]) - i.save('x.tree') - j = AnnoyIndex(10, 'angular') - j.load('x.tree') - self.assertRaises(Exception, j.build, 10) + t = AnnoyIndex(10, 'angular') + for i in range(100): + t.add_item(i, [random.gauss(0, 1) for z in range(10)]) + # Note: in earlier version, this was allowed (see eg #61) + self.assertRaises(Exception, t.save, 'x.tree') def test_unbuild_with_loaded_tree(self): i = AnnoyIndex(10, 'angular') @@ -184,25 +185,11 @@ t.add_item(i, v) t.build(10) - if sys.platform == "linux" or sys.platform == "linux2": - # linux - try: - t.save("/dev/full") - self.fail("didn't get expected exception") - except Exception as e: - self.assertTrue('No space left on device' in str(e)) - elif sys.platform == "darwin": - volume = "FULLDISK" - device = os.popen('hdiutil attach -nomount ram://64').read() - os.popen('diskutil erasevolume MS-DOS %s %s' % (volume, device)) - os.popen('touch "/Volumes/%s/full"' % volume) - try: - t.save('/Volumes/%s/annoy.tree' % volume) - self.fail("didn't get expected exception") - except Exception as e: - self.assertTrue('No space left on device' in str(e)) - finally: - os.popen("hdiutil detach %s" % device) + if os.name == 'nt': + path = 'Z:\\xyz.annoy' + else: + path = '/x/y/z.annoy' + self.assertRaises(Exception, t.save, path) def test_dimension_mismatch(self): t = AnnoyIndex(100, 'angular') @@ -224,6 +211,37 @@ t.build(10) t.save('test.annoy') - # Used to segfault + # Used to segfault: v = [random.gauss(0, 1) for z in range(100)] self.assertRaises(Exception, t.add_item, i, v) + + def test_build_twice(self): + # 420 + t = AnnoyIndex(100, 'angular') + for i in range(1000): + t.add_item(i, [random.gauss(0, 1) for z in range(100)]) + t.build(10) + # Used to segfault: + self.assertRaises(Exception, t.build, 10) + + def test_very_large_index(self): + # 388 + f = 3 + dangerous_size = 2**31 + size_per_vector = 4*(f+3) + n_vectors = int(dangerous_size / size_per_vector) + m = AnnoyIndex(3, 'angular') + m.verbose(True) + for i in range(100): + m.add_item(n_vectors+i, [random.gauss(0, 1) for z in range(f)]) + n_trees = 10 + m.build(n_trees) + path = 'test_big.annoy' + m.save(path) # Raises on Windows + + # Sanity check size of index + self.assertGreaterEqual(os.path.getsize(path), dangerous_size) + self.assertLess(os.path.getsize(path), dangerous_size + 100e3) + + # Sanity check number of trees + self.assertEquals(m.get_n_trees(), n_trees) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.16.0/tox.ini new/annoy-1.16.3/tox.ini --- old/annoy-1.16.0/tox.ini 2019-07-09 04:15:02.000000000 +0200 +++ new/annoy-1.16.3/tox.ini 2019-12-26 22:11:45.000000000 +0100 @@ -18,7 +18,7 @@ mkdir -p {env:GOPATH:}/src/annoyindex wget https://storage.googleapis.com/golang/go1.5.linux-amd64.tar.gz sudo tar -C /usr/local -xzf go1.5.linux-amd64.tar.gz - sudo add-apt-repository -y ppa:rosmo/swig3.0.7 + sudo add-apt-repository -y ppa:timsc/swig-3.0.12 sudo apt-get update -qq sudo apt-get install -y swig3.0 swig3.0 -go -intgosize 64 -cgo -c++ src/annoygomodule.i
