Hello community,

here is the log from the commit of package python-annoy for openSUSE:Factory 
checked in at 2020-01-17 16:06:21
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-annoy (Old)
 and      /work/SRC/openSUSE:Factory/.python-annoy.new.26092 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Package is "python-annoy"

Fri Jan 17 16:06:21 2020 rev:8 rq:765071 version:1.16.3

Changes:
--------
--- /work/SRC/openSUSE:Factory/python-annoy/python-annoy.changes        2019-10-02 12:00:53.398725018 +0200
+++ /work/SRC/openSUSE:Factory/.python-annoy.new.26092/python-annoy.changes     2020-01-17 16:07:19.944495260 +0100
@@ -1,0 +2,9 @@
+Thu Jan 16 17:16:30 UTC 2020 - Marketa Calabkova <[email protected]>
+
+- update to 1.16.3
+  * some error message improvements
+  * doing more stack allocations instead of heap
+  * fixes to a few tests that were failing on other platforms
+- Rebase patch reproducible.patch
+
+-------------------------------------------------------------------

Old:
----
  v1.16.0.tar.gz

New:
----
  v1.16.3.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ python-annoy.spec ++++++
--- /var/tmp/diff_new_pack.B8h3qf/_old  2020-01-17 16:07:21.092495803 +0100
+++ /var/tmp/diff_new_pack.B8h3qf/_new  2020-01-17 16:07:21.096495805 +0100
@@ -1,7 +1,7 @@
 #
 # spec file for package python-annoy
 #
-# Copyright (c) 2019 SUSE LINUX GmbH, Nuernberg, Germany.
+# Copyright (c) 2020 SUSE LLC
 #
 # All modifications and additions to the file contributed by third parties
 # remain the property of their copyright owners, unless otherwise agreed
@@ -18,12 +18,12 @@
 
 %{?!python_module:%define python_module() python-%{**} python3-%{**}}
 Name:           python-annoy
-Version:        1.16.0
+Version:        1.16.3
 Release:        0
 Summary:        Approximation of Nearest Neighbors
 License:        Apache-2.0
 Group:          Development/Languages/Python
-Url:            https://github.com/spotify/annoy
+URL:            https://github.com/spotify/annoy
 Source:         https://github.com/spotify/annoy/archive/v%{version}.tar.gz
 # PATCH-FIX-OPENSUSE boo#1100677
 Patch0:         reproducible.patch

++++++ reproducible.patch ++++++
--- /var/tmp/diff_new_pack.B8h3qf/_old  2020-01-17 16:07:21.112495812 +0100
+++ /var/tmp/diff_new_pack.B8h3qf/_new  2020-01-17 16:07:21.112495812 +0100
@@ -3,22 +3,14 @@
 
 https://bugzilla.opensuse.org/show_bug.cgi?id=1100677
 
-Index: annoy-1.16.0/setup.py
+Index: annoy-1.16.3/setup.py
 ===================================================================
---- annoy-1.16.0.orig/setup.py
-+++ annoy-1.16.0/setup.py
-@@ -36,29 +36,10 @@ with codecs.open('README.rst', encoding=
-     long_description = readme_note + fobj.read()
- 
- # Various platform-dependent extras
-+cputune = []
- extra_compile_args = []
+--- annoy-1.16.3.orig/setup.py
++++ annoy-1.16.3/setup.py
+@@ -38,22 +38,6 @@ with codecs.open('README.rst', encoding=
+ extra_compile_args = ['-D_CRT_SECURE_NO_WARNINGS']
  extra_link_args = []
  
--if os.environ.get('TRAVIS') == 'true':
--    # Resolving some annoying issue
--    extra_compile_args += ['-mno-avx']
--
 -# Not all CPUs have march as a tuning parameter
 -cputune = ['-march=native',]
 -if platform.machine() == 'ppc64le':
@@ -35,6 +27,6 @@
 -    extra_compile_args += ['-std=c++11', '-mmacosx-version-min=10.9']
 -    extra_link_args += ['-stdlib=libc++', '-mmacosx-version-min=10.9']
 -
- setup(name='annoy',
-       version='1.16.0',
-       description='Approximate Nearest Neighbors in C++/Python optimized for memory usage and loading/saving to disk.',
+ # Manual configuration, you're on your own here.
+ manual_compiler_args = os.environ.get('ANNOY_COMPILER_ARGS', None)
+ if manual_compiler_args:

++++++ v1.16.0.tar.gz -> v1.16.3.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.16.0/.gitignore new/annoy-1.16.3/.gitignore
--- old/annoy-1.16.0/.gitignore 2019-07-09 04:15:02.000000000 +0200
+++ new/annoy-1.16.3/.gitignore 2019-12-26 22:11:45.000000000 +0100
@@ -12,8 +12,8 @@
 *.idea
 
 # testing
-blah.ann
-t.ann
-x.tree
-test.tree
-foo.idx
+*.ann
+*.tree
+*.annoy
+*.idx
+*.hdf5
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.16.0/.travis.yml new/annoy-1.16.3/.travis.yml
--- old/annoy-1.16.0/.travis.yml        2019-07-09 04:15:02.000000000 +0200
+++ new/annoy-1.16.3/.travis.yml        2019-12-26 22:11:45.000000000 +0100
@@ -9,7 +9,10 @@
       env:
         - TOXENV=py36
     - python: 3.7
-      dist: xenial
+      env:
+        - TOXENV=py37
+    - os: osx
+      language: generic
       env:
         - TOXENV=py37
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.16.0/README.rst new/annoy-1.16.3/README.rst
--- old/annoy-1.16.0/README.rst 2019-07-09 04:15:02.000000000 +0200
+++ new/annoy-1.16.3/README.rst 2019-12-26 22:11:45.000000000 +0100
@@ -57,9 +57,9 @@
   import random
 
   f = 40
-  t = AnnoyIndex(f)  # Length of item vector that will be indexed
-  for i in xrange(1000):
-      v = [random.gauss(0, 1) for z in xrange(f)]
+  t = AnnoyIndex(f, 'angular')  # Length of item vector that will be indexed
+  for i in range(1000):
+      v = [random.gauss(0, 1) for z in range(f)]
       t.add_item(i, v)
 
   t.build(10) # 10 trees
@@ -67,7 +67,7 @@
 
   # ...
 
-  u = AnnoyIndex(f)
+  u = AnnoyIndex(f, 'angular')
   u.load('test.ann') # super fast, will just mmap the file
   print(u.get_nns_by_item(0, 1000)) # will find the 1000 nearest neighbors
 
@@ -76,7 +76,7 @@
 Full Python API
 ---------------
 
-* ``AnnoyIndex(f, metric='angular')`` returns a new index that's read-write and stores vector of ``f`` dimensions. Metric can be ``"angular"``, ``"euclidean"``, ``"manhattan"``, ``"hamming"``, or ``"dot"``.
+* ``AnnoyIndex(f, metric)`` returns a new index that's read-write and stores vector of ``f`` dimensions. Metric can be ``"angular"``, ``"euclidean"``, ``"manhattan"``, ``"hamming"``, or ``"dot"``.
 * ``a.add_item(i, v)`` adds item ``i`` (any nonnegative integer) with vector ``v``. Note that it will allocate memory for ``max(i)+1`` items.
 * ``a.build(n_trees)`` builds a forest of ``n_trees`` trees. More trees gives higher precision when querying. After calling ``build``, no more items can be added.
 * ``a.save(fn, prefault=False)`` saves the index to disk and loads it (see next function). After saving, no more items can be added.
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.16.0/RELEASE.md new/annoy-1.16.3/RELEASE.md
--- old/annoy-1.16.0/RELEASE.md 2019-07-09 04:15:02.000000000 +0200
+++ new/annoy-1.16.3/RELEASE.md 2019-12-26 22:11:45.000000000 +0100
@@ -3,11 +3,11 @@
 
 1. Make sure you're on master. `git checkout master && git fetch && git reset --hard origin/master`
 1. Update `setup.py` to the newest version, `git add setup.py && git commit -m "version 1.2.3"`
-1. `python setup.py sdist`
+1. `python setup.py sdist bdist_wheel`
 1. `git tag -a v1.2.3 -m "version 1.2.3"`
 1. `git push --tags origin master` to push the last version to Github
 1. Go to https://github.com/spotify/annoy/releases and click "Draft a new release"
-1. `twine upload dist/*`
+1. `twine upload dist/annoy-1.2.3*`
 
 TODO
 ----
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.16.0/setup.py new/annoy-1.16.3/setup.py
--- old/annoy-1.16.0/setup.py   2019-07-09 04:15:02.000000000 +0200
+++ new/annoy-1.16.3/setup.py   2019-12-26 22:11:45.000000000 +0100
@@ -19,7 +19,6 @@
 import codecs
 import os
 import platform
-import sys
 
 readme_note = """\
 .. note::
@@ -36,13 +35,9 @@
     long_description = readme_note + fobj.read()
 
 # Various platform-dependent extras
-extra_compile_args = []
+extra_compile_args = ['-D_CRT_SECURE_NO_WARNINGS']
 extra_link_args = []
 
-if os.environ.get('TRAVIS') == 'true':
-    # Resolving some annoying issue
-    extra_compile_args += ['-mno-avx']
-
 # Not all CPUs have march as a tuning parameter
 cputune = ['-march=native',]
 if platform.machine() == 'ppc64le':
@@ -59,17 +54,25 @@
     extra_compile_args += ['-std=c++11', '-mmacosx-version-min=10.9']
     extra_link_args += ['-stdlib=libc++', '-mmacosx-version-min=10.9']
 
+# Manual configuration, you're on your own here.
+manual_compiler_args = os.environ.get('ANNOY_COMPILER_ARGS', None)
+if manual_compiler_args:
+    extra_compile_args = manual_compiler_args.split(',')
+manual_linker_args = os.environ.get('ANNOY_LINKER_ARGS', None)
+if manual_linker_args:
+    extra_link_args = manual_linker_args.split(',')
+
 setup(name='annoy',
-      version='1.16.0',
+      version='1.16.3',
       description='Approximate Nearest Neighbors in C++/Python optimized for memory usage and loading/saving to disk.',
       packages=['annoy'],
       ext_modules=[
-        Extension(
-            'annoy.annoylib', ['src/annoymodule.cc'],
-            depends=['src/annoylib.h', 'src/kissrandom.h', 'src/mman.h'],
-            extra_compile_args=extra_compile_args,
-            extra_link_args=extra_link_args,
-        )
+          Extension(
+              'annoy.annoylib', ['src/annoymodule.cc'],
+              depends=['src/annoylib.h', 'src/kissrandom.h', 'src/mman.h'],
+              extra_compile_args=extra_compile_args,
+              extra_link_args=extra_link_args,
+          )
       ],
       long_description=long_description,
       author='Erik Bernhardsson',
@@ -87,5 +90,6 @@
           'Programming Language :: Python :: 3.6',
       ],
       keywords='nns, approximate nearest neighbor search',
-      setup_requires=['nose>=1.0']
-    )
+      setup_requires=['nose>=1.0'],
+      tests_require=['numpy', 'h5py']
+      )
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.16.0/src/annoylib.h new/annoy-1.16.3/src/annoylib.h
--- old/annoy-1.16.0/src/annoylib.h     2019-07-09 04:15:02.000000000 +0200
+++ new/annoy-1.16.3/src/annoylib.h     2019-12-26 22:11:45.000000000 +0100
@@ -31,11 +31,16 @@
 typedef unsigned char     uint8_t;
 typedef signed __int32    int32_t;
 typedef unsigned __int64  uint64_t;
+typedef signed __int64    int64_t;
 #else
 #include <stdint.h>
 #endif
 
+
 #if defined(_MSC_VER) || defined(__MINGW32__)
+ // a bit hacky, but override some definitions to support 64 bit
+ #define off_t int64_t
+ #define lseek_getsize(fd) _lseeki64(fd, 0, SEEK_END)
  #ifndef NOMINMAX
   #define NOMINMAX
  #endif
@@ -43,6 +48,7 @@
  #include <windows.h>
 #else
  #include <sys/mman.h>
+ #define lseek_getsize(fd) lseek(fd, 0, SEEK_END)
 #endif
 
 #include <cerrno>
@@ -66,6 +72,22 @@
   #define showUpdate(...) { __ERROR_PRINTER_OVERRIDE__( __VA_ARGS__ ); }
 #endif
 
+void set_error_from_errno(char **error, const char* msg) {
+  showUpdate("%s: %s (%d)\n", msg, strerror(errno), errno);
+  if (error) {
+    *error = (char *)malloc(256);  // TODO: win doesn't support snprintf
+    sprintf(*error, "%s: %s (%d)", msg, strerror(errno), errno);
+  }
+}
+
+void set_error_from_string(char **error, const char* msg) {
+  showUpdate("%s\n", msg);
+  if (error) {
+    *error = (char *)malloc(strlen(msg) + 1);
+    strcpy(*error, msg);
+  }
+}
+
 
 #ifndef _MSC_VER
 #define popcount __builtin_popcountll
@@ -74,12 +96,14 @@
 #define popcount cole_popcount
 #endif
 
-#ifndef NO_MANUAL_VECTORIZATION
-#if defined(__AVX512F__)
+#if !defined(NO_MANUAL_VECTORIZATION) && defined(__GNUC__) && (__GNUC__ >6) && defined(__AVX512F__)  // See #402
+#pragma message "Using 512-bit AVX instructions"
 #define USE_AVX512
-#elif defined(__AVX__) && defined (__SSE__) && defined(__SSE2__) && defined(__SSE3__)
+#elif !defined(NO_MANUAL_VECTORIZATION) && defined(__AVX__) && defined (__SSE__) && defined(__SSE2__) && defined(__SSE3__)
+#pragma message "Using 128-bit AVX instructions"
 #define USE_AVX
-#endif
+#else
+#pragma message "Using no AVX instructions"
 #endif
 
 #if defined(USE_AVX) || defined(USE_AVX512)
@@ -348,7 +372,7 @@
     size_t k = random.index(count);
     T di = ic * Distance::distance(p, nodes[k], f),
       dj = jc * Distance::distance(q, nodes[k], f);
-    T norm = cosine ? get_norm(nodes[k]->v, f) : 1.0;
+    T norm = cosine ? get_norm(nodes[k]->v, f) : 1;
     if (!(norm > T(0))) {
       continue;
     }
@@ -440,18 +464,16 @@
     if (dot != 0)
       return (dot > 0);
     else
-      return random.flip();
+      return (bool)random.flip();
   }
   template<typename S, typename T, typename Random>
   static inline void create_split(const vector<Node<S, T>*>& nodes, int f, size_t s, Random& random, Node<S, T>* n) {
-    Node<S, T>* p = (Node<S, T>*)malloc(s); // TODO: avoid
-    Node<S, T>* q = (Node<S, T>*)malloc(s); // TODO: avoid
+    Node<S, T>* p = (Node<S, T>*)alloca(s);
+    Node<S, T>* q = (Node<S, T>*)alloca(s);
     two_means<T, Random, Angular, Node<S, T> >(nodes, f, random, true, p, q);
     for (int z = 0; z < f; z++)
       n->v[z] = p->v[z] - q->v[z];
     Base::normalize<T, Node<S, T> >(n, f);
-    free(p);
-    free(q);
   }
   template<typename T>
   static inline T normalized_distance(T distance) {
@@ -517,8 +539,8 @@
 
   template<typename S, typename T, typename Random>
   static inline void create_split(const vector<Node<S, T>*>& nodes, int f, size_t s, Random& random, Node<S, T>* n) {
-    Node<S, T>* p = (Node<S, T>*)malloc(s); // TODO: avoid
-    Node<S, T>* q = (Node<S, T>*)malloc(s); // TODO: avoid
+    Node<S, T>* p = (Node<S, T>*)alloca(s);
+    Node<S, T>* q = (Node<S, T>*)alloca(s);
     DotProduct::zero_value(p); 
     DotProduct::zero_value(q);
     two_means<T, Random, DotProduct, Node<S, T> >(nodes, f, random, true, p, q);
@@ -526,8 +548,6 @@
       n->v[z] = p->v[z] - q->v[z];
     n->dot_factor = p->dot_factor - q->dot_factor;
     DotProduct::normalize<T, Node<S, T> >(n, f);
-    free(p);
-    free(q);
   }
 
   template<typename T, typename Node>
@@ -551,7 +571,7 @@
     if (dot != 0)
       return (dot > 0);
     else
-      return random.flip();
+      return (bool)random.flip();
   }
 
   template<typename T>
@@ -708,7 +728,7 @@
     if (dot != 0)
       return (dot > 0);
     else
-      return random.flip();
+      return (bool)random.flip();
   }
   template<typename T>
   static inline T pq_distance(T distance, T margin, int child_nr) {
@@ -730,8 +750,8 @@
   }
   template<typename S, typename T, typename Random>
   static inline void create_split(const vector<Node<S, T>*>& nodes, int f, size_t s, Random& random, Node<S, T>* n) {
-    Node<S, T>* p = (Node<S, T>*)malloc(s); // TODO: avoid
-    Node<S, T>* q = (Node<S, T>*)malloc(s); // TODO: avoid
+    Node<S, T>* p = (Node<S, T>*)alloca(s);
+    Node<S, T>* q = (Node<S, T>*)alloca(s);
     two_means<T, Random, Euclidean, Node<S, T> >(nodes, f, random, false, p, q);
 
     for (int z = 0; z < f; z++)
@@ -740,8 +760,6 @@
     n->a = 0.0;
     for (int z = 0; z < f; z++)
       n->a += -n->v[z] * (p->v[z] + q->v[z]) / 2;
-    free(p);
-    free(q);
   }
   template<typename T>
   static inline T normalized_distance(T distance) {
@@ -763,8 +781,8 @@
   }
   template<typename S, typename T, typename Random>
   static inline void create_split(const vector<Node<S, T>*>& nodes, int f, size_t s, Random& random, Node<S, T>* n) {
-    Node<S, T>* p = (Node<S, T>*)malloc(s); // TODO: avoid
-    Node<S, T>* q = (Node<S, T>*)malloc(s); // TODO: avoid
+    Node<S, T>* p = (Node<S, T>*)alloca(s);
+    Node<S, T>* q = (Node<S, T>*)alloca(s);
     two_means<T, Random, Manhattan, Node<S, T> >(nodes, f, random, false, p, q);
 
     for (int z = 0; z < f; z++)
@@ -773,8 +791,6 @@
     n->a = 0.0;
     for (int z = 0; z < f; z++)
       n->a += -n->v[z] * (p->v[z] + q->v[z]) / 2;
-    free(p);
-    free(q);
   }
   template<typename T>
   static inline T normalized_distance(T distance) {
@@ -791,6 +807,7 @@
 template<typename S, typename T>
 class AnnoyIndexInterface {
  public:
+  // Note that the methods with an **error argument will allocate memory and write the pointer to that string if error is non-NULL
   virtual ~AnnoyIndexInterface() {};
   virtual bool add_item(S item, const T* w, char** error=NULL) = 0;
   virtual bool build(int q, char** error=NULL) = 0;
@@ -836,11 +853,13 @@
   bool _verbose;
   int _fd;
   bool _on_disk;
+  bool _built;
 public:
 
-  AnnoyIndex(int f) : _f(f), _random() {
+   AnnoyIndex(int f) : _f(f), _random() {
     _s = offsetof(Node, v) + _f * sizeof(T); // Size of each node
     _verbose = false;
+    _built = false;
     _K = (S) (((size_t) (_s - offsetof(Node, children))) / sizeof(S)); // Max number of descendants to fit into node
     reinitialize(); // Reset everything
   }
@@ -859,8 +878,7 @@
   template<typename W>
   bool add_item_impl(S item, const W& w, char** error=NULL) {
     if (_loaded) {
-      showUpdate("You can't add an item to a loaded index\n");
-      if (error) *error = (char *)"You can't add an item to a loaded index";
+      set_error_from_string(error, "You can't add an item to a loaded index");
       return false;
     }
     _allocate_size(item + 1);
@@ -887,15 +905,13 @@
     _on_disk = true;
     _fd = open(file, O_RDWR | O_CREAT | O_TRUNC, (int) 0600);
     if (_fd == -1) {
-      showUpdate("Error: file descriptor is -1\n");
-      if (error) *error = strerror(errno);
+      set_error_from_errno(error, "Unable to open");
       _fd = 0;
       return false;
     }
     _nodes_size = 1;
     if (ftruncate(_fd, _s * _nodes_size) == -1) {
-      showUpdate("Error truncating file: %s\n", strerror(errno));
-      if (error) *error = strerror(errno);
+      set_error_from_errno(error, "Unable to truncate");
       return false;
     }
 #ifdef MAP_POPULATE
@@ -908,8 +924,12 @@
     
   bool build(int q, char** error=NULL) {
     if (_loaded) {
-      showUpdate("You can't build a loaded index\n");
-      if (error) *error = (char *)"You can't build a loaded index";
+      set_error_from_string(error, "You can't build a loaded index");
+      return false;
+    }
+
+    if (_built) {
+      set_error_from_string(error, "You can't build a built index");
       return false;
     }
 
@@ -944,30 +964,34 @@
     if (_on_disk) {
       _nodes = remap_memory(_nodes, _fd, _s * _nodes_size, _s * _n_nodes);
       if (ftruncate(_fd, _s * _n_nodes)) {
-       // TODO: this probably creates an index in a corrupt state... not sure what to do
-       showUpdate("Error truncating file: %s\n", strerror(errno));
-       if (error) *error = strerror(errno);
-       return false;
+        // TODO: this probably creates an index in a corrupt state... not sure what to do
+        set_error_from_errno(error, "Unable to truncate");
+        return false;
       }
       _nodes_size = _n_nodes;
     }
+    _built = true;
     return true;
   }
   
   bool unbuild(char** error=NULL) {
     if (_loaded) {
-      showUpdate("You can't unbuild a loaded index\n");
-      if (error) *error = (char *)"You can't unbuild a loaded index";
+      set_error_from_string(error, "You can't unbuild a loaded index");
       return false;
     }
 
     _roots.clear();
     _n_nodes = _n_items;
+    _built = false;
 
     return true;
   }
 
   bool save(const char* filename, bool prefault=false, char** error=NULL) {
+    if (!_built) {
+      set_error_from_string(error, "You can't save an index that hasn't been built");
+      return false;
+    }
     if (_on_disk) {
       return true;
     } else {
@@ -976,20 +1000,17 @@
 
       FILE *f = fopen(filename, "wb");
       if (f == NULL) {
-        showUpdate("Unable to open: %s\n", strerror(errno));
-        if (error) *error = strerror(errno);
+        set_error_from_errno(error, "Unable to open");
         return false;
       }
 
       if (fwrite(_nodes, _s, _n_nodes, f) != (size_t) _n_nodes) {
-        showUpdate("Unable to write: %s\n", strerror(errno));
-        if (error) *error = strerror(errno);
+        set_error_from_errno(error, "Unable to write");
         return false;
       }
 
       if (fclose(f) == EOF) {
-        showUpdate("Unable to close: %s\n", strerror(errno));
-        if (error) *error = strerror(errno);
+        set_error_from_errno(error, "Unable to close");
         return false;
       }
 
@@ -1030,15 +1051,23 @@
   bool load(const char* filename, bool prefault=false, char** error=NULL) {
     _fd = open(filename, O_RDONLY, (int)0400);
     if (_fd == -1) {
-      showUpdate("Error: file descriptor is -1\n");
-      if (error) *error = strerror(errno);
+      set_error_from_errno(error, "Unable to open");
       _fd = 0;
       return false;
     }
-    off_t size = lseek(_fd, 0, SEEK_END);
-    if (size <= 0) {
-      showUpdate("Warning: index size %zu\n", (size_t)size);
+    off_t size = lseek_getsize(_fd);
+    if (size == -1) {
+      set_error_from_errno(error, "Unable to get size");
+      return false;
+    } else if (size == 0) {
+      set_error_from_errno(error, "Size of file is zero");
+      return false;
+    } else if (size % _s) {
+      // Something is fishy with this index!
+      set_error_from_errno(error, "Index size is not a multiple of vector size");
+      return false;
     }
+
     int flags = MAP_SHARED;
     if (prefault) {
 #ifdef MAP_POPULATE
@@ -1048,12 +1077,6 @@
 #endif
     }
     _nodes = (Node*)mmap(0, size, PROT_READ, flags, _fd, 0);
-    if (size % _s) {
-      // Something is fishy with this index!
-      showUpdate("Error: index size %zu is not a multiple of vector size %zu\n", (size_t)size, _s);
-      if (error) *error = (char *)"Index size is not a multiple of vector size";
-      return false;
-    }
     _n_nodes = (S)(size / _s);
 
     // Find the roots by scanning the end of the file and taking the nodes with most descendants
@@ -1072,6 +1095,7 @@
     if (_roots.size() > 1 && _get(_roots.front())->children[0] == _get(_roots.back())->children[0])
       _roots.pop_back();
     _loaded = true;
+    _built = true;
     _n_items = m;
     if (_verbose) showUpdate("found %lu roots with degree %d\n", _roots.size(), m);
     return true;
@@ -1096,7 +1120,7 @@
   }
 
   S get_n_trees() const {
-    return _roots.size();
+    return (S)_roots.size();
   }
 
   void verbose(bool v) {
@@ -1121,7 +1145,8 @@
       void *old = _nodes;
       
       if (_on_disk) {
-        ftruncate(_fd, _s * new_nodes_size);
+        int rc = ftruncate(_fd, _s * new_nodes_size);
+        if (_verbose && rc) showUpdate("File truncation error\n");
         _nodes = remap_memory(_nodes, _fd, _s * _nodes_size, _s * new_nodes_size);
       } else {
         _nodes = realloc(_nodes, _s * new_nodes_size);
@@ -1170,7 +1195,7 @@
     }
 
     vector<S> children_indices[2];
-    Node* m = (Node*)malloc(_s); // TODO: avoid
+    Node* m = (Node*)alloca(_s);
     D::create_split(children, _f, _s, _random, m);
 
     for (size_t i = 0; i < indices.size(); i++) {
@@ -1197,7 +1222,7 @@
 
       // Set the vector to 0.0
       for (int z = 0; z < _f; z++)
-        m->v[z] = 0.0;
+        m->v[z] = 0;
 
       for (size_t i = 0; i < indices.size(); i++) {
         S j = indices[i];
@@ -1217,13 +1242,12 @@
     _allocate_size(_n_nodes + 1);
     S item = _n_nodes++;
     memcpy(_get(item), m, _s);
-    free(m);
 
     return item;
   }
 
   void _get_all_nns(const T* v, size_t n, size_t search_k, vector<S>* result, vector<T>* distances) const {
-    Node* v_node = (Node *)malloc(_s); // TODO: avoid
+    Node* v_node = (Node *)alloca(_s);
     D::template zero_value<Node>(v_node);
     memcpy(v_node->v, v, sizeof(T) * _f);
     D::init_node(v_node, _f);
@@ -1259,7 +1283,7 @@
 
     // Get distances for all items
     // To avoid calculating distance multiple times for any items, sort by id
-    sort(nns.begin(), nns.end());
+    std::sort(nns.begin(), nns.end());
     vector<pair<T, S> > nns_dist;
     S last = -1;
     for (size_t i = 0; i < nns.size(); i++) {
@@ -1279,7 +1303,6 @@
         distances->push_back(D::normalized_distance(nns_dist[i].first));
       result->push_back(nns_dist[i].second);
     }
-    free(v_node);
   }
 };
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.16.0/src/annoymodule.cc new/annoy-1.16.3/src/annoymodule.cc
--- old/annoy-1.16.0/src/annoymodule.cc 2019-07-09 04:15:02.000000000 +0200
+++ new/annoy-1.16.3/src/annoymodule.cc 2019-12-26 22:11:45.000000000 +0100
@@ -185,6 +185,7 @@
 
   if (!self->ptr->load(filename, prefault, &error)) {
     PyErr_SetString(PyExc_IOError, error);
+    free(error);
     return NULL;
   }
   Py_RETURN_TRUE;
@@ -203,6 +204,7 @@
 
   if (!self->ptr->save(filename, prefault, &error)) {
     PyErr_SetString(PyExc_IOError, error);
+    free(error);
     return NULL;
   }
   Py_RETURN_TRUE;
@@ -361,6 +363,7 @@
   char* error;
   if (!self->ptr->add_item(item, &w[0], &error)) {
     PyErr_SetString(PyExc_Exception, error);
+    free(error);
     return NULL;
   }
 
@@ -378,6 +381,7 @@
 
   if (!self->ptr->on_disk_build(filename, &error)) {
     PyErr_SetString(PyExc_IOError, error);
+    free(error);
     return NULL;
   }
   Py_RETURN_TRUE;
@@ -399,6 +403,7 @@
   Py_END_ALLOW_THREADS;
   if (!res) {
     PyErr_SetString(PyExc_Exception, error);
+    free(error);
     return NULL;
   }
 
@@ -414,6 +419,7 @@
   char* error;
   if (!self->ptr->unbuild(&error)) {
     PyErr_SetString(PyExc_Exception, error);
+    free(error);
     return NULL;
   }
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.16.0/src/mman.h new/annoy-1.16.3/src/mman.h
--- old/annoy-1.16.0/src/mman.h 2019-07-09 04:15:02.000000000 +0200
+++ new/annoy-1.16.3/src/mman.h 2019-12-26 22:11:45.000000000 +0100
@@ -209,7 +209,8 @@
     return -1;
 }
 
-int ftruncate(int fd, unsigned int size) {
+#if !defined(__MINGW32__)
+inline int ftruncate(int fd, unsigned int size) {
     if (fd < 0) {
         errno = EBADF;
         return -1;
@@ -232,5 +233,6 @@
 
     return 0;
 }
+#endif
 
 #endif 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.16.0/test/angular_index_test.py new/annoy-1.16.3/test/angular_index_test.py
--- old/annoy-1.16.0/test/angular_index_test.py 2019-07-09 04:15:02.000000000 +0200
+++ new/annoy-1.16.3/test/angular_index_test.py 2019-12-26 22:11:45.000000000 +0100
@@ -175,7 +175,7 @@
             i.add_item(j, numpy.random.normal(size=f))
         i.build(10)
         indices, dists = i.get_nns_by_item(0, 100000, include_distances=True)
-        self.assertTrue(max(dists) < 2.0)
+        self.assertLessEqual(max(dists), 2.0)
         self.assertAlmostEqual(min(dists), 0.0)
 
     def test_distance_consistency(self):
@@ -193,9 +193,9 @@
                 u_norm = numpy.array(u) * numpy.dot(u, u)**-0.5
                 v_norm = numpy.array(v) * numpy.dot(v, v)**-0.5
                 # cos = numpy.clip(1 - cosine(u, v), -1, 1) # scipy returns 1 - cos
-                self.assertAlmostEqual(dist, numpy.dot(u_norm - v_norm, u_norm - v_norm) ** 0.5)
+                self.assertAlmostEqual(dist ** 2, numpy.dot(u_norm - v_norm, u_norm - v_norm))
                 # self.assertAlmostEqual(dist, (2*(1 - cos))**0.5)
-                self.assertAlmostEqual(dist, sum([(x-y)**2 for x, y in zip(u_norm, v_norm)])**0.5)
+                self.assertAlmostEqual(dist ** 2, sum([(x-y)**2 for x, y in zip(u_norm, v_norm)]))
 
     def test_only_one_item(self):
         # reported to annoy-user by Kireet Reddy
@@ -223,5 +223,7 @@
         a.add_item(0, [1, 0, 0])
         a.build(10)
         a.save('1.ann')
-        self.assertEquals(a.get_nns_by_vector([1, 0, 0], 3, include_distances=True), ([0], [0.0]))
+        indices, dists = a.get_nns_by_vector([1, 0, 0], 3, include_distances=True)
+        self.assertEquals(indices, [0])
+        self.assertAlmostEqual(dists[0] ** 2, 0.0)
     
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.16.0/test/annoy_test.lua new/annoy-1.16.3/test/annoy_test.lua
--- old/annoy-1.16.0/test/annoy_test.lua        2019-07-09 04:15:02.000000000 +0200
+++ new/annoy-1.16.3/test/annoy_test.lua        2019-12-26 22:11:45.000000000 +0100
@@ -496,16 +496,6 @@
         assert.same(u, y)
     end)
 
-    it("save_without_build", function()
-        -- Issue #61
-        local i = AnnoyIndex(10)
-        i:add_item(1000, randomVector(10, 0, 1))
-        i:save('x.tree')
-        local j = AnnoyIndex(10)
-        j:load('x.tree')
-        j:build(10)
-    end)
-    
     it("on_disk_build", function()
         local f = 2
         local i = AnnoyIndex(f, 'euclidean')
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.16.0/test/dot_index_test.py new/annoy-1.16.3/test/dot_index_test.py
--- old/annoy-1.16.0/test/dot_index_test.py     2019-07-09 04:15:02.000000000 +0200
+++ new/annoy-1.16.3/test/dot_index_test.py     2019-12-26 22:11:45.000000000 +0100
@@ -155,4 +155,4 @@
                     i.get_item_vector(a),
                     i.get_item_vector(b)
                 ))
-                self.assertEqual(dist, i.get_distance(a, b))
+                self.assertAlmostEqual(dist, i.get_distance(a, b))
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.16.0/test/index_test.py new/annoy-1.16.3/test/index_test.py
--- old/annoy-1.16.0/test/index_test.py 2019-07-09 04:15:02.000000000 +0200
+++ new/annoy-1.16.3/test/index_test.py 2019-12-26 22:11:45.000000000 +0100
@@ -50,8 +50,11 @@
     def test_save_twice(self):
         # Issue #100
         t = AnnoyIndex(10, 'angular')
-        t.save("t.ann")
-        t.save("t.ann")
+        for i in range(100):
+            t.add_item(i, [random.gauss(0, 1) for z in range(10)])
+        t.build(10)
+        t.save('t1.ann')
+        t.save('t2.ann')
 
     def test_load_save(self):
         # Issue #61
@@ -78,13 +81,11 @@
         self.assertEqual(u, y)
 
     def test_save_without_build(self):
-        # Issue #61
-        i = AnnoyIndex(10, 'angular')
-        i.add_item(1000, [random.gauss(0, 1) for z in range(10)])
-        i.save('x.tree')
-        j = AnnoyIndex(10, 'angular')
-        j.load('x.tree')
-        self.assertRaises(Exception, j.build, 10)
+        t = AnnoyIndex(10, 'angular')
+        for i in range(100):
+            t.add_item(i, [random.gauss(0, 1) for z in range(10)])
+        # Note: in earlier version, this was allowed (see eg #61)
+        self.assertRaises(Exception, t.save, 'x.tree')
         
     def test_unbuild_with_loaded_tree(self):
         i = AnnoyIndex(10, 'angular')
@@ -184,25 +185,11 @@
             t.add_item(i, v)
         t.build(10)
 
-        if sys.platform == "linux" or sys.platform == "linux2":
-            # linux
-            try:
-                t.save("/dev/full") 
-                self.fail("didn't get expected exception")
-            except Exception as e:
-                self.assertTrue('No space left on device' in str(e))
-        elif sys.platform == "darwin":
-            volume = "FULLDISK"
-            device = os.popen('hdiutil attach -nomount ram://64').read()
-            os.popen('diskutil erasevolume MS-DOS %s %s' % (volume, device))
-            os.popen('touch "/Volumes/%s/full"' % volume)
-            try:
-                t.save('/Volumes/%s/annoy.tree' % volume)
-                self.fail("didn't get expected exception")
-            except Exception as e:
-                self.assertTrue('No space left on device' in str(e))
-            finally:
-                os.popen("hdiutil detach %s" % device)
+        if os.name == 'nt':
+            path = 'Z:\\xyz.annoy'
+        else:
+            path = '/x/y/z.annoy'
+        self.assertRaises(Exception, t.save, path)
 
     def test_dimension_mismatch(self):
         t = AnnoyIndex(100, 'angular')
@@ -224,6 +211,37 @@
         t.build(10)
         t.save('test.annoy')
 
-        # Used to segfault
+        # Used to segfault:
         v = [random.gauss(0, 1) for z in range(100)]
         self.assertRaises(Exception, t.add_item, i, v)
+
+    def test_build_twice(self):
+        # 420
+        t = AnnoyIndex(100, 'angular')
+        for i in range(1000):
+            t.add_item(i, [random.gauss(0, 1) for z in range(100)])
+        t.build(10)
+        # Used to segfault:
+        self.assertRaises(Exception, t.build, 10)
+
+    def test_very_large_index(self):
+        # 388
+        f = 3
+        dangerous_size = 2**31
+        size_per_vector = 4*(f+3)
+        n_vectors = int(dangerous_size / size_per_vector)
+        m = AnnoyIndex(3, 'angular')
+        m.verbose(True)
+        for i in range(100):
+            m.add_item(n_vectors+i, [random.gauss(0, 1) for z in range(f)])
+        n_trees = 10
+        m.build(n_trees)
+        path = 'test_big.annoy'
+        m.save(path)  # Raises on Windows
+
+        # Sanity check size of index
+        self.assertGreaterEqual(os.path.getsize(path), dangerous_size)
+        self.assertLess(os.path.getsize(path), dangerous_size + 100e3)
+
+        # Sanity check number of trees
+        self.assertEquals(m.get_n_trees(), n_trees)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/annoy-1.16.0/tox.ini new/annoy-1.16.3/tox.ini
--- old/annoy-1.16.0/tox.ini    2019-07-09 04:15:02.000000000 +0200
+++ new/annoy-1.16.3/tox.ini    2019-12-26 22:11:45.000000000 +0100
@@ -18,7 +18,7 @@
   mkdir -p {env:GOPATH:}/src/annoyindex
   wget https://storage.googleapis.com/golang/go1.5.linux-amd64.tar.gz
   sudo tar -C /usr/local -xzf go1.5.linux-amd64.tar.gz
-  sudo add-apt-repository -y ppa:rosmo/swig3.0.7
+  sudo add-apt-repository -y ppa:timsc/swig-3.0.12
   sudo apt-get update -qq
   sudo apt-get install -y swig3.0
   swig3.0 -go -intgosize 64 -cgo -c++ src/annoygomodule.i


Reply via email to