Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package pgvector for openSUSE:Factory checked in at 2026-06-30 15:14:34 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/pgvector (Old) and /work/SRC/openSUSE:Factory/.pgvector.new.11887 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "pgvector" Tue Jun 30 15:14:34 2026 rev:7 rq:1362635 version:0.8.3 Changes: -------- --- /work/SRC/openSUSE:Factory/pgvector/pgvector.changes 2026-05-06 19:23:39.531342837 +0200 +++ /work/SRC/openSUSE:Factory/.pgvector.new.11887/pgvector.changes 2026-06-30 15:14:55.599077334 +0200 @@ -1,0 +2,8 @@ +Mon Jun 29 20:04:25 UTC 2026 - Dirk Müller <[email protected]> + +- update to 0.8.3: + * Fixed possible index corruption with HNSW vacuuming + * Fixed performance regression with Hamming distance and + Jaccard distance with Postgres 18 + +------------------------------------------------------------------- Old: ---- pgvector-0.8.2.tar.gz New: ---- pgvector-0.8.3.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ pgvector.spec ++++++ --- /var/tmp/diff_new_pack.iKi1i0/_old 2026-06-30 15:14:56.087093901 +0200 +++ /var/tmp/diff_new_pack.iKi1i0/_new 2026-06-30 15:14:56.091094037 +0200 @@ -33,7 +33,7 @@ BuildRequires: %{pg_name}-server-devel %pg_server_requires %endif -Version: 0.8.2 +Version: 0.8.3 Release: 0 Summary: Open-source vector similarity search for Postgres License: PostgreSQL ++++++ pgvector-0.8.2.tar.gz -> pgvector-0.8.3.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pgvector-0.8.2/.github/workflows/build.yml new/pgvector-0.8.3/.github/workflows/build.yml --- old/pgvector-0.8.2/.github/workflows/build.yml 2026-02-25 19:46:57.000000000 +0100 +++ new/pgvector-0.8.3/.github/workflows/build.yml 2026-06-18 06:26:13.000000000 +0200 @@ -97,7 +97,7 @@ with: postgres-version: ${{ matrix.postgres }} - run: | - call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat" && ^ + call "C:\Program Files\Microsoft Visual Studio\${{ matrix.os == 'windows-2025' && 18 || 2022 }}\Enterprise\VC\Auxiliary\Build\vcvars64.bat" && ^ nmake /NOLOGO /F Makefile.win && ^ nmake /NOLOGO /F Makefile.win install && ^ nmake /NOLOGO /F Makefile.win installcheck ${{ matrix.postgres != 17 && 'PG_REGRESS=$(PGROOT)\bin\pg_regress' || '' }} && ^ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pgvector-0.8.2/CHANGELOG.md new/pgvector-0.8.3/CHANGELOG.md --- old/pgvector-0.8.2/CHANGELOG.md 2026-02-25 19:46:57.000000000 +0100 +++ new/pgvector-0.8.3/CHANGELOG.md 2026-06-18 06:26:13.000000000 +0200 @@ -1,6 +1,11 @@ +## 0.8.3 (2026-06-17) + +- Fixed possible index corruption with HNSW vacuuming +- Fixed performance regression with Hamming distance and Jaccard distance with Postgres 18 + ## 0.8.2 (2026-02-25) -- Fixed buffer overflow with parallel HNSW index build +- Fixed buffer overflow with parallel HNSW index build - [more info](https://github.com/pgvector/pgvector/issues/959) - Improved `install` target on Windows - Fixed `Index Searches` in `EXPLAIN` output for Postgres 18 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pgvector-0.8.2/Dockerfile new/pgvector-0.8.3/Dockerfile --- old/pgvector-0.8.2/Dockerfile 2026-02-25 19:46:57.000000000 +0100 +++ new/pgvector-0.8.3/Dockerfile 2026-06-18 06:26:13.000000000 +0200 @@ -5,7 +5,7 @@ FROM postgres:$PG_MAJOR-$DEBIAN_CODENAME ARG PG_MAJOR -ADD https://github.com/pgvector/pgvector.git#v0.8.2 /tmp/pgvector +ADD https://github.com/pgvector/pgvector.git#v0.8.3 /tmp/pgvector RUN apt-get update && \ apt-mark hold locales && \ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pgvector-0.8.2/META.json new/pgvector-0.8.3/META.json --- old/pgvector-0.8.2/META.json 2026-02-25 19:46:57.000000000 +0100 +++ new/pgvector-0.8.3/META.json 2026-06-18 06:26:13.000000000 +0200 @@ -2,7 +2,7 @@ "name": "vector", "abstract": "Open-source vector similarity search for Postgres", "description": "Supports L2 distance, inner product, and cosine distance", - "version": "0.8.2", + "version": "0.8.3", "maintainer": [ "Andrew Kane <[email protected]>" ], @@ -20,7 +20,7 @@ "vector": { "file": "sql/vector.sql", "docfile": "README.md", - "version": "0.8.2", + "version": "0.8.3", "abstract": "Open-source vector similarity search for Postgres" } }, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pgvector-0.8.2/Makefile new/pgvector-0.8.3/Makefile --- old/pgvector-0.8.2/Makefile 2026-02-25 19:46:57.000000000 +0100 +++ new/pgvector-0.8.3/Makefile 2026-06-18 06:26:13.000000000 +0200 @@ -1,5 +1,5 @@ EXTENSION = vector -EXTVERSION = 0.8.2 +EXTVERSION = 0.8.3 MODULE_big = vector DATA = $(wildcard sql/*--*--*.sql) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pgvector-0.8.2/Makefile.win new/pgvector-0.8.3/Makefile.win --- old/pgvector-0.8.2/Makefile.win 2026-02-25 19:46:57.000000000 +0100 +++ new/pgvector-0.8.3/Makefile.win 2026-06-18 06:26:13.000000000 +0200 @@ -1,5 +1,5 @@ EXTENSION = vector -EXTVERSION = 0.8.2 +EXTVERSION = 0.8.3 DATA_built = sql\$(EXTENSION)--$(EXTVERSION).sql OBJS = src\bitutils.obj src\bitvec.obj src\halfutils.obj src\halfvec.obj src\hnsw.obj src\hnswbuild.obj src\hnswinsert.obj src\hnswscan.obj src\hnswutils.obj src\hnswvacuum.obj src\ivfbuild.obj src\ivfflat.obj src\ivfinsert.obj src\ivfkmeans.obj src\ivfscan.obj src\ivfutils.obj src\ivfvacuum.obj src\sparsevec.obj src\vector.obj diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pgvector-0.8.2/README.md new/pgvector-0.8.3/README.md --- old/pgvector-0.8.2/README.md 2026-02-25 19:46:57.000000000 +0100 +++ new/pgvector-0.8.3/README.md 2026-06-18 06:26:13.000000000 +0200 @@ -11,6 +11,8 @@ Plus [ACID](https://en.wikipedia.org/wiki/ACID) compliance, point-in-time recovery, JOINs, and all of the other [great features](https://www.postgresql.org/about/) of Postgres +Have a lot of vectors? Use [quantization](#scaling) to scale + [](https://github.com/pgvector/pgvector/actions) ## Installation @@ -21,7 +23,7 @@ ```sh cd /tmp -git clone --branch v0.8.2 https://github.com/pgvector/pgvector.git +git clone --branch v0.8.3 https://github.com/pgvector/pgvector.git cd pgvector make make install # may need sudo @@ -38,7 +40,7 @@ ```cmd set "PGROOT=C:\Program Files\PostgreSQL\18" cd %TEMP% -git clone --branch v0.8.2 https://github.com/pgvector/pgvector.git +git clone --branch v0.8.3 https://github.com/pgvector/pgvector.git cd pgvector nmake /F Makefile.win nmake /F Makefile.win install @@ -314,6 +316,8 @@ The [index options](#index-options) also have a significant impact on build time (use the defaults unless seeing low recall) +Use [binary quantization](#binary-quantization) for faster build times at scale + ### Indexing Progress Check [indexing progress](https://www.postgresql.org/docs/current/progress-reporting.html#CREATE-INDEX-PROGRESS-REPORTING) @@ -443,13 +447,7 @@ CREATE INDEX ON items USING hnsw (embedding vector_l2_ops); ``` -With approximate indexes, filtering is applied *after* the index is scanned. If a condition matches 10% of rows, with HNSW and the default `hnsw.ef_search` of 40, only 4 rows will match on average. For more rows, increase `hnsw.ef_search`. - -```sql -SET hnsw.ef_search = 200; -``` - -Starting with 0.8.0, you can enable [iterative index scans](#iterative-index-scans), which will automatically scan more of the index when needed. +With approximate indexes, filtering is applied *after* the index is scanned. If a condition matches 10% of rows, with HNSW and the default `hnsw.ef_search` of 40, only 4 rows will match on average. For more rows, enable [iterative index scans](#iterative-index-scans), which will automatically scan more of the index when needed. ```sql SET hnsw.iterative_scan = strict_order; @@ -673,6 +671,10 @@ Be sure to restart Postgres for changes to take effect. +### Storing + +Use the `halfvec` type instead of `vector` for a smaller working set. + ### Loading Use `COPY` for bulk loading data ([example](https://github.com/pgvector/pgvector-python/blob/master/examples/loading/example.py)). @@ -687,6 +689,8 @@ See index build time for [HNSW](#index-build-time) and [IVFFlat](#index-build-time-1). +Use [binary quantization](#binary-quantization) for smaller indexes and faster build times at scale. + In production environments, create indexes concurrently to avoid blocking writes. ```sql @@ -717,6 +721,8 @@ #### Approximate Search +Use [binary quantization](#binary-quantization) with re-ranking to keep indexes in-memory at scale. + To speed up queries with an IVFFlat index, increase the number of inverted lists (at the expense of recall). ```sql @@ -732,21 +738,20 @@ VACUUM table_name; ``` -## Monitoring +## Scaling -Monitor performance with [pg_stat_statements](https://www.postgresql.org/docs/current/pgstatstatements.html) (be sure to add it to `shared_preload_libraries`). +For a smaller working set: -```sql -CREATE EXTENSION pg_stat_statements; -``` +1. Use the `halfvec` type instead of `vector` for tables +2. Use [binary quantization](#binary-quantization) for indexes (with re-ranking for search) -Get the most time-consuming queries with: +Scale vertically by increasing memory, CPU, and storage on a single instance. Use existing tools to [tune parameters](#tuning) and [monitor performance](#monitoring). -```sql -SELECT query, calls, ROUND((total_plan_time + total_exec_time) / calls) AS avg_time_ms, - ROUND((total_plan_time + total_exec_time) / 60000) AS total_time_min - FROM pg_stat_statements ORDER BY total_plan_time + total_exec_time DESC LIMIT 20; -``` +Scale horizontally with [replicas](https://www.postgresql.org/docs/current/hot-standby.html), or use [Citus](https://github.com/citusdata/citus), [PgDog](https://github.com/pgdogdev/pgdog), or another approach for sharding ([example](https://github.com/pgvector/pgvector-python/blob/master/examples/citus/example.py)). + +## Monitoring + +Use existing tools like [pg_stat_statements](https://www.postgresql.org/docs/current/pgstatstatements.html) or [PgHero](https://github.com/ankane/pghero) to monitor performance. Monitor recall by comparing results from approximate search with exact search. @@ -757,14 +762,6 @@ COMMIT; ``` -## Scaling - -Scale pgvector the same way you scale Postgres. - -Scale vertically by increasing memory, CPU, and storage on a single instance. Use existing tools to [tune parameters](#tuning) and [monitor performance](#monitoring). - -Scale horizontally with [replicas](https://www.postgresql.org/docs/current/hot-standby.html), or use [Citus](https://github.com/citusdata/citus) or another approach for sharding ([example](https://github.com/pgvector/pgvector-python/blob/master/examples/citus/example.py)). - ## Languages Use pgvector from any language with a Postgres client. You can even generate and store vectors in one language and query them in another. @@ -878,6 +875,8 @@ SELECT pg_size_pretty(pg_relation_size('index_name')); ``` +Use [half-precision indexing](#half-precision-indexing) or [binary quantization](#binary-quantization) for smaller indexes. + ## Troubleshooting #### Why isn’t a query using an index? @@ -1152,23 +1151,23 @@ Supported tags are: -- `pg18-trixie`, `0.8.2-pg18-trixie` -- `pg18-bookworm`, `0.8.2-pg18-bookworm`, `pg18`, `0.8.2-pg18` -- `pg17-trixie`, `0.8.2-pg17-trixie` -- `pg17-bookworm`, `0.8.2-pg17-bookworm`, `pg17`, `0.8.2-pg17` -- `pg16-trixie`, `0.8.2-pg16-trixie` -- `pg16-bookworm`, `0.8.2-pg16-bookworm`, `pg16`, `0.8.2-pg16` -- `pg15-trixie`, `0.8.2-pg15-trixie` -- `pg15-bookworm`, `0.8.2-pg15-bookworm`, `pg15`, `0.8.2-pg15` -- `pg14-trixie`, `0.8.2-pg14-trixie` -- `pg14-bookworm`, `0.8.2-pg14-bookworm`, `pg14`, `0.8.2-pg14` -- `pg13-trixie`, `0.8.2-pg13-trixie` -- `pg13-bookworm`, `0.8.2-pg13-bookworm`, `pg13`, `0.8.2-pg13` +- `pg18-trixie`, `0.8.3-pg18-trixie` +- `pg18-bookworm`, `0.8.3-pg18-bookworm`, `pg18`, `0.8.3-pg18` +- `pg17-trixie`, `0.8.3-pg17-trixie` +- `pg17-bookworm`, `0.8.3-pg17-bookworm`, `pg17`, `0.8.3-pg17` +- `pg16-trixie`, `0.8.3-pg16-trixie` +- `pg16-bookworm`, `0.8.3-pg16-bookworm`, `pg16`, `0.8.3-pg16` +- `pg15-trixie`, `0.8.3-pg15-trixie` +- `pg15-bookworm`, `0.8.3-pg15-bookworm`, `pg15`, `0.8.3-pg15` +- `pg14-trixie`, `0.8.3-pg14-trixie` +- `pg14-bookworm`, `0.8.3-pg14-bookworm`, `pg14`, `0.8.3-pg14` +- `pg13-trixie`, `0.8.3-pg13-trixie` +- `pg13-bookworm`, `0.8.3-pg13-bookworm`, `pg13`, `0.8.3-pg13` You can also build the image manually: ```sh -git clone --branch v0.8.2 https://github.com/pgvector/pgvector.git +git clone --branch v0.8.3 https://github.com/pgvector/pgvector.git cd pgvector docker build --pull --build-arg PG_MAJOR=18 -t myuser/pgvector . ``` @@ -1330,7 +1329,7 @@ To enable benchmarking: ```sh -make clean && PG_CFLAGS="-DIVFFLAT_BENCH" make && make install +make clean && PG_CFLAGS="-DHNSW_BENCH -DIVFFLAT_BENCH" make && make install ``` To show memory usage: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pgvector-0.8.2/sql/vector--0.8.2--0.8.3.sql new/pgvector-0.8.3/sql/vector--0.8.2--0.8.3.sql --- old/pgvector-0.8.2/sql/vector--0.8.2--0.8.3.sql 1970-01-01 01:00:00.000000000 +0100 +++ new/pgvector-0.8.3/sql/vector--0.8.2--0.8.3.sql 2026-06-18 06:26:13.000000000 +0200 @@ -0,0 +1,2 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION vector UPDATE TO '0.8.3'" to load this file. \quit diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pgvector-0.8.2/src/bitutils.c new/pgvector-0.8.3/src/bitutils.c --- old/pgvector-0.8.2/src/bitutils.c 2026-02-25 19:46:57.000000000 +0100 +++ new/pgvector-0.8.3/src/bitutils.c 2026-06-18 06:26:13.000000000 +0200 @@ -31,10 +31,12 @@ #define BIT_TARGET_CLONES #endif -/* Use built-ins when possible for inlining */ -#if defined(HAVE__BUILTIN_POPCOUNT) && defined(HAVE_LONG_INT_64) +/* Use built-ins when possible for Postgres < 19 for inlining */ +#if PG_VERSION_NUM >= 190000 +#define popcount64(x) pg_popcount64(x) +#elif defined(HAVE__BUILTIN_POPCOUNT) && (defined(HAVE_LONG_INT_64) || SIZEOF_LONG == 8) #define popcount64(x) __builtin_popcountl(x) -#elif defined(HAVE__BUILTIN_POPCOUNT) && defined(HAVE_LONG_LONG_INT_64) +#elif defined(HAVE__BUILTIN_POPCOUNT) && (defined(HAVE_LONG_LONG_INT_64) || SIZEOF_LONG_LONG == 8) #define popcount64(x) __builtin_popcountll(x) #elif !defined(_MSC_VER) /* Fails to resolve with MSVC */ @@ -169,7 +171,7 @@ #endif TARGET_XSAVE static bool -SupportsAvx512Popcount() +SupportsAvx512Popcount(void) { unsigned int exx[4] = {0, 0, 0, 0}; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pgvector-0.8.2/src/hnsw.c new/pgvector-0.8.3/src/hnsw.c --- old/pgvector-0.8.2/src/hnsw.c 2026-02-25 19:46:57.000000000 +0100 +++ new/pgvector-0.8.3/src/hnsw.c 2026-06-18 06:26:13.000000000 +0200 @@ -13,6 +13,7 @@ #include "hnsw.h" #include "miscadmin.h" #include "nodes/pg_list.h" +#include "storage/lwlock.h" #include "utils/float.h" #include "utils/guc.h" #include "utils/relcache.h" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pgvector-0.8.2/src/hnsw.h new/pgvector-0.8.3/src/hnsw.h --- old/pgvector-0.8.2/src/hnsw.h 2026-02-25 19:46:57.000000000 +0100 +++ new/pgvector-0.8.3/src/hnsw.h 2026-06-18 06:26:13.000000000 +0200 @@ -10,10 +10,18 @@ #include "lib/pairingheap.h" #include "nodes/execnodes.h" #include "port.h" /* for random() */ +#include "storage/bufpage.h" +#include "storage/condition_variable.h" +#include "storage/lwlock.h" +#include "storage/s_lock.h" #include "utils/relptr.h" #include "utils/sampling.h" #include "vector.h" +#ifdef HNSW_BENCH +#include "portability/instr_time.h" +#endif + #if PG_VERSION_NUM >= 190000 typedef Pointer Item; #endif @@ -74,6 +82,21 @@ #define HnswPageGetOpaque(page) ((HnswPageOpaque) PageGetSpecialPointer(page)) #define HnswPageGetMeta(page) ((HnswMetaPageData *) PageGetContents(page)) +#ifdef HNSW_BENCH +#define HnswBench(name, code) \ + do { \ + instr_time start; \ + instr_time duration; \ + INSTR_TIME_SET_CURRENT(start); \ + (code); \ + INSTR_TIME_SET_CURRENT(duration); \ + INSTR_TIME_SUBTRACT(duration, start); \ + elog(INFO, "%s: %.3f ms", name, INSTR_TIME_GET_MILLISEC(duration)); \ + } while (0) +#else +#define HnswBench(name, code) (code) +#endif + #if PG_VERSION_NUM >= 150000 #define RandomDouble() pg_prng_double(&pg_global_prng_state) #define SeedRandom(seed) pg_prng_seed(&pg_global_prng_state, seed) @@ -408,6 +431,7 @@ BufferAccessStrategy bas; HnswNeighborTuple ntup; HnswElementData highestPoint; + HnswElementData fallbackPoint; /* Memory */ MemoryContext tmpCtx; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pgvector-0.8.2/src/hnswbuild.c new/pgvector-0.8.3/src/hnswbuild.c --- old/pgvector-0.8.2/src/hnswbuild.c 2026-02-25 19:46:57.000000000 +0100 +++ new/pgvector-0.8.3/src/hnswbuild.c 2026-06-18 06:26:13.000000000 +0200 @@ -36,6 +36,8 @@ */ #include "postgres.h" +#include <limits.h> + #include "access/genam.h" #include "access/parallel.h" #include "access/relscan.h" @@ -52,6 +54,7 @@ #include "nodes/execnodes.h" #include "optimizer/optimizer.h" #include "storage/bufmgr.h" +#include "storage/condition_variable.h" #include "tcop/tcopprot.h" #include "utils/datum.h" #include "utils/memutils.h" @@ -800,7 +803,11 @@ buildstate.hnswarea = hnswarea; InitAllocator(&buildstate.allocator, &HnswSharedMemoryAlloc, &buildstate); scan = table_beginscan_parallel(heapRel, - ParallelTableScanFromHnswShared(hnswshared)); + ParallelTableScanFromHnswShared(hnswshared) +#if PG_VERSION_NUM >= 190000 + ,SO_NONE +#endif + ); reltuples = table_index_build_scan(heapRel, indexRel, indexInfo, true, progress, BuildCallback, (void *) &buildstate, scan); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pgvector-0.8.2/src/hnswinsert.c new/pgvector-0.8.3/src/hnswinsert.c --- old/pgvector-0.8.2/src/hnswinsert.c 2026-02-25 19:46:57.000000000 +0100 +++ new/pgvector-0.8.3/src/hnswinsert.c 2026-06-18 06:26:13.000000000 +0200 @@ -6,6 +6,7 @@ #include "nodes/execnodes.h" #include "storage/bufmgr.h" #include "storage/lmgr.h" +#include "storage/lwlock.h" #include "utils/datum.h" #include "utils/memutils.h" #include "utils/rel.h" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pgvector-0.8.2/src/hnswscan.c new/pgvector-0.8.3/src/hnswscan.c --- old/pgvector-0.8.2/src/hnswscan.c 2026-02-25 19:46:57.000000000 +0100 +++ new/pgvector-0.8.3/src/hnswscan.c 2026-06-18 06:26:13.000000000 +0200 @@ -1,5 +1,7 @@ #include "postgres.h" +#include <limits.h> + #include "access/genam.h" #include "access/relscan.h" #include "hnsw.h" @@ -151,7 +153,7 @@ /* Calculate max memory */ /* Add 256 extra bytes to fill last block when close */ maxMemory = (double) work_mem * hnsw_scan_mem_multiplier * 1024.0 + 256; - so->maxMemory = Min(maxMemory, (double) SIZE_MAX); + so->maxMemory = Min(maxMemory, (double) (SIZE_MAX / 2)); scan->opaque = so; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pgvector-0.8.2/src/hnswutils.c new/pgvector-0.8.3/src/hnswutils.c --- old/pgvector-0.8.2/src/hnswutils.c 2026-02-25 19:46:57.000000000 +0100 +++ new/pgvector-0.8.3/src/hnswutils.c 2026-06-18 06:26:13.000000000 +0200 @@ -546,6 +546,9 @@ Assert(HnswIsElementTuple(etup)); + if (unlikely(etup->deleted)) + elog(ERROR, "cannot load deleted element"); + /* Calculate distance */ if (distance != NULL) { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pgvector-0.8.2/src/hnswvacuum.c new/pgvector-0.8.3/src/hnswvacuum.c --- old/pgvector-0.8.2/src/hnswvacuum.c 2026-02-25 19:46:57.000000000 +0100 +++ new/pgvector-0.8.3/src/hnswvacuum.c 2026-06-18 06:26:13.000000000 +0200 @@ -37,17 +37,20 @@ { BlockNumber blkno = HNSW_HEAD_BLKNO; HnswElement highestPoint = &vacuumstate->highestPoint; + HnswElement fallbackPoint = &vacuumstate->fallbackPoint; Relation index = vacuumstate->index; BufferAccessStrategy bas = vacuumstate->bas; - HnswElement entryPoint = HnswGetEntryPoint(vacuumstate->index); IndexBulkDeleteResult *stats = vacuumstate->stats; - /* Store separately since highestPoint.level is uint8 */ + /* Store separately since HnswElement level is uint8 */ int highestLevel = -1; + int fallbackLevel = -1; - /* Initialize highest point */ + /* Initialize highest point and fallback point */ highestPoint->blkno = InvalidBlockNumber; highestPoint->offno = InvalidOffsetNumber; + fallbackPoint->blkno = InvalidBlockNumber; + fallbackPoint->offno = InvalidOffsetNumber; while (BlockNumberIsValid(blkno)) { @@ -119,14 +122,31 @@ tidhash_insert(vacuumstate->deleted, ip, &found); Assert(!found); } - else if (etup->level > highestLevel && !(entryPoint != NULL && blkno == entryPoint->blkno && offno == entryPoint->offno)) + else if (etup->level > highestLevel) { - /* Keep track of highest non-entry point */ + if (BlockNumberIsValid(highestPoint->blkno)) + { + /* Current highest point becomes fallback */ + fallbackPoint->blkno = highestPoint->blkno; + fallbackPoint->offno = highestPoint->offno; + fallbackPoint->level = highestPoint->level; + fallbackLevel = highestLevel; + } + + /* Keep track of highest point */ highestPoint->blkno = blkno; highestPoint->offno = offno; highestPoint->level = etup->level; highestLevel = etup->level; } + else if (etup->level > fallbackLevel) + { + /* Keep track of second highest point */ + fallbackPoint->blkno = blkno; + fallbackPoint->offno = offno; + fallbackPoint->level = etup->level; + fallbackLevel = etup->level; + } } blkno = HnswPageGetOpaque(page)->nextblkno; @@ -138,6 +158,10 @@ UnlockReleaseBuffer(buf); } + +#ifdef HNSW_MEMORY + elog(INFO, "memory: %zu KB", MemoryContextMemAllocated(CurrentMemoryContext, true) / 1024); +#endif } /* @@ -269,12 +293,27 @@ /* Get a shared lock */ LockPage(index, HNSW_UPDATE_LOCK, ShareLock); - /* Load element */ - HnswLoadElement(highestPoint, NULL, NULL, index, support, true, NULL); + /* Get latest entry point */ + entryPoint = HnswGetEntryPoint(index); - /* Repair if needed */ - if (NeedsUpdated(vacuumstate, highestPoint)) - RepairGraphElement(vacuumstate, highestPoint, HnswGetEntryPoint(index)); + /* Use fallback point if highest point is entry point */ + if (entryPoint != NULL && entryPoint->blkno == highestPoint->blkno && entryPoint->offno == highestPoint->offno) + { + highestPoint = &vacuumstate->fallbackPoint; + + if (!BlockNumberIsValid(highestPoint->blkno)) + highestPoint = NULL; + } + + if (highestPoint != NULL) + { + /* Load element */ + HnswLoadElement(highestPoint, NULL, NULL, index, support, true, NULL); + + /* Repair if needed */ + if (NeedsUpdated(vacuumstate, highestPoint)) + RepairGraphElement(vacuumstate, highestPoint, entryPoint); + } /* Release lock */ UnlockPage(index, HNSW_UPDATE_LOCK, ShareLock); @@ -441,6 +480,99 @@ /* Reset memory context */ MemoryContextSwitchTo(oldCtx); MemoryContextReset(vacuumstate->tmpCtx); + +#ifdef HNSW_VACUUM_PROGRESS + if (!BlockNumberIsValid(blkno) || (blkno - HNSW_HEAD_BLKNO) % 1000 == 0) + { + BlockNumber totalBlocks = RelationGetNumberOfBlocks(index); + BlockNumber currentBlocks = BlockNumberIsValid(blkno) ? blkno : totalBlocks; + + elog(INFO, "hnsw vacuum progress: %.1f%%", 100.0 * currentBlocks / totalBlocks); + } +#endif + } +} + +/* + * Confirm graph was repaired + */ +static void +ConfirmRepaired(HnswVacuumState * vacuumstate) +{ + BlockNumber blkno = HNSW_HEAD_BLKNO; + Relation index = vacuumstate->index; + BufferAccessStrategy bas = vacuumstate->bas; + + while (BlockNumberIsValid(blkno)) + { + Buffer buf; + Page page; + OffsetNumber offno; + OffsetNumber maxoffno; + + vacuum_delay_point(); + + buf = ReadBufferExtended(index, MAIN_FORKNUM, blkno, RBM_NORMAL, bas); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + maxoffno = PageGetMaxOffsetNumber(page); + + /* Iterate over nodes */ + for (offno = FirstOffsetNumber; offno <= maxoffno; offno = OffsetNumberNext(offno)) + { + HnswElementTuple etup = (HnswElementTuple) PageGetItem(page, PageGetItemId(page, offno)); + HnswNeighborTuple ntup; + Buffer nbuf; + Page npage; + BlockNumber neighborPage; + OffsetNumber neighborOffno; + + /* Skip neighbor tuples */ + if (!HnswIsElementTuple(etup)) + continue; + + /* Skip if being deleted */ + if (!ItemPointerIsValid(&etup->heaptids[0])) + continue; + + /* Get neighbor page */ + neighborPage = ItemPointerGetBlockNumber(&etup->neighbortid); + neighborOffno = ItemPointerGetOffsetNumber(&etup->neighbortid); + + if (neighborPage == blkno) + { + nbuf = buf; + npage = page; + } + else + { + nbuf = ReadBufferExtended(index, MAIN_FORKNUM, neighborPage, RBM_NORMAL, bas); + LockBuffer(nbuf, BUFFER_LOCK_SHARE); + npage = BufferGetPage(nbuf); + } + + ntup = (HnswNeighborTuple) PageGetItem(npage, PageGetItemId(npage, neighborOffno)); + + /* Check neighbors */ + for (int i = 0; i < ntup->count; i++) + { + ItemPointer indextid = &ntup->indextids[i]; + + if (!ItemPointerIsValid(indextid)) + continue; + + /* Check if in deleted list */ + if (DeletedContains(vacuumstate->deleted, indextid)) + elog(ERROR, "hnsw graph not repaired"); + } + + if (nbuf != buf) + UnlockReleaseBuffer(nbuf); + } + + blkno = HnswPageGetOpaque(page)->nextblkno; + + UnlockReleaseBuffer(buf); } } @@ -634,13 +766,16 @@ InitVacuumState(&vacuumstate, info, stats, callback, callback_state); /* Pass 1: Remove heap TIDs */ - RemoveHeapTids(&vacuumstate); + HnswBench("RemoveHeapTids", RemoveHeapTids(&vacuumstate)); /* Pass 2: Repair graph */ - RepairGraph(&vacuumstate); + HnswBench("RepairGraph", RepairGraph(&vacuumstate)); + + /* Pass 3: Confirm repaired */ + HnswBench("ConfirmRepaired", ConfirmRepaired(&vacuumstate)); - /* Pass 3: Mark as deleted */ - MarkDeleted(&vacuumstate); + /* Pass 4: Mark as deleted */ + HnswBench("MarkDeleted", MarkDeleted(&vacuumstate)); FreeVacuumState(&vacuumstate); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pgvector-0.8.2/src/ivfbuild.c new/pgvector-0.8.3/src/ivfbuild.c --- old/pgvector-0.8.2/src/ivfbuild.c 2026-02-25 19:46:57.000000000 +0100 +++ new/pgvector-0.8.3/src/ivfbuild.c 2026-06-18 06:26:13.000000000 +0200 @@ -22,6 +22,7 @@ #include "nodes/execnodes.h" #include "optimizer/optimizer.h" #include "storage/bufmgr.h" +#include "storage/condition_variable.h" #include "tcop/tcopprot.h" #include "utils/memutils.h" #include "utils/rel.h" @@ -62,15 +63,13 @@ Datum value = PointerGetDatum(PG_DETOAST_DATUM(values[0])); /* - * Normalize with KMEANS_NORM_PROC since spherical distance function - * expects unit vectors + * Check with KMEANS_NORM_PROC that the value can be normalized since + * spherical distance function expects unit vectors */ if (buildstate->kmeansnormprocinfo != NULL) { if (!IvfflatCheckNorm(buildstate->kmeansnormprocinfo, buildstate->collation, value)) return; - - value = IvfflatNormValue(buildstate->typeInfo, buildstate->collation, value); } if (samples->length < targsamples) @@ -81,7 +80,7 @@ else { if (buildstate->rowstoskip < 0) - buildstate->rowstoskip = reservoir_get_next_S(&buildstate->rstate, samples->length, targsamples); + buildstate->rowstoskip = reservoir_get_next_S(&buildstate->rstate, buildstate->samplerows, targsamples); if (buildstate->rowstoskip <= 0) { @@ -97,6 +96,9 @@ buildstate->rowstoskip -= 1; } + + /* Increment after reservoir_get_next_S */ + buildstate->samplerows += 1; } /* @@ -133,6 +135,7 @@ int targsamples = buildstate->samples->maxlen; BlockNumber totalblocks = RelationGetNumberOfBlocks(buildstate->heap); + buildstate->samplerows = 0; buildstate->rowstoskip = -1; BlockSampler_Init(&buildstate->bs, totalblocks, targsamples, RandomInt()); @@ -142,8 +145,24 @@ { BlockNumber targblock = BlockSampler_Next(&buildstate->bs); + /* Set anyvisible to false like table_index_build_scan */ table_index_build_range_scan(buildstate->heap, buildstate->index, buildstate->indexInfo, - false, true, false, targblock, 1, SampleCallback, (void *) buildstate, NULL); + false, false, false, targblock, 1, SampleCallback, (void *) buildstate, NULL); + } + + /* Normalize if needed */ + if (buildstate->kmeansnormprocinfo != NULL) + { + VectorArray samples = buildstate->samples; + + for (int i = 0; i < samples->length; i++) + { + Datum value = PointerGetDatum(VectorArrayGet(samples, i)); + Datum normValue = IvfflatNormValue(buildstate->typeInfo, buildstate->collation, value); + + VectorArraySet(samples, i, DatumGetPointer(normValue)); + pfree(DatumGetPointer(normValue)); + } } } @@ -374,9 +393,13 @@ TupleDescInitEntry(buildstate->sortdesc, (AttrNumber) 1, "list", INT4OID, -1, 0); TupleDescInitEntry(buildstate->sortdesc, (AttrNumber) 2, "tid", TIDOID, -1, 0); TupleDescInitEntry(buildstate->sortdesc, (AttrNumber) 3, "vector", TupleDescAttr(buildstate->tupdesc, 0)->atttypid, -1, 0); +#if PG_VERSION_NUM >= 190000 + TupleDescFinalize(buildstate->sortdesc); +#endif buildstate->slot = MakeSingleTupleTableSlot(buildstate->sortdesc, &TTSOpsVirtual); + /* TODO Ensure within maintenance_work_mem */ buildstate->centers = VectorArrayInit(buildstate->lists, buildstate->dimensions, buildstate->typeInfo->itemSize(buildstate->dimensions)); buildstate->listInfo = palloc(sizeof(ListInfo) * buildstate->lists); @@ -435,7 +458,7 @@ buildstate->samples = VectorArrayInit(numSamples, buildstate->dimensions, buildstate->centers->itemsize); if (buildstate->heap != NULL) { - SampleRows(buildstate); + IvfflatBench("sample rows", SampleRows(buildstate)); if (buildstate->samples->length < buildstate->lists) { @@ -650,7 +673,11 @@ ivfspool->sortstate = InitBuildSortState(buildstate.sortdesc, sortmem, coordinate); buildstate.sortstate = ivfspool->sortstate; scan = table_beginscan_parallel(ivfspool->heap, - ParallelTableScanFromIvfflatShared(ivfshared)); + ParallelTableScanFromIvfflatShared(ivfshared) +#if PG_VERSION_NUM >= 190000 + ,SO_NONE +#endif + ); reltuples = table_index_build_scan(ivfspool->heap, ivfspool->index, indexInfo, true, progress, BuildCallback, (void *) &buildstate, scan); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pgvector-0.8.2/src/ivfflat.h new/pgvector-0.8.3/src/ivfflat.h --- old/pgvector-0.8.2/src/ivfflat.h 2026-02-25 19:46:57.000000000 +0100 +++ new/pgvector-0.8.3/src/ivfflat.h 2026-06-18 06:26:13.000000000 +0200 @@ -9,6 +9,7 @@ #include "lib/pairingheap.h" #include "nodes/execnodes.h" #include "port.h" /* for random() */ +#include "storage/condition_variable.h" #include "utils/sampling.h" #include "utils/tuplesort.h" #include "vector.h" @@ -213,7 +214,8 @@ /* Sampling */ BlockSamplerData bs; ReservoirStateData rstate; - int rowstoskip; + double samplerows; + double rowstoskip; /* Sorting */ Tuplesortstate *sortstate; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pgvector-0.8.2/src/ivfscan.c new/pgvector-0.8.3/src/ivfscan.c --- old/pgvector-0.8.2/src/ivfscan.c 2026-02-25 19:46:57.000000000 +0100 +++ new/pgvector-0.8.3/src/ivfscan.c 2026-06-18 06:26:13.000000000 +0200 @@ -298,6 +298,9 @@ so->tupdesc = CreateTemplateTupleDesc(2); TupleDescInitEntry(so->tupdesc, (AttrNumber) 1, "distance", FLOAT8OID, -1, 0); TupleDescInitEntry(so->tupdesc, (AttrNumber) 2, "heaptid", TIDOID, -1, 0); +#if PG_VERSION_NUM >= 190000 + TupleDescFinalize(so->tupdesc); +#endif /* Prep sort */ so->sortstate = InitScanSortState(so->tupdesc); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pgvector-0.8.2/src/sparsevec.c new/pgvector-0.8.3/src/sparsevec.c --- old/pgvector-0.8.2/src/sparsevec.c 2026-02-25 19:46:57.000000000 +0100 +++ new/pgvector-0.8.3/src/sparsevec.c 2026-06-18 06:26:13.000000000 +0200 @@ -182,10 +182,10 @@ static int CompareIndices(const void *a, const void *b) { - if (((SparseInputElement *) a)->index < ((SparseInputElement *) b)->index) + if (((const SparseInputElement *) a)->index < ((const SparseInputElement *) b)->index) return -1; - if (((SparseInputElement *) a)->index > ((SparseInputElement *) b)->index) + if (((const SparseInputElement *) a)->index > ((const SparseInputElement *) b)->index) return 1; return 0; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pgvector-0.8.2/src/vector.c new/pgvector-0.8.3/src/vector.c --- old/pgvector-0.8.2/src/vector.c 2026-02-25 19:46:57.000000000 +0100 +++ new/pgvector-0.8.3/src/vector.c 2026-06-18 06:26:13.000000000 +0200 @@ -40,7 +40,7 @@ #endif #if PG_VERSION_NUM >= 180000 -PG_MODULE_MAGIC_EXT(.name = "vector",.version = "0.8.2"); +PG_MODULE_MAGIC_EXT(.name = "vector", .version = "0.8.3"); #else PG_MODULE_MAGIC; #endif diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pgvector-0.8.2/vector.control new/pgvector-0.8.3/vector.control --- old/pgvector-0.8.2/vector.control 2026-02-25 19:46:57.000000000 +0100 +++ new/pgvector-0.8.3/vector.control 2026-06-18 06:26:13.000000000 +0200 @@ -1,4 +1,4 @@ comment = 'vector data type and ivfflat and hnsw access methods' -default_version = '0.8.2' +default_version = '0.8.3' module_pathname = '$libdir/vector' relocatable = true
