On Thu, Feb 1, 2024 at 11:11 PM vignesh C <vignes...@gmail.com> wrote: > > On Wed, 6 Dec 2023 at 04:08, Tommy Pavlicek <tommypav...@gmail.com> wrote: > > > > Thanks. > > > > I've attached the latest version that updates the naming in line with > > the convention. > > > > On Mon, Dec 4, 2023 at 12:46 AM jian he <jian.universal...@gmail.com> wrote: > > > > > > On Fri, Dec 1, 2023 at 8:44 AM Tommy Pavlicek <tommypav...@gmail.com> > > > wrote: > > > > > > > > > > > > Patch updated for those comments (and a touch of cleanup in the tests) > > > > attached. > > > > > > it would be a better name as hash_ltree than ltree_hash, similar logic > > > applies to ltree_hash_extended. > > > that would be the convention. see: > > > https://stackoverflow.com/a/69650940/15603477 > > > > > > > > > Other than that, it looks good. > > CFBot shows one of the test is failing as in [1]: > diff -U3 /tmp/cirrus-ci-build/contrib/ltree/expected/ltree.out > /tmp/cirrus-ci-build/build-32/testrun/ltree/regress/results/ltree.out > --- /tmp/cirrus-ci-build/contrib/ltree/expected/ltree.out 2024-01-31 > 15:18:42.893039599 +0000 > +++ /tmp/cirrus-ci-build/build-32/testrun/ltree/regress/results/ltree.out > 2024-01-31 15:23:25.309028749 +0000 > @@ -1442,9 +1442,14 @@ > ('0.1.2'::ltree), ('0'::ltree), ('0_asd.1_ASD'::ltree)) x(v) > WHERE hash_ltree(v)::bit(32) != hash_ltree_extended(v, 0)::bit(32) > OR hash_ltree(v)::bit(32) = hash_ltree_extended(v, 1)::bit(32); > - value | standard | extended0 | extended1 > --------+----------+-----------+----------- > -(0 rows) > + value | standard | > extended0 | extended1 > +-------------+----------------------------------+----------------------------------+---------------------------------- > + 0 | 10001010100010010000000000001011 | > 01011001111001000100011001011011 | 01011001111001000100011010011111 > + 0.1 | 10100000111110001010110001001110 | > 00111100100010001100110111010101 | 00111100100010001101100011010101 > + 0.1.2 | 01111000011100000101111101110100 | > 10101110011101011000000011010111 | 10101110011101110010001111000011 > + 0 | 10001010100010010000000000001011 | > 01011001111001000100011001011011 | 01011001111001000100011010011111 > + 0_asd.1_ASD | 01000010001010000000101001001101 | > 00111100100010001100110111010101 | 00111100100010001101100011010101 > +(5 rows) > > Please post an updated version for the same. > > [1] - > https://api.cirrus-ci.com/v1/artifact/task/5572544858685440/testrun/build-32/testrun/ltree/regress/regression.diffs >
It only fails on Linux - Debian Bullseye - Meson. I fixed the white space, named it v5. I also made the following changes: from uint64 levelHash = hash_any_extended((unsigned char *) al->name, al->len, seed); uint32 levelHash = hash_any((unsigned char *) al->name, al->len); to uint64 levelHash = DatumGetUInt64(hash_any_extended((unsigned char *) al->name, al->len, seed)); uint32 levelHash = DatumGetUInt32(hash_any((unsigned char *) al->name, al->len)); (these two line live in different functions) I have some problems testing it locally, so I post the patch.
From ffc0e3daea5e490d8b9e6b7a0345f780d88b5624 Mon Sep 17 00:00:00 2001 From: jian he <jian.universality@gmail.com> Date: Fri, 2 Feb 2024 16:03:56 +0800 Subject: [PATCH v5 1/1] add hash functions for the ltree extension. --- contrib/ltree/Makefile | 2 +- contrib/ltree/expected/ltree.out | 65 +++++++++++++++++++++++++++++ contrib/ltree/ltree--1.2--1.3.sql | 23 +++++++++++ contrib/ltree/ltree.control | 2 +- contrib/ltree/ltree_op.c | 68 +++++++++++++++++++++++++++++++ contrib/ltree/ltreetest.sql | 1 + contrib/ltree/meson.build | 1 + contrib/ltree/sql/ltree.sql | 43 +++++++++++++++++++ doc/src/sgml/ltree.sgml | 8 ++++ 9 files changed, 211 insertions(+), 2 deletions(-) create mode 100644 contrib/ltree/ltree--1.2--1.3.sql diff --git a/contrib/ltree/Makefile b/contrib/ltree/Makefile index 770769a7..f50f2c94 100644 --- a/contrib/ltree/Makefile +++ b/contrib/ltree/Makefile @@ -14,7 +14,7 @@ OBJS = \ ltxtquery_op.o EXTENSION = ltree -DATA = ltree--1.1--1.2.sql ltree--1.1.sql ltree--1.0--1.1.sql +DATA = ltree--1.2--1.3.sql ltree--1.1--1.2.sql ltree--1.1.sql ltree--1.0--1.1.sql PGFILEDESC = "ltree - hierarchical label data type" HEADERS = ltree.h diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out index 984cd030..dd4f0fc1 100644 --- a/contrib/ltree/expected/ltree.out +++ b/contrib/ltree/expected/ltree.out @@ -1433,8 +1433,27 @@ SELECT '{j.k.l.m, g.b.c.d.e}'::ltree[] ?~ 'A*@|g.b.c.d.e'; g.b.c.d.e (1 row) +-- Check that the hash_ltree() and hash_ltree_extended() function's lower +-- 32 bits match when the seed is 0 and do not match when the seed != 0 +SELECT v as value, hash_ltree(v)::bit(32) as standard, + hash_ltree_extended(v, 0)::bit(32) as extended0, + hash_ltree_extended(v, 1)::bit(32) as extended1 +FROM (VALUES (NULL::ltree), (''::ltree), ('0'::ltree), ('0.1'::ltree), + ('0.1.2'::ltree), ('0'::ltree), ('0_asd.1_ASD'::ltree)) x(v) +WHERE hash_ltree(v)::bit(32) != hash_ltree_extended(v, 0)::bit(32) + OR hash_ltree(v)::bit(32) = hash_ltree_extended(v, 1)::bit(32); + value | standard | extended0 | extended1 +-------+----------+-----------+----------- +(0 rows) + CREATE TABLE ltreetest (t ltree); \copy ltreetest FROM 'data/ltree.data' +SELECT count(*) from ltreetest; + count +------- + 1006 +(1 row) + SELECT * FROM ltreetest WHERE t < '12.3' order by t asc; t ---------------------------------- @@ -7832,6 +7851,52 @@ SELECT * FROM ltreetest WHERE t ? '{23.*.1,23.*.2}' order by t asc; 23.3.32.21.5.14.10.17.1 (4 rows) +drop index tstidx; +--- test hash index +create index tstidx on ltreetest using hash (t); +set enable_seqscan=off; +set enable_bitmapscan=off; +EXPLAIN (COSTS OFF) SELECT * FROM ltreetest WHERE t = '12.3' order by t asc; + QUERY PLAN +-------------------------------------- + Index Scan using tstidx on ltreetest + Index Cond: (t = '12.3'::ltree) +(2 rows) + +SELECT * FROM ltreetest WHERE t = '12.3' order by t asc; + t +------ + 12.3 +(1 row) + +set enable_seqscan=on; +set enable_bitmapscan=on; +-- test hash aggregate +set enable_hashagg=on; +set enable_sort=off; +EXPLAIN (COSTS OFF) +SELECT count(*) FROM ( +SELECT t FROM (SELECT * FROM ltreetest UNION ALL SELECT * FROM ltreetest) t1 GROUP BY t +) t2; + QUERY PLAN +----------------------------------------------------- + Aggregate + -> HashAggregate + Group Key: ltreetest.t + -> Append + -> Seq Scan on ltreetest + -> Seq Scan on ltreetest ltreetest_1 +(6 rows) + +SELECT count(*) FROM ( +SELECT t FROM (SELECT * FROM ltreetest UNION ALL SELECT * FROM ltreetest) t1 GROUP BY t +) t2; + count +------- + 1006 +(1 row) + +set enable_sort=on; drop index tstidx; create index tstidx on ltreetest using gist (t gist_ltree_ops(siglen=0)); ERROR: value 0 out of bounds for option "siglen" diff --git a/contrib/ltree/ltree--1.2--1.3.sql b/contrib/ltree/ltree--1.2--1.3.sql new file mode 100644 index 00000000..bc9a34dd --- /dev/null +++ b/contrib/ltree/ltree--1.2--1.3.sql @@ -0,0 +1,23 @@ +/* contrib/ltree/ltree--1.2--1.3.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION ltree UPDATE TO '1.3'" to load this file. \quit + +CREATE FUNCTION hash_ltree(ltree) +RETURNS integer +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE; + +CREATE FUNCTION hash_ltree_extended(ltree, bigint) +RETURNS bigint +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE; + +CREATE OPERATOR CLASS hash_ltree_ops +DEFAULT FOR TYPE ltree USING hash +AS + OPERATOR 1 = , + FUNCTION 1 hash_ltree(ltree), + FUNCTION 2 hash_ltree_extended(ltree, bigint); + +ALTER OPERATOR =(ltree, ltree) SET (HASHES); diff --git a/contrib/ltree/ltree.control b/contrib/ltree/ltree.control index b408d647..c2cbeda9 100644 --- a/contrib/ltree/ltree.control +++ b/contrib/ltree/ltree.control @@ -1,6 +1,6 @@ # ltree extension comment = 'data type for hierarchical tree-like structures' -default_version = '1.2' +default_version = '1.3' module_pathname = '$libdir/ltree' relocatable = true trusted = true diff --git a/contrib/ltree/ltree_op.c b/contrib/ltree/ltree_op.c index da1db5fc..4c464153 100644 --- a/contrib/ltree/ltree_op.c +++ b/contrib/ltree/ltree_op.c @@ -9,6 +9,7 @@ #include "access/htup_details.h" #include "catalog/pg_statistic.h" +#include "common/hashfn.h" #include "ltree.h" #include "utils/builtins.h" #include "utils/lsyscache.h" @@ -37,6 +38,8 @@ PG_FUNCTION_INFO_V1(lca); PG_FUNCTION_INFO_V1(ltree2text); PG_FUNCTION_INFO_V1(text2ltree); PG_FUNCTION_INFO_V1(ltreeparentsel); +PG_FUNCTION_INFO_V1(hash_ltree); +PG_FUNCTION_INFO_V1(hash_ltree_extended); int ltree_compare(const ltree *a, const ltree *b) @@ -588,3 +591,68 @@ ltreeparentsel(PG_FUNCTION_ARGS) PG_RETURN_FLOAT8((float8) selec); } + +/* + * Hashes the elements of the path using the same logic as hash_array + */ +Datum +hash_ltree(PG_FUNCTION_ARGS) +{ + ltree *a = PG_GETARG_LTREE_P(0); + + uint32 result = 1; + int an = a->numlevel; + ltree_level *al = LTREE_FIRST(a); + + while (an > 0) + { + uint32 levelHash = DatumGetUInt32(hash_any((unsigned char *) al->name, al->len)); + + result = (result << 5) - result + levelHash; + + an--; + al = LEVEL_NEXT(al); + } + + PG_FREE_IF_COPY(a, 0); + PG_RETURN_UINT32(result); +} + + +/* + * Hashes the elements of the path using the same logic as hash_array_extended + * (and hash_ltree). It differs from hash_array_extended only in how it handles + * zero length label paths. We return 1 + seed to ensure that the low 32 bits + * of the result match hash_ltree when the seed is 0, as required by the hash + * index support functions, but to also return a different value when there is + * a seed. + */ +Datum +hash_ltree_extended(PG_FUNCTION_ARGS) +{ + ltree *a = PG_GETARG_LTREE_P(0); + const uint64 seed = PG_GETARG_INT64(1); + + uint64 result = 1; + int an = a->numlevel; + ltree_level *al = LTREE_FIRST(a); + + if (an == 0) + { + PG_FREE_IF_COPY(a, 0); + PG_RETURN_UINT64(result + seed); + } + + while (an > 0) + { + uint64 levelHash = DatumGetUInt64(hash_any_extended((unsigned char *) al->name, al->len, seed)); + + result = (result << 5) - result + levelHash; + + an--; + al = LEVEL_NEXT(al); + } + + PG_FREE_IF_COPY(a, 0); + PG_RETURN_UINT64(result); +} diff --git a/contrib/ltree/ltreetest.sql b/contrib/ltree/ltreetest.sql index d6996caf..388d5bb6 100644 --- a/contrib/ltree/ltreetest.sql +++ b/contrib/ltree/ltreetest.sql @@ -19,3 +19,4 @@ INSERT INTO test VALUES ('Top.Collections.Pictures.Astronomy.Galaxies'); INSERT INTO test VALUES ('Top.Collections.Pictures.Astronomy.Astronauts'); CREATE INDEX path_gist_idx ON test USING gist(path); CREATE INDEX path_idx ON test USING btree(path); +CREATE INDEX path_hash_idx ON test USING hash(path); diff --git a/contrib/ltree/meson.build b/contrib/ltree/meson.build index 5862943e..29cc6865 100644 --- a/contrib/ltree/meson.build +++ b/contrib/ltree/meson.build @@ -32,6 +32,7 @@ install_data( 'ltree--1.0--1.1.sql', 'ltree--1.1--1.2.sql', 'ltree--1.1.sql', + 'ltree--1.2--1.3.sql', kwargs: contrib_data_args, ) diff --git a/contrib/ltree/sql/ltree.sql b/contrib/ltree/sql/ltree.sql index 402096f6..869893db 100644 --- a/contrib/ltree/sql/ltree.sql +++ b/contrib/ltree/sql/ltree.sql @@ -282,9 +282,22 @@ SELECT ('{3456,1.2.3.4}'::ltree[] ?<@ '1.2.5') is null; SELECT '{ltree.asd, tree.awdfg}'::ltree[] ?@ 'tree & aWdfg@'::ltxtquery; SELECT '{j.k.l.m, g.b.c.d.e}'::ltree[] ?~ 'A*@|g.b.c.d.e'; +-- Check that the hash_ltree() and hash_ltree_extended() function's lower +-- 32 bits match when the seed is 0 and do not match when the seed != 0 +SELECT v as value, hash_ltree(v)::bit(32) as standard, + hash_ltree_extended(v, 0)::bit(32) as extended0, + hash_ltree_extended(v, 1)::bit(32) as extended1 +FROM (VALUES (NULL::ltree), (''::ltree), ('0'::ltree), ('0.1'::ltree), + ('0.1.2'::ltree), ('0'::ltree), ('0_asd.1_ASD'::ltree)) x(v) +WHERE hash_ltree(v)::bit(32) != hash_ltree_extended(v, 0)::bit(32) + OR hash_ltree(v)::bit(32) = hash_ltree_extended(v, 1)::bit(32); + + CREATE TABLE ltreetest (t ltree); \copy ltreetest FROM 'data/ltree.data' +SELECT count(*) from ltreetest; + SELECT * FROM ltreetest WHERE t < '12.3' order by t asc; SELECT * FROM ltreetest WHERE t <= '12.3' order by t asc; SELECT * FROM ltreetest WHERE t = '12.3' order by t asc; @@ -328,6 +341,36 @@ SELECT * FROM ltreetest WHERE t ~ '23.*.1' order by t asc; SELECT * FROM ltreetest WHERE t ~ '23.*.2' order by t asc; SELECT * FROM ltreetest WHERE t ? '{23.*.1,23.*.2}' order by t asc; +drop index tstidx; + +--- test hash index + +create index tstidx on ltreetest using hash (t); +set enable_seqscan=off; +set enable_bitmapscan=off; + +EXPLAIN (COSTS OFF) SELECT * FROM ltreetest WHERE t = '12.3' order by t asc; +SELECT * FROM ltreetest WHERE t = '12.3' order by t asc; + +set enable_seqscan=on; +set enable_bitmapscan=on; + +-- test hash aggregate + +set enable_hashagg=on; +set enable_sort=off; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM ( +SELECT t FROM (SELECT * FROM ltreetest UNION ALL SELECT * FROM ltreetest) t1 GROUP BY t +) t2; + +SELECT count(*) FROM ( +SELECT t FROM (SELECT * FROM ltreetest UNION ALL SELECT * FROM ltreetest) t1 GROUP BY t +) t2; + +set enable_sort=on; + drop index tstidx; create index tstidx on ltreetest using gist (t gist_ltree_ops(siglen=0)); create index tstidx on ltreetest using gist (t gist_ltree_ops(siglen=2025)); diff --git a/doc/src/sgml/ltree.sgml b/doc/src/sgml/ltree.sgml index 00a6ae70..9584105b 100644 --- a/doc/src/sgml/ltree.sgml +++ b/doc/src/sgml/ltree.sgml @@ -623,6 +623,13 @@ Europe & Russia*@ & !Transportation <literal>>=</literal>, <literal>></literal> </para> </listitem> + <listitem> + <para> + Hash index over <type>ltree</type>: + <literal>=</literal> + </para> + </listitem> + <listitem> <para> GiST index over <type>ltree</type> (<literal>gist_ltree_ops</literal> @@ -712,6 +719,7 @@ INSERT INTO test VALUES ('Top.Collections.Pictures.Astronomy.Galaxies'); INSERT INTO test VALUES ('Top.Collections.Pictures.Astronomy.Astronauts'); CREATE INDEX path_gist_idx ON test USING GIST (path); CREATE INDEX path_idx ON test USING BTREE (path); +CREATE INDEX path_hash_idx ON test USING HASH (path); </programlisting> <para> -- 2.34.1