On Thu, Feb 1, 2024 at 11:11 PM vignesh C <vignes...@gmail.com> wrote:
>
> On Wed, 6 Dec 2023 at 04:08, Tommy Pavlicek <tommypav...@gmail.com> wrote:
> >
> > Thanks.
> >
> > I've attached the latest version that updates the naming in line with
> > the convention.
> >
> > On Mon, Dec 4, 2023 at 12:46 AM jian he <jian.universal...@gmail.com> wrote:
> > >
> > > On Fri, Dec 1, 2023 at 8:44 AM Tommy Pavlicek <tommypav...@gmail.com> 
> > > wrote:
> > > >
> > > >
> > > > Patch updated for those comments (and a touch of cleanup in the tests) 
> > > > attached.
> > >
> > > it would be a better name as hash_ltree than ltree_hash, similar logic
> > > applies to ltree_hash_extended.
> > > that would be the convention. see: 
> > > https://stackoverflow.com/a/69650940/15603477
> > >
> > >
> > > Other than that, it looks good.
>
> CFBot shows one of the test is failing as in [1]:
> diff -U3 /tmp/cirrus-ci-build/contrib/ltree/expected/ltree.out
> /tmp/cirrus-ci-build/build-32/testrun/ltree/regress/results/ltree.out
> --- /tmp/cirrus-ci-build/contrib/ltree/expected/ltree.out 2024-01-31
> 15:18:42.893039599 +0000
> +++ /tmp/cirrus-ci-build/build-32/testrun/ltree/regress/results/ltree.out
> 2024-01-31 15:23:25.309028749 +0000
> @@ -1442,9 +1442,14 @@
>         ('0.1.2'::ltree), ('0'::ltree), ('0_asd.1_ASD'::ltree)) x(v)
>  WHERE  hash_ltree(v)::bit(32) != hash_ltree_extended(v, 0)::bit(32)
>         OR hash_ltree(v)::bit(32) = hash_ltree_extended(v, 1)::bit(32);
> - value | standard | extended0 | extended1
> --------+----------+-----------+-----------
> -(0 rows)
> +    value    |             standard             |
> extended0             |            extended1
> +-------------+----------------------------------+----------------------------------+----------------------------------
> + 0           | 10001010100010010000000000001011 |
> 01011001111001000100011001011011 | 01011001111001000100011010011111
> + 0.1         | 10100000111110001010110001001110 |
> 00111100100010001100110111010101 | 00111100100010001101100011010101
> + 0.1.2       | 01111000011100000101111101110100 |
> 10101110011101011000000011010111 | 10101110011101110010001111000011
> + 0           | 10001010100010010000000000001011 |
> 01011001111001000100011001011011 | 01011001111001000100011010011111
> + 0_asd.1_ASD | 01000010001010000000101001001101 |
> 00111100100010001100110111010101 | 00111100100010001101100011010101
> +(5 rows)
>
> Please post an updated version for the same.
>
> [1] - 
> https://api.cirrus-ci.com/v1/artifact/task/5572544858685440/testrun/build-32/testrun/ltree/regress/regression.diffs
>

It only fails on Linux - Debian Bullseye - Meson.
I fixed the white space, named it v5.
I also made the following changes:
from

uint64 levelHash = hash_any_extended((unsigned char *) al->name, al->len, seed);
uint32 levelHash = hash_any((unsigned char *) al->name, al->len);

to
uint64 levelHash = DatumGetUInt64(hash_any_extended((unsigned char *)
al->name, al->len, seed));
uint32 levelHash = DatumGetUInt32(hash_any((unsigned char *) al->name,
al->len));

(these two line live in different functions)

I have some problems testing it locally, so I post the patch.
From ffc0e3daea5e490d8b9e6b7a0345f780d88b5624 Mon Sep 17 00:00:00 2001
From: jian he <jian.universality@gmail.com>
Date: Fri, 2 Feb 2024 16:03:56 +0800
Subject: [PATCH v5 1/1] add hash functions for the ltree extension.

---
 contrib/ltree/Makefile            |  2 +-
 contrib/ltree/expected/ltree.out  | 65 +++++++++++++++++++++++++++++
 contrib/ltree/ltree--1.2--1.3.sql | 23 +++++++++++
 contrib/ltree/ltree.control       |  2 +-
 contrib/ltree/ltree_op.c          | 68 +++++++++++++++++++++++++++++++
 contrib/ltree/ltreetest.sql       |  1 +
 contrib/ltree/meson.build         |  1 +
 contrib/ltree/sql/ltree.sql       | 43 +++++++++++++++++++
 doc/src/sgml/ltree.sgml           |  8 ++++
 9 files changed, 211 insertions(+), 2 deletions(-)
 create mode 100644 contrib/ltree/ltree--1.2--1.3.sql

diff --git a/contrib/ltree/Makefile b/contrib/ltree/Makefile
index 770769a7..f50f2c94 100644
--- a/contrib/ltree/Makefile
+++ b/contrib/ltree/Makefile
@@ -14,7 +14,7 @@ OBJS = \
 	ltxtquery_op.o
 
 EXTENSION = ltree
-DATA = ltree--1.1--1.2.sql ltree--1.1.sql ltree--1.0--1.1.sql
+DATA = ltree--1.2--1.3.sql ltree--1.1--1.2.sql ltree--1.1.sql ltree--1.0--1.1.sql
 PGFILEDESC = "ltree - hierarchical label data type"
 
 HEADERS = ltree.h
diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out
index 984cd030..dd4f0fc1 100644
--- a/contrib/ltree/expected/ltree.out
+++ b/contrib/ltree/expected/ltree.out
@@ -1433,8 +1433,27 @@ SELECT '{j.k.l.m, g.b.c.d.e}'::ltree[] ?~ 'A*@|g.b.c.d.e';
  g.b.c.d.e
 (1 row)
 
+-- Check that the hash_ltree() and hash_ltree_extended() function's lower
+-- 32 bits match when the seed is 0 and do not match when the seed != 0
+SELECT v as value, hash_ltree(v)::bit(32) as standard,
+       hash_ltree_extended(v, 0)::bit(32) as extended0,
+       hash_ltree_extended(v, 1)::bit(32) as extended1
+FROM   (VALUES (NULL::ltree), (''::ltree), ('0'::ltree), ('0.1'::ltree),
+       ('0.1.2'::ltree), ('0'::ltree), ('0_asd.1_ASD'::ltree)) x(v)
+WHERE  hash_ltree(v)::bit(32) != hash_ltree_extended(v, 0)::bit(32)
+       OR hash_ltree(v)::bit(32) = hash_ltree_extended(v, 1)::bit(32);
+ value | standard | extended0 | extended1 
+-------+----------+-----------+-----------
+(0 rows)
+
 CREATE TABLE ltreetest (t ltree);
 \copy ltreetest FROM 'data/ltree.data'
+SELECT count(*) from ltreetest;
+ count 
+-------
+  1006
+(1 row)
+
 SELECT * FROM ltreetest WHERE t <  '12.3' order by t asc;
                 t                 
 ----------------------------------
@@ -7832,6 +7851,52 @@ SELECT * FROM ltreetest WHERE t ? '{23.*.1,23.*.2}' order by t asc;
  23.3.32.21.5.14.10.17.1
 (4 rows)
 
+drop index tstidx;
+--- test hash index
+create index tstidx on ltreetest using hash (t);
+set enable_seqscan=off;
+set enable_bitmapscan=off;
+EXPLAIN (COSTS OFF) SELECT * FROM ltreetest WHERE t =  '12.3' order by t asc;
+              QUERY PLAN              
+--------------------------------------
+ Index Scan using tstidx on ltreetest
+   Index Cond: (t = '12.3'::ltree)
+(2 rows)
+
+SELECT * FROM ltreetest WHERE t =  '12.3' order by t asc;
+  t   
+------
+ 12.3
+(1 row)
+
+set enable_seqscan=on;
+set enable_bitmapscan=on;
+-- test hash aggregate
+set enable_hashagg=on;
+set enable_sort=off;
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM (
+SELECT t FROM (SELECT * FROM ltreetest UNION ALL SELECT * FROM ltreetest) t1 GROUP BY t
+) t2;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Aggregate
+   ->  HashAggregate
+         Group Key: ltreetest.t
+         ->  Append
+               ->  Seq Scan on ltreetest
+               ->  Seq Scan on ltreetest ltreetest_1
+(6 rows)
+
+SELECT count(*) FROM (
+SELECT t FROM (SELECT * FROM ltreetest UNION ALL SELECT * FROM ltreetest) t1 GROUP BY t
+) t2;
+ count 
+-------
+  1006
+(1 row)
+
+set enable_sort=on;
 drop index tstidx;
 create index tstidx on ltreetest using gist (t gist_ltree_ops(siglen=0));
 ERROR:  value 0 out of bounds for option "siglen"
diff --git a/contrib/ltree/ltree--1.2--1.3.sql b/contrib/ltree/ltree--1.2--1.3.sql
new file mode 100644
index 00000000..bc9a34dd
--- /dev/null
+++ b/contrib/ltree/ltree--1.2--1.3.sql
@@ -0,0 +1,23 @@
+/* contrib/ltree/ltree--1.2--1.3.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION ltree UPDATE TO '1.3'" to load this file. \quit
+
+CREATE FUNCTION hash_ltree(ltree)
+RETURNS integer
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
+
+CREATE FUNCTION hash_ltree_extended(ltree, bigint)
+RETURNS bigint
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
+
+CREATE OPERATOR CLASS hash_ltree_ops
+DEFAULT FOR TYPE ltree USING hash
+AS
+	OPERATOR	1	= ,
+	FUNCTION	1	hash_ltree(ltree),
+	FUNCTION	2	hash_ltree_extended(ltree, bigint);
+
+ALTER OPERATOR =(ltree, ltree) SET (HASHES);
diff --git a/contrib/ltree/ltree.control b/contrib/ltree/ltree.control
index b408d647..c2cbeda9 100644
--- a/contrib/ltree/ltree.control
+++ b/contrib/ltree/ltree.control
@@ -1,6 +1,6 @@
 # ltree extension
 comment = 'data type for hierarchical tree-like structures'
-default_version = '1.2'
+default_version = '1.3'
 module_pathname = '$libdir/ltree'
 relocatable = true
 trusted = true
diff --git a/contrib/ltree/ltree_op.c b/contrib/ltree/ltree_op.c
index da1db5fc..4c464153 100644
--- a/contrib/ltree/ltree_op.c
+++ b/contrib/ltree/ltree_op.c
@@ -9,6 +9,7 @@
 
 #include "access/htup_details.h"
 #include "catalog/pg_statistic.h"
+#include "common/hashfn.h"
 #include "ltree.h"
 #include "utils/builtins.h"
 #include "utils/lsyscache.h"
@@ -37,6 +38,8 @@ PG_FUNCTION_INFO_V1(lca);
 PG_FUNCTION_INFO_V1(ltree2text);
 PG_FUNCTION_INFO_V1(text2ltree);
 PG_FUNCTION_INFO_V1(ltreeparentsel);
+PG_FUNCTION_INFO_V1(hash_ltree);
+PG_FUNCTION_INFO_V1(hash_ltree_extended);
 
 int
 ltree_compare(const ltree *a, const ltree *b)
@@ -588,3 +591,68 @@ ltreeparentsel(PG_FUNCTION_ARGS)
 
 	PG_RETURN_FLOAT8((float8) selec);
 }
+
+/*
+ * Hashes the elements of the path using the same logic as hash_array
+ */
+Datum
+hash_ltree(PG_FUNCTION_ARGS)
+{
+	ltree	   *a = PG_GETARG_LTREE_P(0);
+
+	uint32		result = 1;
+	int			an = a->numlevel;
+	ltree_level *al = LTREE_FIRST(a);
+
+	while (an > 0)
+	{
+		uint32		levelHash = DatumGetUInt32(hash_any((unsigned char *) al->name, al->len));
+
+		result = (result << 5) - result + levelHash;
+
+		an--;
+		al = LEVEL_NEXT(al);
+	}
+
+	PG_FREE_IF_COPY(a, 0);
+	PG_RETURN_UINT32(result);
+}
+
+
+/*
+ * Hashes the elements of the path using the same logic as hash_array_extended
+ * (and hash_ltree). It differs from hash_array_extended only in how it handles
+ * zero length label paths. We return 1 + seed to ensure that the low 32 bits
+ * of the result match hash_ltree when the seed is 0, as required by the hash
+ * index support functions, but to also return a different value when there is
+ * a seed.
+ */
+Datum
+hash_ltree_extended(PG_FUNCTION_ARGS)
+{
+	ltree	   *a = PG_GETARG_LTREE_P(0);
+	const uint64 seed = PG_GETARG_INT64(1);
+
+	uint64		result = 1;
+	int			an = a->numlevel;
+	ltree_level *al = LTREE_FIRST(a);
+
+	if (an == 0)
+	{
+		PG_FREE_IF_COPY(a, 0);
+		PG_RETURN_UINT64(result + seed);
+	}
+
+	while (an > 0)
+	{
+		uint64		levelHash = DatumGetUInt64(hash_any_extended((unsigned char *) al->name, al->len, seed));
+
+		result = (result << 5) - result + levelHash;
+
+		an--;
+		al = LEVEL_NEXT(al);
+	}
+
+	PG_FREE_IF_COPY(a, 0);
+	PG_RETURN_UINT64(result);
+}
diff --git a/contrib/ltree/ltreetest.sql b/contrib/ltree/ltreetest.sql
index d6996caf..388d5bb6 100644
--- a/contrib/ltree/ltreetest.sql
+++ b/contrib/ltree/ltreetest.sql
@@ -19,3 +19,4 @@ INSERT INTO test VALUES ('Top.Collections.Pictures.Astronomy.Galaxies');
 INSERT INTO test VALUES ('Top.Collections.Pictures.Astronomy.Astronauts');
 CREATE INDEX path_gist_idx ON test USING gist(path);
 CREATE INDEX path_idx ON test USING btree(path);
+CREATE INDEX path_hash_idx ON test USING hash(path);
diff --git a/contrib/ltree/meson.build b/contrib/ltree/meson.build
index 5862943e..29cc6865 100644
--- a/contrib/ltree/meson.build
+++ b/contrib/ltree/meson.build
@@ -32,6 +32,7 @@ install_data(
   'ltree--1.0--1.1.sql',
   'ltree--1.1--1.2.sql',
   'ltree--1.1.sql',
+  'ltree--1.2--1.3.sql',
   kwargs: contrib_data_args,
 )
 
diff --git a/contrib/ltree/sql/ltree.sql b/contrib/ltree/sql/ltree.sql
index 402096f6..869893db 100644
--- a/contrib/ltree/sql/ltree.sql
+++ b/contrib/ltree/sql/ltree.sql
@@ -282,9 +282,22 @@ SELECT ('{3456,1.2.3.4}'::ltree[] ?<@ '1.2.5') is null;
 SELECT '{ltree.asd, tree.awdfg}'::ltree[] ?@ 'tree & aWdfg@'::ltxtquery;
 SELECT '{j.k.l.m, g.b.c.d.e}'::ltree[] ?~ 'A*@|g.b.c.d.e';
 
+-- Check that the hash_ltree() and hash_ltree_extended() function's lower
+-- 32 bits match when the seed is 0 and do not match when the seed != 0
+SELECT v as value, hash_ltree(v)::bit(32) as standard,
+       hash_ltree_extended(v, 0)::bit(32) as extended0,
+       hash_ltree_extended(v, 1)::bit(32) as extended1
+FROM   (VALUES (NULL::ltree), (''::ltree), ('0'::ltree), ('0.1'::ltree),
+       ('0.1.2'::ltree), ('0'::ltree), ('0_asd.1_ASD'::ltree)) x(v)
+WHERE  hash_ltree(v)::bit(32) != hash_ltree_extended(v, 0)::bit(32)
+       OR hash_ltree(v)::bit(32) = hash_ltree_extended(v, 1)::bit(32);
+
+
 CREATE TABLE ltreetest (t ltree);
 \copy ltreetest FROM 'data/ltree.data'
 
+SELECT count(*) from ltreetest;
+
 SELECT * FROM ltreetest WHERE t <  '12.3' order by t asc;
 SELECT * FROM ltreetest WHERE t <= '12.3' order by t asc;
 SELECT * FROM ltreetest WHERE t =  '12.3' order by t asc;
@@ -328,6 +341,36 @@ SELECT * FROM ltreetest WHERE t ~ '23.*.1' order by t asc;
 SELECT * FROM ltreetest WHERE t ~ '23.*.2' order by t asc;
 SELECT * FROM ltreetest WHERE t ? '{23.*.1,23.*.2}' order by t asc;
 
+drop index tstidx;
+
+--- test hash index
+
+create index tstidx on ltreetest using hash (t);
+set enable_seqscan=off;
+set enable_bitmapscan=off;
+
+EXPLAIN (COSTS OFF) SELECT * FROM ltreetest WHERE t =  '12.3' order by t asc;
+SELECT * FROM ltreetest WHERE t =  '12.3' order by t asc;
+
+set enable_seqscan=on;
+set enable_bitmapscan=on;
+
+-- test hash aggregate
+
+set enable_hashagg=on;
+set enable_sort=off;
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM (
+SELECT t FROM (SELECT * FROM ltreetest UNION ALL SELECT * FROM ltreetest) t1 GROUP BY t
+) t2;
+
+SELECT count(*) FROM (
+SELECT t FROM (SELECT * FROM ltreetest UNION ALL SELECT * FROM ltreetest) t1 GROUP BY t
+) t2;
+
+set enable_sort=on;
+
 drop index tstidx;
 create index tstidx on ltreetest using gist (t gist_ltree_ops(siglen=0));
 create index tstidx on ltreetest using gist (t gist_ltree_ops(siglen=2025));
diff --git a/doc/src/sgml/ltree.sgml b/doc/src/sgml/ltree.sgml
index 00a6ae70..9584105b 100644
--- a/doc/src/sgml/ltree.sgml
+++ b/doc/src/sgml/ltree.sgml
@@ -623,6 +623,13 @@ Europe &amp; Russia*@ &amp; !Transportation
      <literal>&gt;=</literal>, <literal>&gt;</literal>
     </para>
    </listitem>
+   <listitem>
+    <para>
+     Hash index over <type>ltree</type>:
+     <literal>=</literal>
+    </para>
+   </listitem>
+
    <listitem>
     <para>
      GiST index over <type>ltree</type> (<literal>gist_ltree_ops</literal>
@@ -712,6 +719,7 @@ INSERT INTO test VALUES ('Top.Collections.Pictures.Astronomy.Galaxies');
 INSERT INTO test VALUES ('Top.Collections.Pictures.Astronomy.Astronauts');
 CREATE INDEX path_gist_idx ON test USING GIST (path);
 CREATE INDEX path_idx ON test USING BTREE (path);
+CREATE INDEX path_hash_idx ON test USING HASH (path);
 </programlisting>
 
   <para>
-- 
2.34.1

Reply via email to