# Re: [PATCHES] Hash function for numeric (WIP)

```On Thu, 2007-03-05 at 23:57 -0400, Tom Lane wrote:
> Hm, but apply hash_any() to the remaining digits?  That might work, if
> you are careful about how you factor the weight into it (or just not try
> to use the weight in the hash).```
```
Attached is a patch that implements this idea. Since leading or trailing
zeroes are far from the common case, I think we should still include the
weight in the hash when possible: the patch does so when it doesn't find
a leading zero in the Numeric. I did some testing by constructing an
in-memory Numeric with leading and trailing zeroes, and this approach
seems to work.

Unless you see anything else that needs fixing, I'll apply this patch to
HEAD in a day or two.

-Neil

```
```Index: src/backend/utils/adt/numeric.c
===================================================================
retrieving revision 1.101
diff -c -p -r1.101 numeric.c
*** src/backend/utils/adt/numeric.c	27 Feb 2007 23:48:08 -0000	1.101
--- src/backend/utils/adt/numeric.c	7 May 2007 01:16:30 -0000
***************
*** 26,31 ****
--- 26,32 ----
#include <limits.h>
#include <math.h>

+ #include "access/hash.h"
#include "catalog/pg_type.h"
#include "libpq/pqformat.h"
#include "utils/array.h"
*************** cmp_numerics(Numeric num1, Numeric num2)
*** 1149,1154 ****
--- 1150,1224 ----
return result;
}

+ Datum
+ hash_numeric(PG_FUNCTION_ARGS)
+ {
+ 	Numeric 	key = PG_GETARG_NUMERIC(0);
+ 	Datum 		digit_hash;
+ 	Datum 		result;
+ 	int 		start_offset;
+ 	int 		end_offset;
+ 	int 		i;
+ 	int 		hash_len;
+
+ 	/* If it's NaN, don't try to hash the rest of the fields */
+ 	if (NUMERIC_IS_NAN(key))
+ 		PG_RETURN_UINT32(0);
+
+ 	/*
+ 	 * If ndigits is zero, the value of the number is zero, regardless
+ 	 * of any other fields
+ 	 */
+ 	if (NUMERIC_NDIGITS(key) == 0)
+ 		PG_RETURN_UINT32(-1);
+
+ 	/*
+ 	 * Omit any leading or trailing zeros from the input to the
+ 	 * hash. The numeric implementation *should* guarantee that
+ 	 * leading and trailing zeros are suppressed, but we're
+ 	 * paranoid. Note that we measure the start and end offsets in
+ 	 * units of NumericDigits, not bytes.
+ 	 */
+ 	start_offset = 0;
+ 	end_offset 	 = 0;
+
+ 	for (i = 0; i < NUMERIC_NDIGITS(key); i++)
+ 	{
+ 		if (NUMERIC_DIGITS(key)[i] != (NumericDigit) 0)
+ 			break;
+
+ 		start_offset++;
+ 	}
+
+ 	for (i = NUMERIC_NDIGITS(key) - 1; i >= 0; i--)
+ 	{
+ 		if (NUMERIC_DIGITS(key)[i] != (NumericDigit) 0)
+ 			break;
+
+ 		end_offset++;
+ 	}
+
+ 	/*
+ 	 * Note that we don't hash on the Numeric's scale, since two
+ 	 * numerics can compare equal but have different scales. We also
+ 	 * don't hash on the sign, although we could: since a sign
+ 	 * difference implies inequality, this shouldn't affect correctness.
+ 	 */
+ 	hash_len = NUMERIC_NDIGITS(key) - start_offset - end_offset;
+ 	digit_hash = hash_any((unsigned char *) (NUMERIC_DIGITS(key) + start_offset),
+                           hash_len * sizeof(NumericDigit));
+
+ 	/*
+ 	 * Mix in the weight, via XOR. Since the weight is the exponent to
+ 	 * which the first digit is implicitly raised, if the first digit
+ 	 * was a zero we shouldn't include the weight in the hash.
+ 	 */
+ 	if (start_offset == 0)
+ 		result = digit_hash ^ key->n_weight;
+
+ 	PG_RETURN_DATUM(result);
+ }
+

/* ----------------------------------------------------------------------
*
Index: src/include/catalog/pg_amop.h
===================================================================
RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/catalog/pg_amop.h,v
retrieving revision 1.80
diff -c -p -r1.80 pg_amop.h
*** src/include/catalog/pg_amop.h	2 Apr 2007 03:49:40 -0000	1.80
--- src/include/catalog/pg_amop.h	7 May 2007 00:48:32 -0000
*************** DATA(insert (	2232   19 19 1 f 2334	405
*** 568,573 ****
--- 568,575 ----
DATA(insert (	2235   1033 1033 1 f  974	405 ));
/* uuid_ops */
DATA(insert (	2969   2950 2950 1 f 2972 405 ));
+ /* numeric_ops */
+ DATA(insert (	1998   1700 1700 1 f 1752 405 ));

/*
Index: src/include/catalog/pg_amproc.h
===================================================================
RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/catalog/pg_amproc.h,v
retrieving revision 1.64
diff -c -p -r1.64 pg_amproc.h
*** src/include/catalog/pg_amproc.h	2 Apr 2007 03:49:40 -0000	1.64
--- src/include/catalog/pg_amproc.h	7 May 2007 00:48:32 -0000
*************** DATA(insert (	1990   26 26 1 453 ));
*** 148,153 ****
--- 148,154 ----
DATA(insert (	1992   30 30 1 457 ));
DATA(insert (	1995   25 25 1 400 ));
DATA(insert (	1997   1083 1083 1 452 ));
+ DATA(insert (	1998   1700 1700 1 432 ));
DATA(insert (	1999   1184 1184 1 452 ));
DATA(insert (	2001   1266 1266 1 1696 ));
DATA(insert (	2040   1114 1114 1 452 ));
Index: src/include/catalog/pg_opclass.h
===================================================================
RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/catalog/pg_opclass.h,v
retrieving revision 1.75
diff -c -p -r1.75 pg_opclass.h
*** src/include/catalog/pg_opclass.h	2 Apr 2007 03:49:40 -0000	1.75
--- src/include/catalog/pg_opclass.h	7 May 2007 00:48:32 -0000
*************** DATA(insert (	405		macaddr_ops			PGNSP P
*** 129,134 ****
--- 129,135 ----
DATA(insert (	403		name_ops			PGNSP PGUID 1986   19 t 0 ));
DATA(insert (	405		name_ops			PGNSP PGUID 1987   19 t 0 ));
DATA(insert (	403		numeric_ops			PGNSP PGUID 1988 1700 t 0 ));
+ DATA(insert (	405		numeric_ops			PGNSP PGUID 1998 1700 t 0 ));
DATA(insert OID = 1981 ( 403	oid_ops		PGNSP PGUID 1989   26 t 0 ));
#define OID_BTREE_OPS_OID 1981
DATA(insert (	405		oid_ops				PGNSP PGUID 1990   26 t 0 ));
Index: src/include/catalog/pg_operator.h
===================================================================
RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/catalog/pg_operator.h,v
retrieving revision 1.151
diff -c -p -r1.151 pg_operator.h
*** src/include/catalog/pg_operator.h	2 Apr 2007 03:49:40 -0000	1.151
--- src/include/catalog/pg_operator.h	7 May 2007 00:48:32 -0000
*************** DATA(insert OID = 1630 (  "!~~*"  PGNSP
*** 675,681 ****

/* NUMERIC type - OID's 1700-1799 */
DATA(insert OID = 1751 (  "-"	   PGNSP PGUID l f f	0 1700 1700    0	0 numeric_uminus - - ));
! DATA(insert OID = 1752 (  "="	   PGNSP PGUID b t f 1700 1700	 16 1752 1753 numeric_eq eqsel eqjoinsel ));
DATA(insert OID = 1753 (  "<>"	   PGNSP PGUID b f f 1700 1700	 16 1753 1752 numeric_ne neqsel neqjoinsel ));
DATA(insert OID = 1754 (  "<"	   PGNSP PGUID b f f 1700 1700	 16 1756 1757 numeric_lt scalarltsel scalarltjoinsel ));
DATA(insert OID = 1755 (  "<="	   PGNSP PGUID b f f 1700 1700	 16 1757 1756 numeric_le scalarltsel scalarltjoinsel ));
--- 675,681 ----

/* NUMERIC type - OID's 1700-1799 */
DATA(insert OID = 1751 (  "-"	   PGNSP PGUID l f f	0 1700 1700    0	0 numeric_uminus - - ));
! DATA(insert OID = 1752 (  "="	   PGNSP PGUID b t t 1700 1700	 16 1752 1753 numeric_eq eqsel eqjoinsel ));
DATA(insert OID = 1753 (  "<>"	   PGNSP PGUID b f f 1700 1700	 16 1753 1752 numeric_ne neqsel neqjoinsel ));
DATA(insert OID = 1754 (  "<"	   PGNSP PGUID b f f 1700 1700	 16 1756 1757 numeric_lt scalarltsel scalarltjoinsel ));
DATA(insert OID = 1755 (  "<="	   PGNSP PGUID b f f 1700 1700	 16 1757 1756 numeric_le scalarltsel scalarltjoinsel ));
Index: src/include/catalog/pg_opfamily.h
===================================================================
RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/catalog/pg_opfamily.h,v
retrieving revision 1.4
diff -c -p -r1.4 pg_opfamily.h
*** src/include/catalog/pg_opfamily.h	2 Apr 2007 03:49:40 -0000	1.4
--- src/include/catalog/pg_opfamily.h	7 May 2007 00:48:32 -0000
*************** DATA(insert OID = 1986 (	403		name_ops
*** 93,98 ****
--- 93,99 ----
#define NAME_BTREE_FAM_OID 1986
DATA(insert OID = 1987 (	405		name_ops		PGNSP PGUID ));
DATA(insert OID = 1988 (	403		numeric_ops		PGNSP PGUID ));
+ DATA(insert OID = 1998 (	405		numeric_ops		PGNSP PGUID ));
DATA(insert OID = 1989 (	403		oid_ops			PGNSP PGUID ));
#define OID_BTREE_FAM_OID 1989
DATA(insert OID = 1990 (	405		oid_ops			PGNSP PGUID ));
Index: src/include/catalog/pg_proc.h
===================================================================
RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/catalog/pg_proc.h,v
retrieving revision 1.454
diff -c -p -r1.454 pg_proc.h
*** src/include/catalog/pg_proc.h	2 Apr 2007 03:49:40 -0000	1.454
--- src/include/catalog/pg_proc.h	7 May 2007 01:07:21 -0000
*************** DATA(insert OID = 399 (  hashmacaddr
*** 838,843 ****
--- 838,845 ----
DESCR("hash");
DATA(insert OID = 422 (  hashinet		   PGNSP PGUID 12 1 0 f f t f i 1 23 "869" _null_ _null_ _null_ hashinet - _null_ ));
DESCR("hash");
+ DATA(insert OID = 432 (  hash_numeric	   PGNSP PGUID 12 1 0 f f t f i 1 23 "1700" _null_ _null_ _null_ hash_numeric - _null_ ));
+ DESCR("hash");
DATA(insert OID = 458 (  text_larger	   PGNSP PGUID 12 1 0 f f t f i 2 25 "25 25" _null_ _null_ _null_ text_larger - _null_ ));
DESCR("larger of two");
DATA(insert OID = 459 (  text_smaller	   PGNSP PGUID 12 1 0 f f t f i 2 25 "25 25" _null_ _null_ _null_ text_smaller - _null_ ));
Index: src/include/utils/builtins.h
===================================================================
RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/utils/builtins.h,v
retrieving revision 1.291
diff -c -p -r1.291 builtins.h
*** src/include/utils/builtins.h	2 Apr 2007 03:49:41 -0000	1.291
--- src/include/utils/builtins.h	7 May 2007 01:07:12 -0000
*************** extern Datum int2_avg_accum(PG_FUNCTION_
*** 883,888 ****
--- 883,889 ----
extern Datum int4_avg_accum(PG_FUNCTION_ARGS);
extern Datum int8_avg(PG_FUNCTION_ARGS);
extern Datum width_bucket_numeric(PG_FUNCTION_ARGS);
+ extern Datum hash_numeric(PG_FUNCTION_ARGS);

/* ri_triggers.c */
extern Datum RI_FKey_check_ins(PG_FUNCTION_ARGS);
```
```---------------------------(end of broadcast)---------------------------
TIP 3: Have you checked our extensive FAQ?

http://www.postgresql.org/docs/faq
```