Sorry for fat-fingering the previous reply -- I wanted to add:

On Fri, 2007-04-27 at 10:02 -0400, Tom Lane wrote:
> Perhaps a sufficiently robust way would be to form the hash as the
> XOR of each supplied digit, circular-shifted by say 3 times the
> digit's weight. 

The only objection I have to this is that it means we need to have
another hash function in the backend. The Jenkins hash we use in
hash_any() has been studied and we can have at least some confidence in
its collision-resistance, etc.

Anyway, attached is a revised version of the hash_any()-based patch.

-Neil

Index: src/backend/utils/adt/numeric.c
===================================================================
RCS file: /home/neilc/postgres/cvs_root/pgsql/src/backend/utils/adt/numeric.c,v
retrieving revision 1.101
diff -c -p -r1.101 numeric.c
*** src/backend/utils/adt/numeric.c	27 Feb 2007 23:48:08 -0000	1.101
--- src/backend/utils/adt/numeric.c	28 Apr 2007 22:28:54 -0000
***************
*** 26,31 ****
--- 26,32 ----
  #include <limits.h>
  #include <math.h>
  
+ #include "access/hash.h"
  #include "catalog/pg_type.h"
  #include "libpq/pqformat.h"
  #include "utils/array.h"
*************** cmp_numerics(Numeric num1, Numeric num2)
*** 1149,1154 ****
--- 1150,1184 ----
  	return result;
  }
  
+ Datum
+ hash_numeric(PG_FUNCTION_ARGS)
+ {
+ 	Numeric 	key = PG_GETARG_NUMERIC(0);
+ 	Datum 		digit_hash;
+ 	Datum 		result;
+ 
+ 	/* If it's NaN, don't try to hash the rest of the fields */
+ 	if (NUMERIC_IS_NAN(key))
+ 		PG_RETURN_UINT32(0);
+ 
+ 	/* If n_weight is zero, it is a zero, regardless of any other fields */
+ 	if (NUMERIC_NDIGITS(0) == 0)
+ 		PG_RETURN_UINT32(-1);
+ 
+ 	/*
+ 	 * XXX: we don't hash on the Numeric's scale, since two numerics
+ 	 * can compare equal but have different scales. We also don't hash
+ 	 * on the sign, although we easily could (since a sign difference
+ 	 * means inequality, it shouldn't affect correctness).
+ 	 */
+ 	digit_hash = hash_any((unsigned char *) NUMERIC_DIGITS(key),
+ 						  NUMERIC_NDIGITS(key) * sizeof(NumericDigit));
+ 
+ 	/* Mix in the weight, via XOR */
+ 	result = digit_hash ^ key->n_weight;
+ 	PG_RETURN_DATUM(result);
+ }
+ 
  
  /* ----------------------------------------------------------------------
   *
Index: src/include/catalog/pg_amop.h
===================================================================
RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/catalog/pg_amop.h,v
retrieving revision 1.80
diff -c -p -r1.80 pg_amop.h
*** src/include/catalog/pg_amop.h	2 Apr 2007 03:49:40 -0000	1.80
--- src/include/catalog/pg_amop.h	28 Apr 2007 22:11:42 -0000
*************** DATA(insert (	2232   19 19 1 f 2334	405 
*** 568,573 ****
--- 568,575 ----
  DATA(insert (	2235   1033 1033 1 f  974	405 ));
  /* uuid_ops */ 
  DATA(insert (	2969   2950 2950 1 f 2972 405 ));
+ /* numeric_ops */
+ DATA(insert (	1998   1700 1700 1 f 1752 405 ));
  
  
  /*
Index: src/include/catalog/pg_amproc.h
===================================================================
RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/catalog/pg_amproc.h,v
retrieving revision 1.64
diff -c -p -r1.64 pg_amproc.h
*** src/include/catalog/pg_amproc.h	2 Apr 2007 03:49:40 -0000	1.64
--- src/include/catalog/pg_amproc.h	28 Apr 2007 22:11:42 -0000
*************** DATA(insert (	1990   26 26 1 453 ));
*** 148,153 ****
--- 148,154 ----
  DATA(insert (	1992   30 30 1 457 ));
  DATA(insert (	1995   25 25 1 400 ));
  DATA(insert (	1997   1083 1083 1 452 ));
+ DATA(insert (	1998   1700 1700 1 432 ));
  DATA(insert (	1999   1184 1184 1 452 ));
  DATA(insert (	2001   1266 1266 1 1696 ));
  DATA(insert (	2040   1114 1114 1 452 ));
Index: src/include/catalog/pg_opclass.h
===================================================================
RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/catalog/pg_opclass.h,v
retrieving revision 1.75
diff -c -p -r1.75 pg_opclass.h
*** src/include/catalog/pg_opclass.h	2 Apr 2007 03:49:40 -0000	1.75
--- src/include/catalog/pg_opclass.h	28 Apr 2007 22:11:42 -0000
*************** DATA(insert (	405		macaddr_ops			PGNSP P
*** 129,134 ****
--- 129,135 ----
  DATA(insert (	403		name_ops			PGNSP PGUID 1986   19 t 0 ));
  DATA(insert (	405		name_ops			PGNSP PGUID 1987   19 t 0 ));
  DATA(insert (	403		numeric_ops			PGNSP PGUID 1988 1700 t 0 ));
+ DATA(insert (	405		numeric_ops			PGNSP PGUID 1998 1700 t 0 ));
  DATA(insert OID = 1981 ( 403	oid_ops		PGNSP PGUID 1989   26 t 0 ));
  #define OID_BTREE_OPS_OID 1981
  DATA(insert (	405		oid_ops				PGNSP PGUID 1990   26 t 0 ));
Index: src/include/catalog/pg_operator.h
===================================================================
RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/catalog/pg_operator.h,v
retrieving revision 1.151
diff -c -p -r1.151 pg_operator.h
*** src/include/catalog/pg_operator.h	2 Apr 2007 03:49:40 -0000	1.151
--- src/include/catalog/pg_operator.h	28 Apr 2007 22:11:42 -0000
*************** DATA(insert OID = 1630 (  "!~~*"  PGNSP 
*** 675,681 ****
  
  /* NUMERIC type - OID's 1700-1799 */
  DATA(insert OID = 1751 (  "-"	   PGNSP PGUID l f f	0 1700 1700    0	0 numeric_uminus - - ));
! DATA(insert OID = 1752 (  "="	   PGNSP PGUID b t f 1700 1700	 16 1752 1753 numeric_eq eqsel eqjoinsel ));
  DATA(insert OID = 1753 (  "<>"	   PGNSP PGUID b f f 1700 1700	 16 1753 1752 numeric_ne neqsel neqjoinsel ));
  DATA(insert OID = 1754 (  "<"	   PGNSP PGUID b f f 1700 1700	 16 1756 1757 numeric_lt scalarltsel scalarltjoinsel ));
  DATA(insert OID = 1755 (  "<="	   PGNSP PGUID b f f 1700 1700	 16 1757 1756 numeric_le scalarltsel scalarltjoinsel ));
--- 675,681 ----
  
  /* NUMERIC type - OID's 1700-1799 */
  DATA(insert OID = 1751 (  "-"	   PGNSP PGUID l f f	0 1700 1700    0	0 numeric_uminus - - ));
! DATA(insert OID = 1752 (  "="	   PGNSP PGUID b t t 1700 1700	 16 1752 1753 numeric_eq eqsel eqjoinsel ));
  DATA(insert OID = 1753 (  "<>"	   PGNSP PGUID b f f 1700 1700	 16 1753 1752 numeric_ne neqsel neqjoinsel ));
  DATA(insert OID = 1754 (  "<"	   PGNSP PGUID b f f 1700 1700	 16 1756 1757 numeric_lt scalarltsel scalarltjoinsel ));
  DATA(insert OID = 1755 (  "<="	   PGNSP PGUID b f f 1700 1700	 16 1757 1756 numeric_le scalarltsel scalarltjoinsel ));
Index: src/include/catalog/pg_opfamily.h
===================================================================
RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/catalog/pg_opfamily.h,v
retrieving revision 1.4
diff -c -p -r1.4 pg_opfamily.h
*** src/include/catalog/pg_opfamily.h	2 Apr 2007 03:49:40 -0000	1.4
--- src/include/catalog/pg_opfamily.h	28 Apr 2007 22:11:42 -0000
*************** DATA(insert OID = 1986 (	403		name_ops		
*** 93,98 ****
--- 93,99 ----
  #define NAME_BTREE_FAM_OID 1986
  DATA(insert OID = 1987 (	405		name_ops		PGNSP PGUID ));
  DATA(insert OID = 1988 (	403		numeric_ops		PGNSP PGUID ));
+ DATA(insert OID = 1998 (	405		numeric_ops		PGNSP PGUID ));
  DATA(insert OID = 1989 (	403		oid_ops			PGNSP PGUID ));
  #define OID_BTREE_FAM_OID 1989
  DATA(insert OID = 1990 (	405		oid_ops			PGNSP PGUID ));
Index: src/include/catalog/pg_proc.h
===================================================================
RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/catalog/pg_proc.h,v
retrieving revision 1.454
diff -c -p -r1.454 pg_proc.h
*** src/include/catalog/pg_proc.h	2 Apr 2007 03:49:40 -0000	1.454
--- src/include/catalog/pg_proc.h	28 Apr 2007 22:11:42 -0000
*************** DATA(insert OID = 399 (  hashmacaddr	   
*** 838,843 ****
--- 838,845 ----
  DESCR("hash");
  DATA(insert OID = 422 (  hashinet		   PGNSP PGUID 12 1 0 f f t f i 1 23 "869" _null_ _null_ _null_ hashinet - _null_ ));
  DESCR("hash");
+ DATA(insert OID = 432 (  hash_numeric	   PGNSP PGUID 12 1 0 f f t f i 1 23 "1700" _null_ _null_ _null_ hash_numeric - _null_ ));
+ DESCR("hash");
  DATA(insert OID = 458 (  text_larger	   PGNSP PGUID 12 1 0 f f t f i 2 25 "25 25" _null_ _null_ _null_ text_larger - _null_ ));
  DESCR("larger of two");
  DATA(insert OID = 459 (  text_smaller	   PGNSP PGUID 12 1 0 f f t f i 2 25 "25 25" _null_ _null_ _null_ text_smaller - _null_ ));
Index: src/include/utils/builtins.h
===================================================================
RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/utils/builtins.h,v
retrieving revision 1.291
diff -c -p -r1.291 builtins.h
*** src/include/utils/builtins.h	2 Apr 2007 03:49:41 -0000	1.291
--- src/include/utils/builtins.h	28 Apr 2007 22:11:42 -0000
*************** extern Datum int2_avg_accum(PG_FUNCTION_
*** 883,888 ****
--- 883,889 ----
  extern Datum int4_avg_accum(PG_FUNCTION_ARGS);
  extern Datum int8_avg(PG_FUNCTION_ARGS);
  extern Datum width_bucket_numeric(PG_FUNCTION_ARGS);
+ extern Datum hash_numeric(PG_FUNCTION_ARGS);
  
  /* ri_triggers.c */
  extern Datum RI_FKey_check_ins(PG_FUNCTION_ARGS);
---------------------------(end of broadcast)---------------------------
TIP 6: explain analyze is your friend

Reply via email to