There is currently no support for hashing numerics. This prevents
numerics from being hash indexed or used in a hashed aggregation. This
patch makes the necessary changes to the catalogs to enable hashing for
numerics, and implements a first sketch at a hash function for numerics.

For any two numerics that compare equal, we need to compute the same
hash value for both datums, even if their bit patterns differ. This
patch computes the hash from the "n_weight" and "n_data" fields of the
numeric -- notably, it doesn't use the numeric's "scale" field, since
two equal numerics may have different scales. The hash seems to return
the correct results for the simple test cases that I've tried, but I'm
not very familiar with the details of the numeric implementation -- can
anyone comment on whether this hash function is correct?

Thanks to Sailesh Krishnamurthy for reporting this problem.

-Neil

Index: src/backend/utils/adt/numeric.c
===================================================================
RCS file: /home/neilc/postgres/cvs_root/pgsql/src/backend/utils/adt/numeric.c,v
retrieving revision 1.101
diff -c -p -r1.101 numeric.c
*** src/backend/utils/adt/numeric.c	27 Feb 2007 23:48:08 -0000	1.101
--- src/backend/utils/adt/numeric.c	27 Apr 2007 07:25:38 -0000
***************
*** 26,31 ****
--- 26,32 ----
  #include <limits.h>
  #include <math.h>
  
+ #include "access/hash.h"
  #include "catalog/pg_type.h"
  #include "libpq/pqformat.h"
  #include "utils/array.h"
*************** cmp_numerics(Numeric num1, Numeric num2)
*** 1149,1154 ****
--- 1150,1184 ----
  	return result;
  }
  
+ Datum
+ hash_numeric(PG_FUNCTION_ARGS)
+ {
+ 	Numeric 	key = PG_GETARG_NUMERIC(0);
+ 	Datum 		digit_hash;
+ 	Datum 		weight_hash;
+ 	Datum 		result;
+ 
+ 	if (NUMERIC_IS_NAN(key))
+ 		PG_RETURN_UINT32(0);
+ 
+ 	/*
+ 	 * XXX: we don't hash on the Numeric's scale, since two numerics
+ 	 * can compare equal but have different scales. We also don't hash
+ 	 * on the sign, although we easily could (since a sign difference
+ 	 * means inequality, it shouldn't affect correctness).
+ 	 */
+ 	digit_hash = hash_any((unsigned char *) NUMERIC_DIGITS(key),
+ 						  NUMERIC_NDIGITS(key) * sizeof(NumericDigit));
+ 	weight_hash = DirectFunctionCall1(hashint2,
+ 									  Int16GetDatum(key->n_weight));
+ 
+ 	/*
+ 	 * XXX: we combine the hashes with XOR. Is this the right choice?
+ 	 */
+ 	result = digit_hash ^ weight_hash;
+ 	PG_RETURN_DATUM(result);
+ }
+ 
  
  /* ----------------------------------------------------------------------
   *
Index: src/include/catalog/pg_amop.h
===================================================================
RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/catalog/pg_amop.h,v
retrieving revision 1.80
diff -c -p -r1.80 pg_amop.h
*** src/include/catalog/pg_amop.h	2 Apr 2007 03:49:40 -0000	1.80
--- src/include/catalog/pg_amop.h	27 Apr 2007 07:38:32 -0000
*************** DATA(insert (	2232   19 19 1 f 2334	405 
*** 568,573 ****
--- 568,575 ----
  DATA(insert (	2235   1033 1033 1 f  974	405 ));
  /* uuid_ops */ 
  DATA(insert (	2969   2950 2950 1 f 2972 405 ));
+ /* numeric_ops */
+ DATA(insert (	1998   1700 1700 1 f 1752 405 ));
  
  
  /*
Index: src/include/catalog/pg_amproc.h
===================================================================
RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/catalog/pg_amproc.h,v
retrieving revision 1.64
diff -c -p -r1.64 pg_amproc.h
*** src/include/catalog/pg_amproc.h	2 Apr 2007 03:49:40 -0000	1.64
--- src/include/catalog/pg_amproc.h	27 Apr 2007 07:37:45 -0000
*************** DATA(insert (	1990   26 26 1 453 ));
*** 148,153 ****
--- 148,154 ----
  DATA(insert (	1992   30 30 1 457 ));
  DATA(insert (	1995   25 25 1 400 ));
  DATA(insert (	1997   1083 1083 1 452 ));
+ DATA(insert (	1998   1700 1700 1 432 ));
  DATA(insert (	1999   1184 1184 1 452 ));
  DATA(insert (	2001   1266 1266 1 1696 ));
  DATA(insert (	2040   1114 1114 1 452 ));
Index: src/include/catalog/pg_opclass.h
===================================================================
RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/catalog/pg_opclass.h,v
retrieving revision 1.75
diff -c -p -r1.75 pg_opclass.h
*** src/include/catalog/pg_opclass.h	2 Apr 2007 03:49:40 -0000	1.75
--- src/include/catalog/pg_opclass.h	27 Apr 2007 07:06:54 -0000
*************** DATA(insert (	405		macaddr_ops			PGNSP P
*** 129,134 ****
--- 129,135 ----
  DATA(insert (	403		name_ops			PGNSP PGUID 1986   19 t 0 ));
  DATA(insert (	405		name_ops			PGNSP PGUID 1987   19 t 0 ));
  DATA(insert (	403		numeric_ops			PGNSP PGUID 1988 1700 t 0 ));
+ DATA(insert (	405		numeric_ops			PGNSP PGUID 1998 1700 t 0 ));
  DATA(insert OID = 1981 ( 403	oid_ops		PGNSP PGUID 1989   26 t 0 ));
  #define OID_BTREE_OPS_OID 1981
  DATA(insert (	405		oid_ops				PGNSP PGUID 1990   26 t 0 ));
Index: src/include/catalog/pg_operator.h
===================================================================
RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/catalog/pg_operator.h,v
retrieving revision 1.151
diff -c -p -r1.151 pg_operator.h
*** src/include/catalog/pg_operator.h	2 Apr 2007 03:49:40 -0000	1.151
--- src/include/catalog/pg_operator.h	27 Apr 2007 07:06:54 -0000
*************** DATA(insert OID = 1630 (  "!~~*"  PGNSP 
*** 675,681 ****
  
  /* NUMERIC type - OID's 1700-1799 */
  DATA(insert OID = 1751 (  "-"	   PGNSP PGUID l f f	0 1700 1700    0	0 numeric_uminus - - ));
! DATA(insert OID = 1752 (  "="	   PGNSP PGUID b t f 1700 1700	 16 1752 1753 numeric_eq eqsel eqjoinsel ));
  DATA(insert OID = 1753 (  "<>"	   PGNSP PGUID b f f 1700 1700	 16 1753 1752 numeric_ne neqsel neqjoinsel ));
  DATA(insert OID = 1754 (  "<"	   PGNSP PGUID b f f 1700 1700	 16 1756 1757 numeric_lt scalarltsel scalarltjoinsel ));
  DATA(insert OID = 1755 (  "<="	   PGNSP PGUID b f f 1700 1700	 16 1757 1756 numeric_le scalarltsel scalarltjoinsel ));
--- 675,681 ----
  
  /* NUMERIC type - OID's 1700-1799 */
  DATA(insert OID = 1751 (  "-"	   PGNSP PGUID l f f	0 1700 1700    0	0 numeric_uminus - - ));
! DATA(insert OID = 1752 (  "="	   PGNSP PGUID b t t 1700 1700	 16 1752 1753 numeric_eq eqsel eqjoinsel ));
  DATA(insert OID = 1753 (  "<>"	   PGNSP PGUID b f f 1700 1700	 16 1753 1752 numeric_ne neqsel neqjoinsel ));
  DATA(insert OID = 1754 (  "<"	   PGNSP PGUID b f f 1700 1700	 16 1756 1757 numeric_lt scalarltsel scalarltjoinsel ));
  DATA(insert OID = 1755 (  "<="	   PGNSP PGUID b f f 1700 1700	 16 1757 1756 numeric_le scalarltsel scalarltjoinsel ));
Index: src/include/catalog/pg_opfamily.h
===================================================================
RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/catalog/pg_opfamily.h,v
retrieving revision 1.4
diff -c -p -r1.4 pg_opfamily.h
*** src/include/catalog/pg_opfamily.h	2 Apr 2007 03:49:40 -0000	1.4
--- src/include/catalog/pg_opfamily.h	27 Apr 2007 07:06:54 -0000
*************** DATA(insert OID = 1986 (	403		name_ops		
*** 93,98 ****
--- 93,99 ----
  #define NAME_BTREE_FAM_OID 1986
  DATA(insert OID = 1987 (	405		name_ops		PGNSP PGUID ));
  DATA(insert OID = 1988 (	403		numeric_ops		PGNSP PGUID ));
+ DATA(insert OID = 1998 (	405		numeric_ops		PGNSP PGUID ));
  DATA(insert OID = 1989 (	403		oid_ops			PGNSP PGUID ));
  #define OID_BTREE_FAM_OID 1989
  DATA(insert OID = 1990 (	405		oid_ops			PGNSP PGUID ));
Index: src/include/catalog/pg_proc.h
===================================================================
RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/catalog/pg_proc.h,v
retrieving revision 1.454
diff -c -p -r1.454 pg_proc.h
*** src/include/catalog/pg_proc.h	2 Apr 2007 03:49:40 -0000	1.454
--- src/include/catalog/pg_proc.h	27 Apr 2007 07:39:04 -0000
*************** DATA(insert OID = 399 (  hashmacaddr	   
*** 838,843 ****
--- 838,845 ----
  DESCR("hash");
  DATA(insert OID = 422 (  hashinet		   PGNSP PGUID 12 1 0 f f t f i 1 23 "869" _null_ _null_ _null_ hashinet - _null_ ));
  DESCR("hash");
+ DATA(insert OID = 432 (  hash_numeric	   PGNSP PGUID 12 1 0 f f t f i 1 23 "1700" _null_ _null_ _null_ hash_numeric - _null_ ));
+ DESCR("hash");
  DATA(insert OID = 458 (  text_larger	   PGNSP PGUID 12 1 0 f f t f i 2 25 "25 25" _null_ _null_ _null_ text_larger - _null_ ));
  DESCR("larger of two");
  DATA(insert OID = 459 (  text_smaller	   PGNSP PGUID 12 1 0 f f t f i 2 25 "25 25" _null_ _null_ _null_ text_smaller - _null_ ));
Index: src/include/utils/builtins.h
===================================================================
RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/utils/builtins.h,v
retrieving revision 1.291
diff -c -p -r1.291 builtins.h
*** src/include/utils/builtins.h	2 Apr 2007 03:49:41 -0000	1.291
--- src/include/utils/builtins.h	27 Apr 2007 07:06:54 -0000
*************** extern Datum int2_avg_accum(PG_FUNCTION_
*** 883,888 ****
--- 883,889 ----
  extern Datum int4_avg_accum(PG_FUNCTION_ARGS);
  extern Datum int8_avg(PG_FUNCTION_ARGS);
  extern Datum width_bucket_numeric(PG_FUNCTION_ARGS);
+ extern Datum hash_numeric(PG_FUNCTION_ARGS);
  
  /* ri_triggers.c */
  extern Datum RI_FKey_check_ins(PG_FUNCTION_ARGS);
---------------------------(end of broadcast)---------------------------
TIP 4: Have you searched our list archives?

               http://archives.postgresql.org

Reply via email to