There is currently no support for hashing numerics. This prevents numerics from being hash indexed or used in a hashed aggregation. This patch makes the necessary changes to the catalogs to enable hashing for numerics, and implements a first sketch at a hash function for numerics.
For any two numerics that compare equal, we need to compute the same hash value for both datums, even if their bit patterns differ. This patch computes the hash from the "n_weight" and "n_data" fields of the numeric -- notably, it doesn't use the numeric's "scale" field, since two equal numerics may have different scales. The hash seems to return the correct results for the simple test cases that I've tried, but I'm not very familiar with the details of the numeric implementation -- can anyone comment on whether this hash function is correct? Thanks to Sailesh Krishnamurthy for reporting this problem. -Neil
Index: src/backend/utils/adt/numeric.c =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/backend/utils/adt/numeric.c,v retrieving revision 1.101 diff -c -p -r1.101 numeric.c *** src/backend/utils/adt/numeric.c 27 Feb 2007 23:48:08 -0000 1.101 --- src/backend/utils/adt/numeric.c 27 Apr 2007 07:25:38 -0000 *************** *** 26,31 **** --- 26,32 ---- #include <limits.h> #include <math.h> + #include "access/hash.h" #include "catalog/pg_type.h" #include "libpq/pqformat.h" #include "utils/array.h" *************** cmp_numerics(Numeric num1, Numeric num2) *** 1149,1154 **** --- 1150,1184 ---- return result; } + Datum + hash_numeric(PG_FUNCTION_ARGS) + { + Numeric key = PG_GETARG_NUMERIC(0); + Datum digit_hash; + Datum weight_hash; + Datum result; + + if (NUMERIC_IS_NAN(key)) + PG_RETURN_UINT32(0); + + /* + * XXX: we don't hash on the Numeric's scale, since two numerics + * can compare equal but have different scales. We also don't hash + * on the sign, although we easily could (since a sign difference + * means inequality, it shouldn't affect correctness). + */ + digit_hash = hash_any((unsigned char *) NUMERIC_DIGITS(key), + NUMERIC_NDIGITS(key) * sizeof(NumericDigit)); + weight_hash = DirectFunctionCall1(hashint2, + Int16GetDatum(key->n_weight)); + + /* + * XXX: we combine the hashes with XOR. Is this the right choice? + */ + result = digit_hash ^ weight_hash; + PG_RETURN_DATUM(result); + } + /* ---------------------------------------------------------------------- * Index: src/include/catalog/pg_amop.h =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/catalog/pg_amop.h,v retrieving revision 1.80 diff -c -p -r1.80 pg_amop.h *** src/include/catalog/pg_amop.h 2 Apr 2007 03:49:40 -0000 1.80 --- src/include/catalog/pg_amop.h 27 Apr 2007 07:38:32 -0000 *************** DATA(insert ( 2232 19 19 1 f 2334 405 *** 568,573 **** --- 568,575 ---- DATA(insert ( 2235 1033 1033 1 f 974 405 )); /* uuid_ops */ DATA(insert ( 2969 2950 2950 1 f 2972 405 )); + /* numeric_ops */ + DATA(insert ( 1998 1700 1700 1 f 1752 405 )); /* Index: src/include/catalog/pg_amproc.h =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/catalog/pg_amproc.h,v retrieving revision 1.64 diff -c -p -r1.64 pg_amproc.h *** src/include/catalog/pg_amproc.h 2 Apr 2007 03:49:40 -0000 1.64 --- src/include/catalog/pg_amproc.h 27 Apr 2007 07:37:45 -0000 *************** DATA(insert ( 1990 26 26 1 453 )); *** 148,153 **** --- 148,154 ---- DATA(insert ( 1992 30 30 1 457 )); DATA(insert ( 1995 25 25 1 400 )); DATA(insert ( 1997 1083 1083 1 452 )); + DATA(insert ( 1998 1700 1700 1 432 )); DATA(insert ( 1999 1184 1184 1 452 )); DATA(insert ( 2001 1266 1266 1 1696 )); DATA(insert ( 2040 1114 1114 1 452 )); Index: src/include/catalog/pg_opclass.h =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/catalog/pg_opclass.h,v retrieving revision 1.75 diff -c -p -r1.75 pg_opclass.h *** src/include/catalog/pg_opclass.h 2 Apr 2007 03:49:40 -0000 1.75 --- src/include/catalog/pg_opclass.h 27 Apr 2007 07:06:54 -0000 *************** DATA(insert ( 405 macaddr_ops PGNSP P *** 129,134 **** --- 129,135 ---- DATA(insert ( 403 name_ops PGNSP PGUID 1986 19 t 0 )); DATA(insert ( 405 name_ops PGNSP PGUID 1987 19 t 0 )); DATA(insert ( 403 numeric_ops PGNSP PGUID 1988 1700 t 0 )); + DATA(insert ( 405 numeric_ops PGNSP PGUID 1998 1700 t 0 )); DATA(insert OID = 1981 ( 403 oid_ops PGNSP PGUID 1989 26 t 0 )); #define OID_BTREE_OPS_OID 1981 DATA(insert ( 405 oid_ops PGNSP PGUID 1990 26 t 0 )); Index: src/include/catalog/pg_operator.h =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/catalog/pg_operator.h,v retrieving revision 1.151 diff -c -p -r1.151 pg_operator.h *** src/include/catalog/pg_operator.h 2 Apr 2007 03:49:40 -0000 1.151 --- src/include/catalog/pg_operator.h 27 Apr 2007 07:06:54 -0000 *************** DATA(insert OID = 1630 ( "!~~*" PGNSP *** 675,681 **** /* NUMERIC type - OID's 1700-1799 */ DATA(insert OID = 1751 ( "-" PGNSP PGUID l f f 0 1700 1700 0 0 numeric_uminus - - )); ! DATA(insert OID = 1752 ( "=" PGNSP PGUID b t f 1700 1700 16 1752 1753 numeric_eq eqsel eqjoinsel )); DATA(insert OID = 1753 ( "<>" PGNSP PGUID b f f 1700 1700 16 1753 1752 numeric_ne neqsel neqjoinsel )); DATA(insert OID = 1754 ( "<" PGNSP PGUID b f f 1700 1700 16 1756 1757 numeric_lt scalarltsel scalarltjoinsel )); DATA(insert OID = 1755 ( "<=" PGNSP PGUID b f f 1700 1700 16 1757 1756 numeric_le scalarltsel scalarltjoinsel )); --- 675,681 ---- /* NUMERIC type - OID's 1700-1799 */ DATA(insert OID = 1751 ( "-" PGNSP PGUID l f f 0 1700 1700 0 0 numeric_uminus - - )); ! DATA(insert OID = 1752 ( "=" PGNSP PGUID b t t 1700 1700 16 1752 1753 numeric_eq eqsel eqjoinsel )); DATA(insert OID = 1753 ( "<>" PGNSP PGUID b f f 1700 1700 16 1753 1752 numeric_ne neqsel neqjoinsel )); DATA(insert OID = 1754 ( "<" PGNSP PGUID b f f 1700 1700 16 1756 1757 numeric_lt scalarltsel scalarltjoinsel )); DATA(insert OID = 1755 ( "<=" PGNSP PGUID b f f 1700 1700 16 1757 1756 numeric_le scalarltsel scalarltjoinsel )); Index: src/include/catalog/pg_opfamily.h =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/catalog/pg_opfamily.h,v retrieving revision 1.4 diff -c -p -r1.4 pg_opfamily.h *** src/include/catalog/pg_opfamily.h 2 Apr 2007 03:49:40 -0000 1.4 --- src/include/catalog/pg_opfamily.h 27 Apr 2007 07:06:54 -0000 *************** DATA(insert OID = 1986 ( 403 name_ops *** 93,98 **** --- 93,99 ---- #define NAME_BTREE_FAM_OID 1986 DATA(insert OID = 1987 ( 405 name_ops PGNSP PGUID )); DATA(insert OID = 1988 ( 403 numeric_ops PGNSP PGUID )); + DATA(insert OID = 1998 ( 405 numeric_ops PGNSP PGUID )); DATA(insert OID = 1989 ( 403 oid_ops PGNSP PGUID )); #define OID_BTREE_FAM_OID 1989 DATA(insert OID = 1990 ( 405 oid_ops PGNSP PGUID )); Index: src/include/catalog/pg_proc.h =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/catalog/pg_proc.h,v retrieving revision 1.454 diff -c -p -r1.454 pg_proc.h *** src/include/catalog/pg_proc.h 2 Apr 2007 03:49:40 -0000 1.454 --- src/include/catalog/pg_proc.h 27 Apr 2007 07:39:04 -0000 *************** DATA(insert OID = 399 ( hashmacaddr *** 838,843 **** --- 838,845 ---- DESCR("hash"); DATA(insert OID = 422 ( hashinet PGNSP PGUID 12 1 0 f f t f i 1 23 "869" _null_ _null_ _null_ hashinet - _null_ )); DESCR("hash"); + DATA(insert OID = 432 ( hash_numeric PGNSP PGUID 12 1 0 f f t f i 1 23 "1700" _null_ _null_ _null_ hash_numeric - _null_ )); + DESCR("hash"); DATA(insert OID = 458 ( text_larger PGNSP PGUID 12 1 0 f f t f i 2 25 "25 25" _null_ _null_ _null_ text_larger - _null_ )); DESCR("larger of two"); DATA(insert OID = 459 ( text_smaller PGNSP PGUID 12 1 0 f f t f i 2 25 "25 25" _null_ _null_ _null_ text_smaller - _null_ )); Index: src/include/utils/builtins.h =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/utils/builtins.h,v retrieving revision 1.291 diff -c -p -r1.291 builtins.h *** src/include/utils/builtins.h 2 Apr 2007 03:49:41 -0000 1.291 --- src/include/utils/builtins.h 27 Apr 2007 07:06:54 -0000 *************** extern Datum int2_avg_accum(PG_FUNCTION_ *** 883,888 **** --- 883,889 ---- extern Datum int4_avg_accum(PG_FUNCTION_ARGS); extern Datum int8_avg(PG_FUNCTION_ARGS); extern Datum width_bucket_numeric(PG_FUNCTION_ARGS); + extern Datum hash_numeric(PG_FUNCTION_ARGS); /* ri_triggers.c */ extern Datum RI_FKey_check_ins(PG_FUNCTION_ARGS);
---------------------------(end of broadcast)--------------------------- TIP 4: Have you searched our list archives? http://archives.postgresql.org