On Sun, 2007-06-05 at 21:30 -0400, Tom Lane wrote: > You can do it always if you simply decrement the weight for each leading > zero removed.
On reflection, the patch as given was wrong anyway: if two Numerics are identical except for the presence of leading zeroes, we can't consider the weight when hashing one of them but not the other. > The patch as given is wrong since it makes the test for no-digits > before instead of after removing zeroes.) Okay, how about the attached revision? > It'd be a good idea if you repeat the previous number-of-collisions > experiment on this code. I'll do this shortly. -Neil
Index: src/backend/utils/adt/numeric.c =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/backend/utils/adt/numeric.c,v retrieving revision 1.101 diff -c -p -r1.101 numeric.c *** src/backend/utils/adt/numeric.c 27 Feb 2007 23:48:08 -0000 1.101 --- src/backend/utils/adt/numeric.c 7 May 2007 01:49:18 -0000 *************** *** 26,31 **** --- 26,32 ---- #include <limits.h> #include <math.h> + #include "access/hash.h" #include "catalog/pg_type.h" #include "libpq/pqformat.h" #include "utils/array.h" *************** cmp_numerics(Numeric num1, Numeric num2) *** 1149,1154 **** --- 1150,1230 ---- return result; } + Datum + hash_numeric(PG_FUNCTION_ARGS) + { + Numeric key = PG_GETARG_NUMERIC(0); + Datum digit_hash; + Datum result; + int weight; + int start_offset; + int end_offset; + int i; + int hash_len; + + /* If it's NaN, don't try to hash the rest of the fields */ + if (NUMERIC_IS_NAN(key)) + PG_RETURN_UINT32(0); + + weight = key->n_weight; + start_offset = 0; + end_offset = 0; + + /* + * Omit any leading or trailing zeros from the input to the + * hash. The numeric implementation *should* guarantee that + * leading and trailing zeros are suppressed, but we're + * paranoid. Note that we measure the starting and ending offsets + * in units of NumericDigits, not bytes. + */ + for (i = 0; i < NUMERIC_NDIGITS(key); i++) + { + if (NUMERIC_DIGITS(key)[i] != (NumericDigit) 0) + break; + + start_offset++; + /* + * The weight is effectively the # of digits before the + * decimal point, so decrement it for each leading zero we + * skip. + */ + weight--; + } + + /* + * If there are no non-zero digits, then the value of the number + * is zero, regardless of any other fields. + */ + if (NUMERIC_NDIGITS(key) == start_offset) + PG_RETURN_UINT32(-1); + + for (i = NUMERIC_NDIGITS(key) - 1; i >= 0; i--) + { + if (NUMERIC_DIGITS(key)[i] != (NumericDigit) 0) + break; + + end_offset++; + } + + /* If we get here, there should be at least one non-zero digit */ + Assert(start_offset + end_offset < NUMERIC_NDIGITS(key)); + + /* + * Note that we don't hash on the Numeric's scale, since two + * numerics can compare equal but have different scales. We also + * don't hash on the sign, although we could: since a sign + * difference implies inequality, this shouldn't affect correctness. + */ + hash_len = NUMERIC_NDIGITS(key) - start_offset - end_offset; + digit_hash = hash_any((unsigned char *) (NUMERIC_DIGITS(key) + start_offset), + hash_len * sizeof(NumericDigit)); + + /* Mix in the weight, via XOR */ + result = digit_hash ^ weight; + + PG_RETURN_DATUM(result); + } + /* ---------------------------------------------------------------------- * Index: src/include/catalog/pg_amop.h =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/catalog/pg_amop.h,v retrieving revision 1.80 diff -c -p -r1.80 pg_amop.h *** src/include/catalog/pg_amop.h 2 Apr 2007 03:49:40 -0000 1.80 --- src/include/catalog/pg_amop.h 7 May 2007 00:48:32 -0000 *************** DATA(insert ( 2232 19 19 1 f 2334 405 *** 568,573 **** --- 568,575 ---- DATA(insert ( 2235 1033 1033 1 f 974 405 )); /* uuid_ops */ DATA(insert ( 2969 2950 2950 1 f 2972 405 )); + /* numeric_ops */ + DATA(insert ( 1998 1700 1700 1 f 1752 405 )); /* Index: src/include/catalog/pg_amproc.h =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/catalog/pg_amproc.h,v retrieving revision 1.64 diff -c -p -r1.64 pg_amproc.h *** src/include/catalog/pg_amproc.h 2 Apr 2007 03:49:40 -0000 1.64 --- src/include/catalog/pg_amproc.h 7 May 2007 00:48:32 -0000 *************** DATA(insert ( 1990 26 26 1 453 )); *** 148,153 **** --- 148,154 ---- DATA(insert ( 1992 30 30 1 457 )); DATA(insert ( 1995 25 25 1 400 )); DATA(insert ( 1997 1083 1083 1 452 )); + DATA(insert ( 1998 1700 1700 1 432 )); DATA(insert ( 1999 1184 1184 1 452 )); DATA(insert ( 2001 1266 1266 1 1696 )); DATA(insert ( 2040 1114 1114 1 452 )); Index: src/include/catalog/pg_opclass.h =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/catalog/pg_opclass.h,v retrieving revision 1.75 diff -c -p -r1.75 pg_opclass.h *** src/include/catalog/pg_opclass.h 2 Apr 2007 03:49:40 -0000 1.75 --- src/include/catalog/pg_opclass.h 7 May 2007 00:48:32 -0000 *************** DATA(insert ( 405 macaddr_ops PGNSP P *** 129,134 **** --- 129,135 ---- DATA(insert ( 403 name_ops PGNSP PGUID 1986 19 t 0 )); DATA(insert ( 405 name_ops PGNSP PGUID 1987 19 t 0 )); DATA(insert ( 403 numeric_ops PGNSP PGUID 1988 1700 t 0 )); + DATA(insert ( 405 numeric_ops PGNSP PGUID 1998 1700 t 0 )); DATA(insert OID = 1981 ( 403 oid_ops PGNSP PGUID 1989 26 t 0 )); #define OID_BTREE_OPS_OID 1981 DATA(insert ( 405 oid_ops PGNSP PGUID 1990 26 t 0 )); Index: src/include/catalog/pg_operator.h =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/catalog/pg_operator.h,v retrieving revision 1.151 diff -c -p -r1.151 pg_operator.h *** src/include/catalog/pg_operator.h 2 Apr 2007 03:49:40 -0000 1.151 --- src/include/catalog/pg_operator.h 7 May 2007 00:48:32 -0000 *************** DATA(insert OID = 1630 ( "!~~*" PGNSP *** 675,681 **** /* NUMERIC type - OID's 1700-1799 */ DATA(insert OID = 1751 ( "-" PGNSP PGUID l f f 0 1700 1700 0 0 numeric_uminus - - )); ! DATA(insert OID = 1752 ( "=" PGNSP PGUID b t f 1700 1700 16 1752 1753 numeric_eq eqsel eqjoinsel )); DATA(insert OID = 1753 ( "<>" PGNSP PGUID b f f 1700 1700 16 1753 1752 numeric_ne neqsel neqjoinsel )); DATA(insert OID = 1754 ( "<" PGNSP PGUID b f f 1700 1700 16 1756 1757 numeric_lt scalarltsel scalarltjoinsel )); DATA(insert OID = 1755 ( "<=" PGNSP PGUID b f f 1700 1700 16 1757 1756 numeric_le scalarltsel scalarltjoinsel )); --- 675,681 ---- /* NUMERIC type - OID's 1700-1799 */ DATA(insert OID = 1751 ( "-" PGNSP PGUID l f f 0 1700 1700 0 0 numeric_uminus - - )); ! DATA(insert OID = 1752 ( "=" PGNSP PGUID b t t 1700 1700 16 1752 1753 numeric_eq eqsel eqjoinsel )); DATA(insert OID = 1753 ( "<>" PGNSP PGUID b f f 1700 1700 16 1753 1752 numeric_ne neqsel neqjoinsel )); DATA(insert OID = 1754 ( "<" PGNSP PGUID b f f 1700 1700 16 1756 1757 numeric_lt scalarltsel scalarltjoinsel )); DATA(insert OID = 1755 ( "<=" PGNSP PGUID b f f 1700 1700 16 1757 1756 numeric_le scalarltsel scalarltjoinsel )); Index: src/include/catalog/pg_opfamily.h =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/catalog/pg_opfamily.h,v retrieving revision 1.4 diff -c -p -r1.4 pg_opfamily.h *** src/include/catalog/pg_opfamily.h 2 Apr 2007 03:49:40 -0000 1.4 --- src/include/catalog/pg_opfamily.h 7 May 2007 00:48:32 -0000 *************** DATA(insert OID = 1986 ( 403 name_ops *** 93,98 **** --- 93,99 ---- #define NAME_BTREE_FAM_OID 1986 DATA(insert OID = 1987 ( 405 name_ops PGNSP PGUID )); DATA(insert OID = 1988 ( 403 numeric_ops PGNSP PGUID )); + DATA(insert OID = 1998 ( 405 numeric_ops PGNSP PGUID )); DATA(insert OID = 1989 ( 403 oid_ops PGNSP PGUID )); #define OID_BTREE_FAM_OID 1989 DATA(insert OID = 1990 ( 405 oid_ops PGNSP PGUID )); Index: src/include/catalog/pg_proc.h =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/catalog/pg_proc.h,v retrieving revision 1.454 diff -c -p -r1.454 pg_proc.h *** src/include/catalog/pg_proc.h 2 Apr 2007 03:49:40 -0000 1.454 --- src/include/catalog/pg_proc.h 7 May 2007 01:07:21 -0000 *************** DATA(insert OID = 399 ( hashmacaddr *** 838,843 **** --- 838,845 ---- DESCR("hash"); DATA(insert OID = 422 ( hashinet PGNSP PGUID 12 1 0 f f t f i 1 23 "869" _null_ _null_ _null_ hashinet - _null_ )); DESCR("hash"); + DATA(insert OID = 432 ( hash_numeric PGNSP PGUID 12 1 0 f f t f i 1 23 "1700" _null_ _null_ _null_ hash_numeric - _null_ )); + DESCR("hash"); DATA(insert OID = 458 ( text_larger PGNSP PGUID 12 1 0 f f t f i 2 25 "25 25" _null_ _null_ _null_ text_larger - _null_ )); DESCR("larger of two"); DATA(insert OID = 459 ( text_smaller PGNSP PGUID 12 1 0 f f t f i 2 25 "25 25" _null_ _null_ _null_ text_smaller - _null_ )); Index: src/include/utils/builtins.h =================================================================== RCS file: /home/neilc/postgres/cvs_root/pgsql/src/include/utils/builtins.h,v retrieving revision 1.291 diff -c -p -r1.291 builtins.h *** src/include/utils/builtins.h 2 Apr 2007 03:49:41 -0000 1.291 --- src/include/utils/builtins.h 7 May 2007 01:07:12 -0000 *************** extern Datum int2_avg_accum(PG_FUNCTION_ *** 883,888 **** --- 883,889 ---- extern Datum int4_avg_accum(PG_FUNCTION_ARGS); extern Datum int8_avg(PG_FUNCTION_ARGS); extern Datum width_bucket_numeric(PG_FUNCTION_ARGS); + extern Datum hash_numeric(PG_FUNCTION_ARGS); /* ri_triggers.c */ extern Datum RI_FKey_check_ins(PG_FUNCTION_ARGS);
---------------------------(end of broadcast)--------------------------- TIP 3: Have you checked our extensive FAQ? http://www.postgresql.org/docs/faq