> I agree with you that we can support other join type and anti join later, > If others don’t have any objection in doing other parts later I will mark as > "Ready For Committer".
I updated the patch to cover semi and anti joins by calling eqjoinsel_semi(). I think that is better than returning a constant. The new version of the patch is attached, together with the new version of the test script. Can you please look at it again and mark it as "Ready for Committer" if it seems okay to you?
diff --git a/src/backend/utils/adt/network_selfuncs.c b/src/backend/utils/adt/network_selfuncs.c index d0d806f..eca9e7c 100644 --- a/src/backend/utils/adt/network_selfuncs.c +++ b/src/backend/utils/adt/network_selfuncs.c @@ -1,32 +1,669 @@ /*------------------------------------------------------------------------- * * network_selfuncs.c * Functions for selectivity estimation of inet/cidr operators * - * Currently these are just stubs, but we hope to do better soon. + * Estimates are based on null fraction, distinct value count, most common + * values, and histogram of inet/cidr datatypes. * * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION * src/backend/utils/adt/network_selfuncs.c * *------------------------------------------------------------------------- */ #include "postgres.h" +#include <math.h> + +#include "access/htup_details.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_operator.h" +#include "catalog/pg_statistic.h" +#include "utils/lsyscache.h" #include "utils/inet.h" +#include "utils/selfuncs.h" +/* Default selectivity constant for the inet overlap operator */ +#define DEFAULT_OVERLAP_SEL 0.01 + +/* Default selectivity constant for the other operators */ +#define DEFAULT_INCLUSION_SEL 0.005 + +/* Default selectivity for given operator */ +#define DEFAULT_SEL(operator) \ + ((operator) == OID_INET_OVERLAP_OP ? 
\ + DEFAULT_OVERLAP_SEL : DEFAULT_INCLUSION_SEL) + +static Selectivity networkjoinsel_inner(Oid operator, + VariableStatData *vardata1, VariableStatData *vardata2); +extern double eqjoinsel_semi(Oid operator, VariableStatData *vardata1, + VariableStatData *vardata2, RelOptInfo *inner_rel); +extern RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids); +static short int inet_opr_order(Oid operator); +static Selectivity inet_his_inclusion_selec(Datum *values, int nvalues, + Datum *constvalue, short int opr_order); +static Selectivity inet_mcv_join_selec(Datum *values1, float4 *numbers1, + int nvalues1, Datum *values2, float4 *numbers2, + int nvalues2, Oid operator); +static Selectivity inet_mcv_his_selec(Datum *mcv_values, float4 *mcv_numbers, + int mcv_nvalues, Datum *his_values, int his_nvalues, + int red_nvalues, short int opr_order, + Selectivity *max_selec_pointer); +static Selectivity inet_his_inclusion_join_selec(Datum *his1_values, + int his1_nvalues, Datum *his2_values, int his2_nvalues, + int red_nvalues, short int opr_order); +static short int inet_inclusion_cmp(inet *left, inet *right, + short int opr_order); +static short int inet_masklen_inclusion_cmp(inet *left, inet *right, + short int opr_order); +static short int inet_his_match_divider(inet *boundary, inet *query, + short int opr_order); + +/* + * Selectivity estimation for the subnet inclusion operators + */ Datum networksel(PG_FUNCTION_ARGS) { - PG_RETURN_FLOAT8(0.001); + PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); + Oid operator = PG_GETARG_OID(1); + List *args = (List *) PG_GETARG_POINTER(2); + int varRelid = PG_GETARG_INT32(3), + his_nvalues; + VariableStatData vardata; + Node *other; + bool varonleft; + Selectivity selec, + max_mcv_selec; + Datum constvalue, + *his_values; + Form_pg_statistic stats; + double nullfrac; + FmgrInfo proc; + + /* + * If expression is not (variable op something) or (something op + * variable), then punt and return a default estimate. 
+ */ + if (!get_restriction_variable(root, args, varRelid, + &vardata, &other, &varonleft)) + PG_RETURN_FLOAT8(DEFAULT_SEL(operator)); + + /* + * Can't do anything useful if the something is not a constant, either. + */ + if (!IsA(other, Const)) + { + ReleaseVariableStats(vardata); + PG_RETURN_FLOAT8(DEFAULT_SEL(operator)); + } + + /* All of the subnet inclusion operators are strict. */ + if (((Const *) other)->constisnull) + { + ReleaseVariableStats(vardata); + PG_RETURN_FLOAT8(0.0); + } + + if (!HeapTupleIsValid(vardata.statsTuple)) + { + ReleaseVariableStats(vardata); + PG_RETURN_FLOAT8(DEFAULT_SEL(operator)); + } + + constvalue = ((Const *) other)->constvalue; + stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple); + nullfrac = stats ? stats->stanullfrac : 0.0; + + fmgr_info(get_opcode(operator), &proc); + selec = mcv_selectivity(&vardata, &proc, constvalue, varonleft, + &max_mcv_selec); + + if (get_attstatsslot(vardata.statsTuple, + vardata.atttype, vardata.atttypmod, + STATISTIC_KIND_HISTOGRAM, InvalidOid, + NULL, + &his_values, &his_nvalues, + NULL, NULL)) + { + selec += (1.0 - nullfrac - max_mcv_selec) * + inet_his_inclusion_selec(his_values, his_nvalues, &constvalue, + varonleft ? inet_opr_order(operator) : + inet_opr_order(operator) * -1); + + free_attstatsslot(vardata.atttype, his_values, his_nvalues, NULL, 0); + } + else if (max_mcv_selec == 0.0) + selec = (1.0 - nullfrac) * DEFAULT_SEL(operator); + + /* Result should be in range, but make sure... */ + CLAMP_PROBABILITY(selec); + + ReleaseVariableStats(vardata); + PG_RETURN_FLOAT8(selec); } +/* + * Join selectivity estimation for the subnet inclusion operators + * + * This function is the copy of eqjoinsel() on selfuncs.c except the comments + * and that it calls networkjoinsel_inner() instead of eqjoinsel_inner(). 
+ */ Datum networkjoinsel(PG_FUNCTION_ARGS) { - PG_RETURN_FLOAT8(0.001); + PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); + Oid operator = PG_GETARG_OID(1); + List *args = (List *) PG_GETARG_POINTER(2); +#ifdef NOT_USED + JoinType jointype = (JoinType) PG_GETARG_INT16(3); +#endif + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4); + double selec; + VariableStatData vardata1; + VariableStatData vardata2; + bool join_is_reversed; + RelOptInfo *inner_rel; + + get_join_variables(root, args, sjinfo, + &vardata1, &vardata2, &join_is_reversed); + + switch (sjinfo->jointype) + { + case JOIN_INNER: + case JOIN_LEFT: + case JOIN_FULL: + + /* + * Selectivity for left join is not exactly same as inner join, + * but is neglected. + */ + if (!join_is_reversed) + selec = networkjoinsel_inner(operator, &vardata1, &vardata2); + else + selec = networkjoinsel_inner(get_commutator(operator), + &vardata2, &vardata1); + break; + case JOIN_SEMI: + case JOIN_ANTI: + + /* + * Selectivity estimation functions of semi and anti joins are not + * implemented for the subnet inclusion operators. + * eqjoinsel_semi() used to cover. It makes small or big mistakes + * based on the join type, the operator and the ratio between the + * row counts. 
+ */ + inner_rel = find_join_input_rel(root, sjinfo->min_righthand); + + if (!join_is_reversed) + selec = eqjoinsel_semi(operator, &vardata1, &vardata2, + inner_rel); + else + selec = eqjoinsel_semi(get_commutator(operator), + &vardata2, &vardata1, + inner_rel); + break; + default: + /* other values not expected here */ + elog(ERROR, "unrecognized join type: %d", + (int) sjinfo->jointype); + selec = 0; /* keep compiler quiet */ + break; + } + + ReleaseVariableStats(vardata1); + ReleaseVariableStats(vardata2); + + CLAMP_PROBABILITY(selec); + + PG_RETURN_FLOAT8((float8) selec); +} + +/* + * Inner join selectivity estimation for the subnet inclusion operators + * + * Calculates MCV vs MCV, MCV vs histogram and histogram vs histogram + * selectivity for join using the subnet inclusion operators. Unlike the + * join selectivity function for the equality operator, eqjoinsel_inner(), + * one to one matching of the values is not enough. Network inclusion + * operators are likely to match many to many. It requires to loop the MVC + * and histogram lists to the end. Also, MCV vs histogram selectiviy is + * not neglected as in eqjoinsel_inner(). + * + * To make the function faster only some of the values from the first + * MVC and histogram matched to the second histogram. It is calculated by + * log2(). 
+ */ +static Selectivity +networkjoinsel_inner(Oid operator, + VariableStatData *vardata1, VariableStatData *vardata2) +{ + Form_pg_statistic stats; + double nullfrac1 = 0.0, + nullfrac2 = 0.0; + Selectivity selec = 0.0, + mcv1_max_selec = 0.0, + mcv2_max_selec = 0.0; + bool mcv1_exists = false, + mcv2_exists = false, + his1_exists = false, + his2_exists = false; + int mcv1_nvalues, + mcv2_nvalues, + mcv1_nnumbers, + mcv2_nnumbers, + his1_nvalues, + his2_nvalues, + red1_nvalues, + red2_nvalues; + Datum *mcv1_values, + *mcv2_values, + *his1_values, + *his2_values; + float4 *mcv1_numbers, + *mcv2_numbers; + + if (HeapTupleIsValid(vardata1->statsTuple)) + { + if ((stats = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple))) + nullfrac1 = stats->stanullfrac; + + mcv1_exists = get_attstatsslot(vardata1->statsTuple, + vardata1->atttype, vardata1->atttypmod, + STATISTIC_KIND_MCV, InvalidOid, + NULL, + &mcv1_values, &mcv1_nvalues, + &mcv1_numbers, &mcv1_nnumbers); + his1_exists = get_attstatsslot(vardata1->statsTuple, + vardata1->atttype, vardata1->atttypmod, + STATISTIC_KIND_HISTOGRAM, InvalidOid, + NULL, + &his1_values, &his1_nvalues, + NULL, NULL); + red1_nvalues = ((int) log2(Max(mcv1_nvalues, his1_nvalues))) + 1; + } + + if (HeapTupleIsValid(vardata2->statsTuple)) + { + if ((stats = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple))) + nullfrac2 = stats->stanullfrac; + + mcv2_exists = get_attstatsslot(vardata2->statsTuple, + vardata2->atttype, vardata2->atttypmod, + STATISTIC_KIND_MCV, InvalidOid, + NULL, + &mcv2_values, &mcv2_nvalues, + &mcv2_numbers, &mcv2_nnumbers); + his2_exists = get_attstatsslot(vardata2->statsTuple, + vardata2->atttype, vardata2->atttypmod, + STATISTIC_KIND_HISTOGRAM, InvalidOid, + NULL, + &his2_values, &his2_nvalues, + NULL, NULL); + red2_nvalues = ((int) log2(Max(mcv2_nvalues, his2_nvalues))) + 1; + } + + if (mcv1_exists && mcv2_exists) + selec += inet_mcv_join_selec(mcv1_values, mcv1_numbers, mcv1_nvalues, + mcv2_values, mcv2_numbers, 
mcv2_nvalues, + operator); + if (mcv1_exists && his2_exists) + selec += inet_mcv_his_selec(mcv1_values, mcv1_numbers, mcv1_nvalues, + his2_values, his2_nvalues, + Min(mcv1_nvalues, red1_nvalues), + inet_opr_order(operator), &mcv1_max_selec); + if (mcv2_exists && his1_exists) + selec += inet_mcv_his_selec(mcv2_values, mcv2_numbers, mcv2_nvalues, + his1_values, his1_nvalues, + Min(mcv2_nvalues, red2_nvalues), + inet_opr_order(operator), &mcv2_max_selec); + if (his1_exists && his2_exists) + selec += (1.0 - nullfrac1 - mcv1_max_selec) * + (1.0 - nullfrac2 - mcv2_max_selec) * + inet_his_inclusion_join_selec(his1_values, his1_nvalues, + his2_values, his2_nvalues, + Min(his1_nvalues, red1_nvalues), + inet_opr_order(operator)); + + if ((!mcv1_exists && !his1_exists) || (!mcv2_exists && !his2_exists)) + selec = (1.0 - nullfrac1) * (1.0 - nullfrac2) * DEFAULT_SEL(operator); + + if (mcv1_exists) + free_attstatsslot(vardata1->atttype, mcv1_values, mcv1_nvalues, + mcv1_numbers, mcv1_nnumbers); + if (mcv2_exists) + free_attstatsslot(vardata2->atttype, mcv2_values, mcv2_nvalues, + mcv2_numbers, mcv2_nnumbers); + if (his1_exists) + free_attstatsslot(vardata1->atttype, his1_values, his1_nvalues, + NULL, 0); + if (his2_exists) + free_attstatsslot(vardata2->atttype, his2_values, his2_nvalues, + NULL, 0); + + return selec; +} + +/* + * Practical comparable numbers for the subnet inclusion operators + */ +static short int +inet_opr_order(Oid operator) +{ + switch (operator) + { + case OID_INET_SUP_OP: + return -2; + case OID_INET_SUPEQ_OP: + return -1; + case OID_INET_OVERLAP_OP: + return 0; + case OID_INET_SUBEQ_OP: + return 1; + case OID_INET_SUB_OP: + return 2; + default: + elog(ERROR, "unknown operator for inet inclusion selectivity"); + } +} + +/* + * Inet histogram inclusion selectivity estimation + * + * Calculates histogram selectivity for the subnet inclusion operators of + * the inet type. The return value is between 0 and 1. 
It should be + * corrected with the MCV selectivity and null fraction. If the constant + * is less than the first element or greater than the last element of + * the histogram the return value will be 0. + * + * The histogram is originally for the basic comparison operators. Only + * the common bits of the network part and the length of the network part + * (masklen) are appropriate for the subnet inclusion operators. Fortunately, + * basic comparison fits in this situation. Even so, the length of the + * network part would not really be significant in the histogram. This would + * lead to big mistakes for data sets with uneven masklen distribution. + * To avoid this problem, comparisons with the left and the right side of the + * buckets are used together. + * + * Histogram bucket matches are calculated in two forms. If the constant + * matches both sides the bucket is considered as fully matched. If the + * constant matches only the right side of the bucket, it is not considered + * as matched unless it is the last bucket, because it will match the next + * bucket. If all of these buckets would be considered as matched, it would + * lead to unfair multiple matches for some constants. + * + * The second form is to match the bucket partially. We try to calculate + * dividers for both of the boundaries. If the address family of the boundary + * does not match the constant or comparison of the length of the network + * parts is not true by the operator, the divider for the boundary would not + * be taken into account. If both of the dividers can be calculated the greater + * one will be used to minimize the mistake in the buckets which have + * disparate masklens. + * + * The divider on the partial bucket match is imagined as the distance + * between the decisive bits and the common bits of the addresses. It will + * be used as power of two as it is the natural scale for the IP network + * inclusion.
The partial bucket match divider calculation is an empirical + * formula and subject to change with more experiment. + * + * For partial match with buckets which have different address families + * on the left and right sides only the boundary with the same address + * family is taken into consideration. This can cause more mistakes for these + * buckets if the masklens of their boundaries are also disparate. It can + * only be the case for one bucket, if there are addresses with different + * families on the column. It seems as a better option than not considering + * these buckets. + */ +static Selectivity +inet_his_inclusion_selec(Datum *values, int nvalues, Datum *constvalue, + short int opr_order) +{ + inet *query, + *left, + *right; + float match; + int i; + short int left_order, + right_order, + left_divider, + right_divider; + + match = 0.0; + query = DatumGetInetP(*constvalue); + left = DatumGetInetP(values[0]); + left_order = inet_inclusion_cmp(left, query, opr_order); + + for (i = 1; i < nvalues; i++) + { + right = DatumGetInetP(values[i]); + right_order = inet_inclusion_cmp(right, query, opr_order); + + if (left_order == 0 && right_order == 0) + { + /* Full bucket match. */ + + match += 1.0; + } + else if ((left_order <= 0 && right_order > 0) || + (left_order >= 0 && right_order < 0) || + (right_order == 0 && i == nvalues - 1)) + { + /* Partial bucket match. */ + + left_divider = inet_his_match_divider(left, query, opr_order); + right_divider = inet_his_match_divider(right, query, opr_order); + + if (left_divider >= 0 || right_divider >= 0) + match += 1.0 / pow(2, Max(left_divider, right_divider)); + } + + /* Shift the variables. */ + left = right; + left_order = right_order; + } + + /* There are nvalues - 1 buckets. */ + return match / (nvalues - 1); +} + +/* + * Inet MCV join selectivity estimation + * + * The original function of the operator used in this function, like the + * mcv_selectivity() on selfuncs.c. 
Actually, this function has nothing + * to do with the network data types except its name and location. + */ +static Selectivity +inet_mcv_join_selec(Datum *values1, float4 *numbers1, int nvalues1, + Datum *values2, float4 *numbers2, int nvalues2, + Oid operator) +{ + Selectivity selec; + FmgrInfo proc; + int i, + j; + + fmgr_info(get_opcode(operator), &proc); + selec = 0.0; + + for (i = 0; i < nvalues1; i++) + for (j = 0; j < nvalues2; j++) + if (DatumGetBool(FunctionCall2Coll(&proc, DEFAULT_COLLATION_OID, + values1[i], values2[j]))) + selec += numbers1[i] * numbers2[j]; + + return selec; +} + +/* + * Inet MCV vs histogram inclusion join selectivity estimation + * + * The function result is the selectivity, and the fraction of the total + * population of the MCV is returned into *max_selec_pointer. + */ +static Selectivity +inet_mcv_his_selec(Datum *mcv_values, float4 *mcv_numbers, int mcv_nvalues, + Datum *his_values, int his_nvalues, int red_nvalues, + short int opr_order, Selectivity *max_selec_pointer) +{ + Selectivity selec, + red_selec, + max_selec; + int i; + + selec = 0.0; + red_selec = 0.0; + max_selec = 0.0; + + for (i = 0; i < mcv_nvalues; i++) + { + if (i < red_nvalues) + { + selec += mcv_numbers[i] * + inet_his_inclusion_selec(his_values, his_nvalues, + &mcv_values[i], opr_order); + + red_selec += mcv_numbers[i]; + } + + max_selec += mcv_numbers[i]; + } + + *max_selec_pointer = max_selec; + return selec * max_selec / red_selec; +} + +/* + * Inet histogram inclusion join selectivity estimation + * + * Selected values from the first histogram will be matched with the second. + * red_nvalues of the values will by discarding same amount of values from + * the begging and the end of the list, on the grounds that they are outliers + * and hence not very representative. 
+ */ +static Selectivity +inet_his_inclusion_join_selec(Datum *his1_values, int his1_nvalues, + Datum *his2_values, int his2_nvalues, + int red_nvalues, short int opr_order) +{ + float match; + int nskip, + i; + + match = 0.0; + nskip = (his1_nvalues - red_nvalues) / 2; + + for (i = nskip; i < his1_nvalues - nskip; i++) + match += inet_his_inclusion_selec(his2_values, his2_nvalues, + &his1_values[i], opr_order); + + return match / (his1_nvalues - 2 * nskip); +} + +/* + * Comparison function for the subnet inclusion operators + * + * Comparison is compatible with the basic comparison function for the inet + * type. See network_cmp_internal on network.c for the original. Basic + * comparison operators are implemented with the network_cmp_internal + * function. It is possible to implement the subnet inclusion operators with + * this function. + * + * Comparison is first on the common bits of the network part, then on + * the length of the network part (masklen) as the network_cmp_internal + * function. Only the first part is on this function. The second part is + * seperated to another function for reusability. The difference between + * the second part and the original network_cmp_internal is that the operator + * is used while comparing the lengths of the network parts. See the second + * part on the inet_masklen_inclusion_cmp function below. + */ +static short int +inet_inclusion_cmp(inet *left, inet *right, short int opr_order) +{ + if (ip_family(left) == ip_family(right)) + { + short int order; + + order = bitncmp(ip_addr(left), ip_addr(right), + Min(ip_bits(left), ip_bits(right))); + + if (order != 0) + return order; + + return inet_masklen_inclusion_cmp(left, right, opr_order); + } + + return ip_family(left) - ip_family(right); +} + +/* + * Masklen comparison function for the subnet inclusion operators + * + * Compares the lengths of network parts of the inputs using the operator. + * If the comparision is okay for the operator the return value will be 0. 
+ * Otherwise the return value will be less than or greater than 0 with + * respect to the operator. + */ +static short int +inet_masklen_inclusion_cmp(inet *left, inet *right, short int opr_order) +{ + if (ip_family(left) == ip_family(right)) + { + short int order; + + order = ip_bits(left) - ip_bits(right); + + if ((order > 0 && opr_order >= 0) || + (order == 0 && opr_order >= -1 && opr_order <= 1) || + (order < 0 && opr_order <= 0)) + return 0; + + return opr_order; + } + + return ip_family(left) - ip_family(right); +} + +/* + * Inet histogram partial match divider calculation + * + * First the families and the lenghts of the network parts are compared + * using the subnet inclusion operator. The divider will be calculated + * using the masklens and the common bits of the addresses. -1 will be + * returned if it cannot be calculated. + */ +static short int +inet_his_match_divider(inet *boundary, inet *query, short int opr_order) +{ + if (inet_masklen_inclusion_cmp(boundary, query, opr_order) == 0) + { + short int min_bits, + decisive_bits; + + min_bits = Min(ip_bits(boundary), ip_bits(query)); + + /* + * Set the decisive bits from the one which should contain the other + * according to the operator. 
+ */ + if (opr_order < 0) + decisive_bits = ip_bits(boundary); + else if (opr_order > 0) + decisive_bits = ip_bits(query); + else + decisive_bits = min_bits; + + if (min_bits > 0) + return decisive_bits - bitncommon(ip_addr(boundary), ip_addr(query), + min_bits); + return decisive_bits; + } + + return -1; } diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index e932ccf..8dcda93 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -149,21 +149,21 @@ static double var_eq_const(VariableStatData *vardata, Oid operator, bool varonleft); static double var_eq_non_const(VariableStatData *vardata, Oid operator, Node *other, bool varonleft); static double ineq_histogram_selectivity(PlannerInfo *root, VariableStatData *vardata, FmgrInfo *opproc, bool isgt, Datum constval, Oid consttype); static double eqjoinsel_inner(Oid operator, VariableStatData *vardata1, VariableStatData *vardata2); -static double eqjoinsel_semi(Oid operator, +double eqjoinsel_semi(Oid operator, VariableStatData *vardata1, VariableStatData *vardata2, RelOptInfo *inner_rel); static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue, Datum lobound, Datum hibound, Oid boundstypid, double *scaledlobound, double *scaledhibound); static double convert_numeric_to_scalar(Datum value, Oid typid); static void convert_string_to_scalar(char *value, double *scaledvalue, char *lobound, double *scaledlobound, @@ -182,21 +182,21 @@ static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen, static char *convert_string_datum(Datum value, Oid typid); static double convert_timevalue_to_scalar(Datum value, Oid typid); static void examine_simple_variable(PlannerInfo *root, Var *var, VariableStatData *vardata); static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, Datum *min, Datum *max); static bool get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, Datum 
*min, Datum *max); -static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids); +RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids); static Selectivity prefix_selectivity(PlannerInfo *root, VariableStatData *vardata, Oid vartype, Oid opfamily, Const *prefixcon); static Selectivity like_selectivity(const char *patt, int pattlen, bool case_insensitive); static Selectivity regex_selectivity(const char *patt, int pattlen, bool case_insensitive, int fixed_prefix_len); static Datum string_to_datum(const char *str, Oid datatype); static Const *string_to_const(const char *str, Oid datatype); @@ -2418,21 +2418,21 @@ eqjoinsel_inner(Oid operator, return selec; } /* * eqjoinsel_semi --- eqjoinsel for semi join * * (Also used for anti join, which we are supposed to estimate the same way.) * Caller has ensured that vardata1 is the LHS variable. */ -static double +double eqjoinsel_semi(Oid operator, VariableStatData *vardata1, VariableStatData *vardata2, RelOptInfo *inner_rel) { double selec; double nd1; double nd2; bool isdefault1; bool isdefault2; Form_pg_statistic stats1 = NULL; @@ -5094,21 +5094,21 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, return have_data; } /* * find_join_input_rel * Look up the input relation for a join. * * We assume that the input relation's RelOptInfo must have been constructed * already. 
*/ -static RelOptInfo * +RelOptInfo * find_join_input_rel(PlannerInfo *root, Relids relids) { RelOptInfo *rel = NULL; switch (bms_membership(relids)) { case BMS_EMPTY_SET: /* should not happen */ break; case BMS_SINGLETON: diff --git a/src/include/catalog/pg_operator.h b/src/include/catalog/pg_operator.h index d7dcd1c..3b827fc 100644 --- a/src/include/catalog/pg_operator.h +++ b/src/include/catalog/pg_operator.h @@ -1135,32 +1135,33 @@ DESCR("not equal"); DATA(insert OID = 1203 ( "<" PGNSP PGUID b f f 869 869 16 1205 1206 network_lt scalarltsel scalarltjoinsel )); DESCR("less than"); DATA(insert OID = 1204 ( "<=" PGNSP PGUID b f f 869 869 16 1206 1205 network_le scalarltsel scalarltjoinsel )); DESCR("less than or equal"); DATA(insert OID = 1205 ( ">" PGNSP PGUID b f f 869 869 16 1203 1204 network_gt scalargtsel scalargtjoinsel )); DESCR("greater than"); DATA(insert OID = 1206 ( ">=" PGNSP PGUID b f f 869 869 16 1204 1203 network_ge scalargtsel scalargtjoinsel )); DESCR("greater than or equal"); DATA(insert OID = 931 ( "<<" PGNSP PGUID b f f 869 869 16 933 0 network_sub networksel networkjoinsel )); DESCR("is subnet"); -#define OID_INET_SUB_OP 931 +#define OID_INET_SUB_OP 931 DATA(insert OID = 932 ( "<<=" PGNSP PGUID b f f 869 869 16 934 0 network_subeq networksel networkjoinsel )); DESCR("is subnet or equal"); -#define OID_INET_SUBEQ_OP 932 +#define OID_INET_SUBEQ_OP 932 DATA(insert OID = 933 ( ">>" PGNSP PGUID b f f 869 869 16 931 0 network_sup networksel networkjoinsel )); DESCR("is supernet"); -#define OID_INET_SUP_OP 933 +#define OID_INET_SUP_OP 933 DATA(insert OID = 934 ( ">>=" PGNSP PGUID b f f 869 869 16 932 0 network_supeq networksel networkjoinsel )); DESCR("is supernet or equal"); -#define OID_INET_SUPEQ_OP 934 +#define OID_INET_SUPEQ_OP 934 DATA(insert OID = 3552 ( "&&" PGNSP PGUID b f f 869 869 16 3552 0 network_overlap networksel networkjoinsel )); DESCR("overlaps (is subnet or supernet)"); +#define OID_INET_OVERLAP_OP 3552 DATA(insert OID = 2634 ( "~" 
PGNSP PGUID l f f 0 869 869 0 0 inetnot - - )); DESCR("bitwise not"); DATA(insert OID = 2635 ( "&" PGNSP PGUID b f f 869 869 869 0 0 inetand - - )); DESCR("bitwise and"); DATA(insert OID = 2636 ( "|" PGNSP PGUID b f f 869 869 869 0 0 inetor - - )); DESCR("bitwise or"); DATA(insert OID = 2637 ( "+" PGNSP PGUID b f f 869 20 869 2638 0 inetpl - - )); DESCR("add"); DATA(insert OID = 2638 ( "+" PGNSP PGUID b f f 20 869 869 2637 0 int8pl_inet - - ));
-- psql test script for the inet/cidr subnet inclusion operators
-- (&&, <<, <<=, >>, >>=).
--
-- Builds two tables of random inet values, analyzes them, and then runs
-- EXPLAIN ANALYZE on restriction and join queries so that the planner's
-- estimated row counts can be compared with the actual row counts.

-- Data set parameters
\set count 10000
\set join_count 1000
\set null_count 1000
\set duplicate_count 1000
\set duplicate_distribution 100
\set statistics 100

-- Disabled helpers; presumably meant to give time to attach a debugger to
-- the backend before the tests run.
--select pg_backend_pid();
--select pg_sleep(10);

drop table if exists network;
drop table if exists network_join;

-- Random IPv4 addresses: four octets in 0..255 and a prefix length in 0..32.
create table network as
    select ((random() * 255.4)::int::text || '.' ||
            (random() * 255.4)::int::text || '.' ||
            (random() * 255.4)::int::text || '.' ||
            (random() * 255.4)::int::text || '/' ||
            (random() * 32.4)::int::text)::inet as addr
    from generate_series(1, :count) as ctr;

create table network_join as
    select ((random() * 255.4)::int::text || '.' ||
            (random() * 255.4)::int::text || '.' ||
            (random() * 255.4)::int::text || '.' ||
            (random() * 255.4)::int::text || '/' ||
            (random() * 32.4)::int::text)::inet as addr
    from generate_series(1, :join_count) as ctr;

-- Add NULLs and a couple of IPv6 values to the restriction table.
insert into network
    select null
    from generate_series(1, :null_count) as ctr;

insert into network values ('::1'), ('::2/64');

-- Duplicate some values to create MCV list
-- NOTE(review): with the default window frame nth_value() yields NULL while
-- the frame holds fewer rows than the requested position, so these inserts
-- can also add some NULLs -- confirm whether that is intended.
insert into network
    select nth_value(addr, (random() * :duplicate_distribution)::int + 1)
               over (order by random())
    from network,
         generate_series(0, (:duplicate_count / :count)::int) as multiplier
    limit :duplicate_count;

insert into network_join
    select nth_value(addr, (random() * :duplicate_distribution)::int + 1)
               over (order by random())
    from network_join,
         generate_series(0, (:duplicate_count / :join_count)::int) as multiplier
    limit :duplicate_count;

alter table network alter column addr set statistics :statistics;
alter table network_join alter column addr set statistics :statistics;

analyze network;
analyze network_join;

-- Show the statistics the estimates below are based on.
select * from pg_stats
    where tablename like 'network%' and attname = 'addr';

-- Random constant from the join table
-- Fetched in the current session with \gset instead of a separate psql
-- process, so it is read from the tables created above.  NULLs are skipped
-- because an empty constant is not valid inet input.
select addr as "const"
    from network_join
    where addr is not null
    order by random()
    limit 1 \gset

\echo
\echo
\echo === Tests with a constant ===
\echo

explain analyze select * from network where addr && :'const';
explain analyze select * from network where addr << :'const';
explain analyze select * from network where addr <<= :'const';
explain analyze select * from network where addr >> :'const';
explain analyze select * from network where addr >>= :'const';

\echo
\echo
\echo === Inner join tests ===
\echo

explain analyze select * from network join network_join on network.addr = network_join.addr;
explain analyze select * from network join network_join on network.addr && network_join.addr;
explain analyze select * from network join network_join on network.addr << network_join.addr;
explain analyze select * from network join network_join on network.addr <<= network_join.addr;
explain analyze select * from network join network_join on network.addr >> network_join.addr;
explain analyze select * from network join network_join on network.addr >>= network_join.addr;

\echo
\echo
\echo === Left join tests ===
\echo

explain analyze select * from network left join network_join on network.addr = network_join.addr;
explain analyze select * from network left join network_join on network.addr && network_join.addr;
explain analyze select * from network left join network_join on network.addr << network_join.addr;
explain analyze select * from network left join network_join on network.addr <<= network_join.addr;
explain analyze select * from network left join network_join on network.addr >> network_join.addr;
explain analyze select * from network left join network_join on network.addr >>= network_join.addr;

\echo
\echo
\echo === Right join tests ===
\echo

explain analyze select * from network right join network_join on network.addr = network_join.addr;
explain analyze select * from network right join network_join on network.addr && network_join.addr;
explain analyze select * from network right join network_join on network.addr << network_join.addr;
explain analyze select * from network right join network_join on network.addr <<= network_join.addr;
explain analyze select * from network right join network_join on network.addr >> network_join.addr;
explain analyze select * from network right join network_join on network.addr >>= network_join.addr;

\echo
\echo
\echo === Semi-join tests ===
\echo

explain analyze select * from network where exists (select 1 from network_join where network.addr = network_join.addr);
explain analyze select * from network where exists (select 1 from network_join where network.addr && network_join.addr);
explain analyze select * from network where exists (select 1 from network_join where network.addr << network_join.addr);
explain analyze select * from network where exists (select 1 from network_join where network.addr <<= network_join.addr);
explain analyze select * from network where exists (select 1 from network_join where network.addr >> network_join.addr);
explain analyze select * from network where exists (select 1 from network_join where network.addr >>= network_join.addr);

\echo
\echo
\echo === Anti-join tests ===
\echo

explain analyze select * from network where not exists (select 1 from network_join where network.addr = network_join.addr);
explain analyze select * from network where not exists (select 1 from network_join where network.addr && network_join.addr);
explain analyze select * from network where not exists (select 1 from network_join where network.addr << network_join.addr);
explain analyze select * from network where not exists (select 1 from network_join where network.addr <<= network_join.addr);
explain analyze select * from network where not exists (select 1 from network_join where network.addr >> network_join.addr);
explain analyze select * from network where not exists (select 1 from network_join where network.addr >>= network_join.addr);
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers