> * Isn't "X >> Y" equivalent to "network_scan_first(X) < Y AND
> network_scan_last(X) > Y"? Or at least close enough for selectivity
> estimation purposes? Pardon my ignorance - I'm not too familiar with the
> inet datatype - but how about just calling scalarineqsel for both bounds?
Actually, "X >> Y" is equivalent to
network_scan_first(X) <= network_host(Y) AND
network_scan_last(X) >= network_host(Y) AND
network_masklen(X) < network_masklen(Y)
but we have statistics for neither network_scan_last(X)
nor network_masklen(X). I tried to find a solution based on
the implementation of the operators.
> * inet_mcv_join_selec() is O(n^2) where n is the number of entries in the
> MCV lists. With the max statistics target of 10000, a worst case query on
> my laptop took about 15 seconds to plan. Maybe that's acceptable, but you
> went through some trouble to make planning of MCV vs histogram faster, by
> the log2 method to compare only some values, so I wonder why you didn't do
> the same for the MCV vs MCV case?
It was like that in the previous versions. It was causing worse
estimates, but at that time I was reducing both of the lists. It
works slightly better when only the left-hand side list is
reduced. The attached version works like that.
> * A few typos: lenght -> length.
Fixed.
Thank you for looking at it.
diff --git a/src/backend/utils/adt/network_selfuncs.c
b/src/backend/utils/adt/network_selfuncs.c
index d0d806f..a00706c 100644
--- a/src/backend/utils/adt/network_selfuncs.c
+++ b/src/backend/utils/adt/network_selfuncs.c
@@ -1,32 +1,671 @@
/*-------------------------------------------------------------------------
*
* network_selfuncs.c
* Functions for selectivity estimation of inet/cidr operators
*
- * Currently these are just stubs, but we hope to do better soon.
+ * Estimates are based on null fraction, distinct value count, most common
+ * values, and histogram of inet/cidr datatypes.
*
* Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/backend/utils/adt/network_selfuncs.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
+#include <math.h>
+
+#include "access/htup_details.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_operator.h"
+#include "catalog/pg_statistic.h"
+#include "utils/lsyscache.h"
#include "utils/inet.h"
+#include "utils/selfuncs.h"
+/* Default selectivity constant for the inet overlap operator */
+#define DEFAULT_OVERLAP_SEL 0.01
+
+/* Default selectivity constant for the other operators */
+#define DEFAULT_INCLUSION_SEL 0.005
+
+/* Default selectivity for given operator */
+#define DEFAULT_SEL(operator) \
+ ((operator) == OID_INET_OVERLAP_OP ? \
+ DEFAULT_OVERLAP_SEL : DEFAULT_INCLUSION_SEL)
+
+static Selectivity networkjoinsel_inner(Oid operator,
+ VariableStatData *vardata1,
VariableStatData *vardata2);
+extern double eqjoinsel_semi(Oid operator, VariableStatData *vardata1,
+ VariableStatData *vardata2, RelOptInfo *inner_rel);
+extern RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids);
+static short int inet_opr_order(Oid operator);
+static Selectivity inet_his_inclusion_selec(Datum *values, int nvalues,
+ Datum *constvalue, short int
opr_order);
+static Selectivity inet_mcv_join_selec(Datum *values1, float4 *numbers1,
+ int nvalues1, Datum *values2, float4
*numbers2,
+ int nvalues2, int red_nvalues, Oid
operator);
+static Selectivity inet_mcv_his_selec(Datum *mcv_values, float4 *mcv_numbers,
+ int mcv_nvalues, Datum *his_values, int
his_nvalues,
+ int red_nvalues, short int opr_order,
+ Selectivity *max_selec_pointer);
+static Selectivity inet_his_inclusion_join_selec(Datum *his1_values,
+ int his1_nvalues, Datum *his2_values,
int his2_nvalues,
+ int red_nvalues,
short int opr_order);
+static short int inet_inclusion_cmp(inet *left, inet *right,
+ short int opr_order);
+static short int inet_masklen_inclusion_cmp(inet *left, inet *right,
+ short int opr_order);
+static short int inet_his_match_divider(inet *boundary, inet *query,
+ short int opr_order);
+
+/*
+ * Selectivity estimation for the subnet inclusion operators
+ *
+ * Handles restriction clauses of the form (variable op constant) or
+ * (constant op variable). The estimate is the value returned by
+ * mcv_selectivity() plus, when a histogram slot exists, the result of
+ * inet_his_inclusion_selec() scaled by (1 - nullfrac - max_mcv_selec).
+ * When the variable is on the right-hand side, the value from
+ * inet_opr_order() is negated before it is passed to
+ * inet_his_inclusion_selec().
+ *
+ * DEFAULT_SEL(operator) is returned when the clause does not have the
+ * expected shape, when the other side is not a Const, or when there is no
+ * statistics tuple. A NULL constant yields 0.0. If there is no histogram
+ * and max_mcv_selec is 0.0, the default selectivity scaled by the non-null
+ * fraction is used. The result is clamped with CLAMP_PROBABILITY().
+ */
Datum
networksel(PG_FUNCTION_ARGS)
{
- PG_RETURN_FLOAT8(0.001);
+ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+ Oid operator = PG_GETARG_OID(1);
+ List *args = (List *) PG_GETARG_POINTER(2);
+ int varRelid = PG_GETARG_INT32(3),
+ his_nvalues;
+ VariableStatData vardata;
+ Node *other;
+ bool varonleft;
+ Selectivity selec,
+ max_mcv_selec;
+ Datum constvalue,
+ *his_values;
+ Form_pg_statistic stats;
+ double nullfrac;
+ FmgrInfo proc;
+
+ /*
+ * If expression is not (variable op something) or (something op
+ * variable), then punt and return a default estimate.
+ */
+ if (!get_restriction_variable(root, args, varRelid,
+ &vardata,
&other, &varonleft))
+ PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
+
+ /*
+ * Can't do anything useful if the something is not a constant, either.
+ */
+ if (!IsA(other, Const))
+ {
+ ReleaseVariableStats(vardata);
+ PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
+ }
+
+ /* All of the subnet inclusion operators are strict. */
+ if (((Const *) other)->constisnull)
+ {
+ ReleaseVariableStats(vardata);
+ PG_RETURN_FLOAT8(0.0);
+ }
+
+ if (!HeapTupleIsValid(vardata.statsTuple)) /* no statistics tuple: use the default */
+ {
+ ReleaseVariableStats(vardata);
+ PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
+ }
+
+ constvalue = ((Const *) other)->constvalue;
+ stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
+ nullfrac = stats ? stats->stanullfrac : 0.0;
+
+ fmgr_info(get_opcode(operator), &proc);
+ selec = mcv_selectivity(&vardata, &proc, constvalue, varonleft,
+ &max_mcv_selec);
+
+ if (get_attstatsslot(vardata.statsTuple,
+ vardata.atttype,
vardata.atttypmod,
+ STATISTIC_KIND_HISTOGRAM,
InvalidOid,
+ NULL,
+ &his_values, &his_nvalues,
+ NULL, NULL))
+ {
+ selec += (1.0 - nullfrac - max_mcv_selec) *
+ inet_his_inclusion_selec(his_values, his_nvalues,
&constvalue,
+
varonleft ? inet_opr_order(operator) :
+
inet_opr_order(operator) * -1);
+
+ free_attstatsslot(vardata.atttype, his_values, his_nvalues,
NULL, 0);
+ }
+ else if (max_mcv_selec == 0.0) /* no histogram and nothing reported for the MCV list */
+ selec = (1.0 - nullfrac) * DEFAULT_SEL(operator);
+
+ /* Result should be in range, but make sure... */
+ CLAMP_PROBABILITY(selec);
+
+ ReleaseVariableStats(vardata);
+ PG_RETURN_FLOAT8(selec);
}
+/*
+ * Join selectivity estimation for the subnet inclusion operators
+ *
+ * This function is a copy of eqjoinsel() in selfuncs.c, except for the
+ * comments and that it calls networkjoinsel_inner() instead of
+ * eqjoinsel_inner().
+ *
+ * Inner, left and full joins are estimated by networkjoinsel_inner(); semi
+ * and anti joins fall back to eqjoinsel_semi(). When get_join_variables()
+ * reports the join as reversed, the commutator of the operator is used and
+ * the two variables are swapped. The result is clamped with
+ * CLAMP_PROBABILITY() before it is returned.
+ */
Datum
networkjoinsel(PG_FUNCTION_ARGS)
{
- PG_RETURN_FLOAT8(0.001);
+ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+ Oid operator = PG_GETARG_OID(1);
+ List *args = (List *) PG_GETARG_POINTER(2);
+#ifdef NOT_USED
+ JoinType jointype = (JoinType) PG_GETARG_INT16(3);
+#endif
+ SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
+ double selec;
+ VariableStatData vardata1;
+ VariableStatData vardata2;
+ bool join_is_reversed;
+ RelOptInfo *inner_rel;
+
+ get_join_variables(root, args, sjinfo,
+ &vardata1, &vardata2,
&join_is_reversed);
+
+ switch (sjinfo->jointype)
+ {
+ case JOIN_INNER:
+ case JOIN_LEFT:
+ case JOIN_FULL:
+
+ /*
+ * The selectivity of a left or full join is not exactly the same
+ * as that of an inner join, but the difference is neglected.
+ */
+ if (!join_is_reversed)
+ selec = networkjoinsel_inner(operator,
&vardata1, &vardata2);
+ else
+ selec =
networkjoinsel_inner(get_commutator(operator),
+
&vardata2, &vardata1);
+ break;
+ case JOIN_SEMI:
+ case JOIN_ANTI:
+
+ /*
+ * Selectivity estimation functions for semi and anti joins are
+ * not implemented for the subnet inclusion operators, so
+ * eqjoinsel_semi() is used to cover them. It makes small or big
+ * mistakes depending on the join type, the operator and the
+ * ratio between the row counts.
+ */
+ inner_rel = find_join_input_rel(root,
sjinfo->min_righthand);
+
+ if (!join_is_reversed)
+ selec = eqjoinsel_semi(operator, &vardata1,
&vardata2,
+
inner_rel);
+ else
+ selec = eqjoinsel_semi(get_commutator(operator),
+
&vardata2, &vardata1,
+
inner_rel);
+ break;
+ default:
+ /* other values not expected here */
+ elog(ERROR, "unrecognized join type: %d",
+ (int) sjinfo->jointype);
+ selec = 0; /* keep compiler quiet */
+ break;
+ }
+
+ ReleaseVariableStats(vardata1);
+ ReleaseVariableStats(vardata2);
+
+ CLAMP_PROBABILITY(selec);
+
+ PG_RETURN_FLOAT8((float8) selec);
+}
+
+/*
+ * Inner join selectivity estimation for the subnet inclusion operators
+ *
+ * Calculates MCV vs MCV, MCV vs histogram and histogram vs histogram
+ * selectivity for a join using the subnet inclusion operators.  Unlike the
+ * join selectivity function for the equality operator, eqjoinsel_inner(),
+ * one to one matching of the values is not enough.  Network inclusion
+ * operators are likely to match many to many, so the MCV and histogram
+ * lists have to be looped to the end.  Also, MCV vs histogram selectivity
+ * is not neglected as in eqjoinsel_inner().
+ *
+ * To make the function faster, only some of the values from the MCV list
+ * and the histogram of one side are matched against the other side.  The
+ * number of values used is log2() of the longer list's length, plus one.
+ *
+ * inet_his_inclusion_selec() evaluates "histogram value OP constant", i.e.
+ * it treats the histogram as the left-hand operand.  Whenever the histogram
+ * comes from vardata2 (the right-hand side of the join operator), the
+ * operator order is therefore negated to commute the operator.
+ *
+ * vardata1 must describe the left-hand variable of the operator.  If either
+ * side has neither an MCV list nor a histogram, the default selectivity
+ * scaled by the non-null fractions is returned.  The result is not clamped.
+ */
+static Selectivity
+networkjoinsel_inner(Oid operator,
+					 VariableStatData *vardata1, VariableStatData *vardata2)
+{
+	Form_pg_statistic stats;
+	double		nullfrac1 = 0.0,
+				nullfrac2 = 0.0;
+	Selectivity selec = 0.0,
+				mcv1_max_selec = 0.0,
+				mcv2_max_selec = 0.0;
+	bool		mcv1_exists = false,
+				mcv2_exists = false,
+				his1_exists = false,
+				his2_exists = false;
+
+	/*
+	 * The counts start at zero so that a slot which does not exist never
+	 * contributes an indeterminate value to the Max() calls below.
+	 */
+	int			mcv1_nvalues = 0,
+				mcv2_nvalues = 0,
+				mcv1_nnumbers = 0,
+				mcv2_nnumbers = 0,
+				his1_nvalues = 0,
+				his2_nvalues = 0,
+				red1_nvalues = 0,
+				red2_nvalues = 0,
+				max_nvalues;
+	Datum	   *mcv1_values = NULL,
+			   *mcv2_values = NULL,
+			   *his1_values = NULL,
+			   *his2_values = NULL;
+	float4	   *mcv1_numbers = NULL,
+			   *mcv2_numbers = NULL;
+
+	if (HeapTupleIsValid(vardata1->statsTuple))
+	{
+		if ((stats = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple)))
+			nullfrac1 = stats->stanullfrac;
+
+		mcv1_exists = get_attstatsslot(vardata1->statsTuple,
+									   vardata1->atttype, vardata1->atttypmod,
+									   STATISTIC_KIND_MCV, InvalidOid,
+									   NULL,
+									   &mcv1_values, &mcv1_nvalues,
+									   &mcv1_numbers, &mcv1_nnumbers);
+		his1_exists = get_attstatsslot(vardata1->statsTuple,
+									   vardata1->atttype, vardata1->atttypmod,
+									   STATISTIC_KIND_HISTOGRAM, InvalidOid,
+									   NULL,
+									   &his1_values, &his1_nvalues,
+									   NULL, NULL);
+
+		/* log2() is only defined for positive arguments. */
+		max_nvalues = Max(mcv1_nvalues, his1_nvalues);
+		if (max_nvalues > 0)
+			red1_nvalues = ((int) log2(max_nvalues)) + 1;
+	}
+
+	if (HeapTupleIsValid(vardata2->statsTuple))
+	{
+		if ((stats = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple)))
+			nullfrac2 = stats->stanullfrac;
+
+		mcv2_exists = get_attstatsslot(vardata2->statsTuple,
+									   vardata2->atttype, vardata2->atttypmod,
+									   STATISTIC_KIND_MCV, InvalidOid,
+									   NULL,
+									   &mcv2_values, &mcv2_nvalues,
+									   &mcv2_numbers, &mcv2_nnumbers);
+		his2_exists = get_attstatsslot(vardata2->statsTuple,
+									   vardata2->atttype, vardata2->atttypmod,
+									   STATISTIC_KIND_HISTOGRAM, InvalidOid,
+									   NULL,
+									   &his2_values, &his2_nvalues,
+									   NULL, NULL);
+
+		max_nvalues = Max(mcv2_nvalues, his2_nvalues);
+		if (max_nvalues > 0)
+			red2_nvalues = ((int) log2(max_nvalues)) + 1;
+	}
+
+	if (mcv1_exists && mcv2_exists)
+		selec += inet_mcv_join_selec(mcv1_values, mcv1_numbers, mcv1_nvalues,
+									 mcv2_values, mcv2_numbers, mcv2_nvalues,
+									 Min(mcv1_nvalues, red1_nvalues), operator);
+
+	/* Histogram is on the right-hand side here: commute the operator. */
+	if (mcv1_exists && his2_exists)
+		selec += inet_mcv_his_selec(mcv1_values, mcv1_numbers, mcv1_nvalues,
+									his2_values, his2_nvalues,
+									Min(mcv1_nvalues, red1_nvalues),
+									-inet_opr_order(operator),
+									&mcv1_max_selec);
+
+	/* Histogram is on the left-hand side here: use the operator as is. */
+	if (mcv2_exists && his1_exists)
+		selec += inet_mcv_his_selec(mcv2_values, mcv2_numbers, mcv2_nvalues,
+									his1_values, his1_nvalues,
+									Min(mcv2_nvalues, red2_nvalues),
+									inet_opr_order(operator),
+									&mcv2_max_selec);
+
+	/*
+	 * Values picked from the first histogram are looked up in the second
+	 * one, so the second histogram plays the left-hand role: commute.
+	 */
+	if (his1_exists && his2_exists)
+		selec += (1.0 - nullfrac1 - mcv1_max_selec) *
+			(1.0 - nullfrac2 - mcv2_max_selec) *
+			inet_his_inclusion_join_selec(his1_values, his1_nvalues,
+										  his2_values, his2_nvalues,
+										  Min(his1_nvalues, red1_nvalues),
+										  -inet_opr_order(operator));
+
+	/* Without any statistics for one of the sides, fall back to the default. */
+	if ((!mcv1_exists && !his1_exists) || (!mcv2_exists && !his2_exists))
+		selec = (1.0 - nullfrac1) * (1.0 - nullfrac2) * DEFAULT_SEL(operator);
+
+	if (mcv1_exists)
+		free_attstatsslot(vardata1->atttype, mcv1_values, mcv1_nvalues,
+						  mcv1_numbers, mcv1_nnumbers);
+	if (mcv2_exists)
+		free_attstatsslot(vardata2->atttype, mcv2_values, mcv2_nvalues,
+						  mcv2_numbers, mcv2_nnumbers);
+	if (his1_exists)
+		free_attstatsslot(vardata1->atttype, his1_values, his1_nvalues,
+						  NULL, 0);
+	if (his2_exists)
+		free_attstatsslot(vardata2->atttype, his2_values, his2_nvalues,
+						  NULL, 0);
+
+	return selec;
+}
+
+/*
+ * Practical comparable numbers for the subnet inclusion operators
+ *
+ * Maps each operator to a small signed code: -2 for ">>", -1 for ">>=",
+ * 0 for "&&", 1 for "<<=" and 2 for "<<".  Negating the code therefore
+ * yields the code of the commutator.  Any other operator raises an error.
+ */
+static short int
+inet_opr_order(Oid operator)
+{
+	switch (operator)
+	{
+		case OID_INET_SUP_OP:
+			return -2;
+		case OID_INET_SUPEQ_OP:
+			return -1;
+		case OID_INET_OVERLAP_OP:
+			return 0;
+		case OID_INET_SUBEQ_OP:
+			return 1;
+		case OID_INET_SUB_OP:
+			return 2;
+		default:
+			elog(ERROR, "unknown operator for inet inclusion selectivity");
+	}
+
+	return 0;					/* unreached, but keep compiler quiet */
+}
+
+/*
+ * Inet histogram inclusion selectivity estimation
+ *
+ * Calculates histogram selectivity for the subnet inclusion operators of
+ * the inet type.  The histogram is treated as the left-hand operand and
+ * *constvalue as the right-hand operand of the operator described by
+ * opr_order.  The return value is between 0 and 1.  It should be corrected
+ * with the MCV selectivity and null fraction.  If the constant is less than
+ * the first element or greater than the last element of the histogram the
+ * return value will be 0.  A histogram with fewer than two values has no
+ * buckets, so 0 is returned for it as well.
+ *
+ * The histogram is originally for the basic comparison operators.  Only
+ * the common bits of the network part and the length of the network part
+ * (masklen) are appropriate for the subnet inclusion operators.  Fortunately,
+ * basic comparison fits in this situation.  Even so, the length of the
+ * network part would not really be significant in the histogram.  This would
+ * lead to big mistakes for data sets with uneven masklen distribution.
+ * To avoid this problem, comparisons with the left and the right side of
+ * the buckets are used together.
+ *
+ * Histogram bucket matches are calculated in two forms.  If the constant
+ * matches both sides, the bucket is considered as fully matched.  If the
+ * constant matches only the right side of the bucket, it is not considered
+ * as matched unless it is the last bucket, because it will match the next
+ * bucket.  If all of these buckets were considered as matched, it would
+ * lead to unfair multiple matches for some constants.
+ *
+ * The second form is to match the bucket partially.  We try to calculate
+ * dividers for both of the boundaries.  If the address family of the
+ * boundary does not match the constant, or the comparison of the lengths of
+ * the network parts is not true by the operator, the divider for the
+ * boundary is not taken into account.  If both of the dividers can be
+ * calculated, the greater one is used to minimize the mistake in the
+ * buckets which have disparate masklens.
+ *
+ * The divider on the partial bucket match is imagined as the distance
+ * between the decisive bits and the common bits of the addresses.  It will
+ * be used as a power of two as it is the natural scale for the IP network
+ * inclusion.  The partial bucket match divider calculation is an empirical
+ * formula and subject to change with more experiment.
+ *
+ * For partial match with buckets which have different address families
+ * on the left and right sides, only the boundary with the same address
+ * family is taken into consideration.  This can cause more mistakes for
+ * these buckets if the masklens of their boundaries are also disparate.  It
+ * can only be the case for one bucket, if there are addresses with different
+ * families on the column.  It seems a better option than not considering
+ * these buckets.
+ */
+static Selectivity
+inet_his_inclusion_selec(Datum *values, int nvalues, Datum *constvalue,
+						 short int opr_order)
+{
+	inet	   *query,
+			   *left,
+			   *right;
+	Selectivity match = 0.0;	/* accumulated in double, like the result */
+	int			i;
+	short int	left_order,
+				right_order,
+				left_divider,
+				right_divider;
+
+	/* Without at least two boundaries there is no bucket to match. */
+	if (nvalues < 2)
+		return 0.0;
+
+	query = DatumGetInetP(*constvalue);
+	left = DatumGetInetP(values[0]);
+	left_order = inet_inclusion_cmp(left, query, opr_order);
+
+	for (i = 1; i < nvalues; i++)
+	{
+		right = DatumGetInetP(values[i]);
+		right_order = inet_inclusion_cmp(right, query, opr_order);
+
+		if (left_order == 0 && right_order == 0)
+		{
+			/* Full bucket match. */
+			match += 1.0;
+		}
+		else if ((left_order <= 0 && right_order > 0) ||
+				 (left_order >= 0 && right_order < 0) ||
+				 (right_order == 0 && i == nvalues - 1))
+		{
+			/* Partial bucket match. */
+			left_divider = inet_his_match_divider(left, query, opr_order);
+			right_divider = inet_his_match_divider(right, query, opr_order);
+
+			/* A divider of -1 means it could not be calculated. */
+			if (left_divider >= 0 || right_divider >= 0)
+				match += 1.0 / pow(2, Max(left_divider, right_divider));
+		}
+
+		/* The right boundary becomes the left one of the next bucket. */
+		left = right;
+		left_order = right_order;
+	}
+
+	/* There are nvalues - 1 buckets. */
+	return match / (nvalues - 1);
+}
+
+/*
+ * Inet MCV join selectivity estimation
+ *
+ * Matches are found by calling the operator's own function on every pair
+ * of values, as mcv_selectivity() in selfuncs.c does.  Actually, this
+ * function has nothing to do with the network data types except its name
+ * and location.
+ *
+ * Only the first red_nvalues entries of the first list are compared with
+ * the second list.  The sum obtained from them is scaled up by the ratio of
+ * the total frequency of the first list to the frequency of the entries
+ * that were compared.  Returns 0 if no frequency was sampled at all.
+ */
+static Selectivity
+inet_mcv_join_selec(Datum *values1, float4 *numbers1, int nvalues1,
+					Datum *values2, float4 *numbers2, int nvalues2,
+					int red_nvalues, Oid operator)
+{
+	Selectivity selec = 0.0,
+				red_selec = 0.0,
+				max_selec = 0.0;
+	FmgrInfo	proc;
+	int			i,
+				j;
+
+	fmgr_info(get_opcode(operator), &proc);
+
+	for (i = 0; i < nvalues1; i++)
+	{
+		if (i < red_nvalues)
+		{
+			for (j = 0; j < nvalues2; j++)
+			{
+				if (DatumGetBool(FunctionCall2Coll(&proc,
+												   DEFAULT_COLLATION_OID,
+												   values1[i], values2[j])))
+					selec += numbers1[i] * numbers2[j];
+			}
+
+			red_selec += numbers1[i];
+		}
+
+		max_selec += numbers1[i];
+	}
+
+	/* Avoid dividing by zero when nothing was sampled. */
+	if (red_selec <= 0.0)
+		return 0.0;
+
+	return selec * max_selec / red_selec;
+}
+
+/*
+ * Inet MCV vs histogram inclusion join selectivity estimation
+ *
+ * Each of the first red_nvalues MCV entries is looked up in the histogram
+ * with inet_his_inclusion_selec(), weighted by its frequency.  The sum is
+ * scaled up by the ratio of the total MCV frequency to the frequency of the
+ * entries that were looked up.  opr_order is passed through unchanged, so
+ * it has to describe "histogram value OP MCV value".
+ *
+ * The function result is the selectivity, and the fraction of the total
+ * population of the MCV is returned into *max_selec_pointer.  The result is
+ * 0 if no frequency was sampled at all.
+ */
+static Selectivity
+inet_mcv_his_selec(Datum *mcv_values, float4 *mcv_numbers, int mcv_nvalues,
+				   Datum *his_values, int his_nvalues, int red_nvalues,
+				   short int opr_order, Selectivity *max_selec_pointer)
+{
+	Selectivity selec = 0.0,
+				red_selec = 0.0,
+				max_selec = 0.0;
+	int			i;
+
+	for (i = 0; i < mcv_nvalues; i++)
+	{
+		if (i < red_nvalues)
+		{
+			selec += mcv_numbers[i] *
+				inet_his_inclusion_selec(his_values, his_nvalues,
+										 &mcv_values[i], opr_order);
+
+			red_selec += mcv_numbers[i];
+		}
+
+		max_selec += mcv_numbers[i];
+	}
+
+	*max_selec_pointer = max_selec;
+
+	/* Avoid dividing by zero when nothing was sampled. */
+	if (red_selec <= 0.0)
+		return 0.0;
+
+	return selec * max_selec / red_selec;
+}
+
+/*
+ * Inet histogram inclusion join selectivity estimation
+ *
+ * Selected values from the first histogram are matched against the second
+ * histogram with inet_his_inclusion_selec(), and the average of the results
+ * is returned.  About red_nvalues of the values are used: the same number
+ * of entries is discarded from the beginning and from the end of the list,
+ * on the grounds that they are outliers and hence not very representative.
+ */
+static Selectivity
+inet_his_inclusion_join_selec(Datum *his1_values, int his1_nvalues,
+							  Datum *his2_values, int his2_nvalues,
+							  int red_nvalues, short int opr_order)
+{
+	int			trim = (his1_nvalues - red_nvalues) / 2;
+	int			stop = his1_nvalues - trim;
+	int			pos = trim;
+	float		total = 0.0;
+
+	/* Walk the middle part of the first histogram only. */
+	while (pos < stop)
+	{
+		total += inet_his_inclusion_selec(his2_values, his2_nvalues,
+										  &his1_values[pos], opr_order);
+		pos++;
+	}
+
+	return total / (his1_nvalues - 2 * trim);
+}
+
+/*
+ * Comparison function for the subnet inclusion operators
+ *
+ * The comparison is compatible with the basic comparison function of the
+ * inet type; see network_cmp_internal in network.c for the original.  The
+ * basic comparison operators are implemented with network_cmp_internal,
+ * and the subnet inclusion operators can be expressed in the same way.
+ *
+ * Addresses of different families are ordered by family.  Otherwise the
+ * common bits of the network parts are compared first; only when they are
+ * equal is the length of the network part (masklen) taken into account.
+ * That second step lives in inet_masklen_inclusion_cmp() so that it can be
+ * reused.  Unlike network_cmp_internal, it consults the operator while
+ * comparing the lengths of the network parts.
+ */
+static short int
+inet_inclusion_cmp(inet *left, inet *right, short int opr_order)
+{
+	short int	bits_order;
+
+	if (ip_family(left) != ip_family(right))
+		return ip_family(left) - ip_family(right);
+
+	bits_order = bitncmp(ip_addr(left), ip_addr(right),
+						 Min(ip_bits(left), ip_bits(right)));
+
+	return (bits_order != 0) ? bits_order :
+		inet_masklen_inclusion_cmp(left, right, opr_order);
+}
+
+/*
+ * Masklen comparison function for the subnet inclusion operators
+ *
+ * Compares the lengths of the network parts of the inputs using the
+ * operator.  Returns 0 if the relation between the two lengths is
+ * acceptable for the operator; otherwise opr_order itself is returned, so
+ * the sign of the result follows the operator.  Inputs of different
+ * address families yield the difference of the families.
+ */
+static short int
+inet_masklen_inclusion_cmp(inet *left, inet *right, short int opr_order)
+{
+	short int	masklen_diff;
+
+	if (ip_family(left) != ip_family(right))
+		return ip_family(left) - ip_family(right);
+
+	masklen_diff = ip_bits(left) - ip_bits(right);
+
+	/* Left is longer: fine for the overlap and subnet operators. */
+	if (masklen_diff > 0)
+		return (opr_order >= 0) ? 0 : opr_order;
+
+	/* Left is shorter: fine for the overlap and supernet operators. */
+	if (masklen_diff < 0)
+		return (opr_order <= 0) ? 0 : opr_order;
+
+	/* Equal lengths: fine for everything but the strict operators. */
+	return (opr_order >= -1 && opr_order <= 1) ? 0 : opr_order;
+}
+
+/*
+ * Inet histogram partial match divider calculation
+ *
+ * The families and the lengths of the network parts are checked first with
+ * inet_masklen_inclusion_cmp(); -1 is returned if that check does not
+ * succeed.  Otherwise the divider is the number of decisive bits minus the
+ * number of leading bits the two addresses have in common.
+ */
+static short int
+inet_his_match_divider(inet *boundary, inet *query, short int opr_order)
+{
+	short int	shorter_len,
+				deciding_len;
+
+	if (inet_masklen_inclusion_cmp(boundary, query, opr_order) != 0)
+		return -1;
+
+	shorter_len = Min(ip_bits(boundary), ip_bits(query));
+
+	/*
+	 * The decisive bits come from the one which should contain the other
+	 * according to the operator.
+	 */
+	if (opr_order < 0)
+		deciding_len = ip_bits(boundary);
+	else if (opr_order > 0)
+		deciding_len = ip_bits(query);
+	else
+		deciding_len = shorter_len;
+
+	/* With no bits to compare there is nothing to subtract. */
+	if (shorter_len <= 0)
+		return deciding_len;
+
+	return deciding_len - bitncommon(ip_addr(boundary), ip_addr(query),
+									 shorter_len);
+}
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index e932ccf..8dcda93 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -149,21 +149,21 @@ static double var_eq_const(VariableStatData *vardata, Oid
operator,
bool varonleft);
static double var_eq_non_const(VariableStatData *vardata, Oid operator,
Node *other,
bool varonleft);
static double ineq_histogram_selectivity(PlannerInfo *root,
VariableStatData *vardata,
FmgrInfo *opproc, bool isgt,
Datum constval, Oid
consttype);
static double eqjoinsel_inner(Oid operator,
VariableStatData *vardata1, VariableStatData
*vardata2);
-static double eqjoinsel_semi(Oid operator,
+double eqjoinsel_semi(Oid operator,
VariableStatData *vardata1, VariableStatData
*vardata2,
RelOptInfo *inner_rel);
static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
Datum lobound, Datum hibound, Oid boundstypid,
double *scaledlobound, double *scaledhibound);
static double convert_numeric_to_scalar(Datum value, Oid typid);
static void convert_string_to_scalar(char *value,
double *scaledvalue,
char *lobound,
double *scaledlobound,
@@ -182,21 +182,21 @@ static double convert_one_bytea_to_scalar(unsigned char
*value, int valuelen,
static char *convert_string_datum(Datum value, Oid typid);
static double convert_timevalue_to_scalar(Datum value, Oid typid);
static void examine_simple_variable(PlannerInfo *root, Var *var,
VariableStatData *vardata);
static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata,
Oid sortop, Datum *min, Datum *max);
static bool get_actual_variable_range(PlannerInfo *root,
VariableStatData *vardata,
Oid sortop,
Datum *min, Datum *max);
-static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids);
+RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids);
static Selectivity prefix_selectivity(PlannerInfo *root,
VariableStatData *vardata,
Oid vartype, Oid opfamily, Const *prefixcon);
static Selectivity like_selectivity(const char *patt, int pattlen,
bool case_insensitive);
static Selectivity regex_selectivity(const char *patt, int pattlen,
bool case_insensitive,
int fixed_prefix_len);
static Datum string_to_datum(const char *str, Oid datatype);
static Const *string_to_const(const char *str, Oid datatype);
@@ -2418,21 +2418,21 @@ eqjoinsel_inner(Oid operator,
return selec;
}
/*
* eqjoinsel_semi --- eqjoinsel for semi join
*
* (Also used for anti join, which we are supposed to estimate the same way.)
* Caller has ensured that vardata1 is the LHS variable.
*/
-static double
+double
eqjoinsel_semi(Oid operator,
VariableStatData *vardata1, VariableStatData
*vardata2,
RelOptInfo *inner_rel)
{
double selec;
double nd1;
double nd2;
bool isdefault1;
bool isdefault2;
Form_pg_statistic stats1 = NULL;
@@ -5094,21 +5094,21 @@ get_actual_variable_range(PlannerInfo *root,
VariableStatData *vardata,
return have_data;
}
/*
* find_join_input_rel
* Look up the input relation for a join.
*
* We assume that the input relation's RelOptInfo must have been constructed
* already.
*/
-static RelOptInfo *
+RelOptInfo *
find_join_input_rel(PlannerInfo *root, Relids relids)
{
RelOptInfo *rel = NULL;
switch (bms_membership(relids))
{
case BMS_EMPTY_SET:
/* should not happen */
break;
case BMS_SINGLETON:
diff --git a/src/include/catalog/pg_operator.h
b/src/include/catalog/pg_operator.h
index d7dcd1c..3b827fc 100644
--- a/src/include/catalog/pg_operator.h
+++ b/src/include/catalog/pg_operator.h
@@ -1135,32 +1135,33 @@ DESCR("not equal");
DATA(insert OID = 1203 ( "<" PGNSP PGUID b f f 869 869 16 1205 1206
network_lt scalarltsel scalarltjoinsel ));
DESCR("less than");
DATA(insert OID = 1204 ( "<=" PGNSP PGUID b f f 869 869 16 1206 1205
network_le scalarltsel scalarltjoinsel ));
DESCR("less than or equal");
DATA(insert OID = 1205 ( ">" PGNSP PGUID b f f 869 869 16 1203 1204
network_gt scalargtsel scalargtjoinsel ));
DESCR("greater than");
DATA(insert OID = 1206 ( ">=" PGNSP PGUID b f f 869 869 16 1204 1203
network_ge scalargtsel scalargtjoinsel ));
DESCR("greater than or equal");
DATA(insert OID = 931 ( "<<" PGNSP PGUID b f f 869 869 16 933
0 network_sub networksel networkjoinsel ));
DESCR("is subnet");
-#define OID_INET_SUB_OP 931
+#define OID_INET_SUB_OP 931
DATA(insert OID = 932 ( "<<=" PGNSP PGUID b f f 869 869 16 934
0 network_subeq networksel networkjoinsel ));
DESCR("is subnet or equal");
-#define OID_INET_SUBEQ_OP 932
+#define OID_INET_SUBEQ_OP 932
DATA(insert OID = 933 ( ">>" PGNSP PGUID b f f 869 869 16 931
0 network_sup networksel networkjoinsel ));
DESCR("is supernet");
-#define OID_INET_SUP_OP 933
+#define OID_INET_SUP_OP 933
DATA(insert OID = 934 ( ">>=" PGNSP PGUID b f f 869 869 16 932
0 network_supeq networksel networkjoinsel ));
DESCR("is supernet or equal");
-#define OID_INET_SUPEQ_OP 934
+#define OID_INET_SUPEQ_OP 934
DATA(insert OID = 3552 ( "&&" PGNSP PGUID b f f 869 869 16 3552
0 network_overlap networksel networkjoinsel ));
DESCR("overlaps (is subnet or supernet)");
+#define OID_INET_OVERLAP_OP 3552
DATA(insert OID = 2634 ( "~" PGNSP PGUID l f f 0 869 869 0 0 inetnot
- - ));
DESCR("bitwise not");
DATA(insert OID = 2635 ( "&" PGNSP PGUID b f f 869 869 869 0 0 inetand
- - ));
DESCR("bitwise and");
DATA(insert OID = 2636 ( "|" PGNSP PGUID b f f 869 869 869 0 0 inetor
- - ));
DESCR("bitwise or");
DATA(insert OID = 2637 ( "+" PGNSP PGUID b f f 869 20 869 2638 0
inetpl - - ));
DESCR("add");
DATA(insert OID = 2638 ( "+" PGNSP PGUID b f f 20 869 869 2637 0
int8pl_inet - - ));
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers