Re: [HACKERS] SQL/JSON in PostgreSQL

2017-11-03 Thread Nikita Glukhov

On 03.11.2017 00:32, Piotr Stefaniak wrote:


On 2017-02-28 20:08, Oleg Bartunov wrote:

The standard describes SQL/JSON path language, which used by SQL/JSON query
operators to query JSON. It defines path language as string literal. We
implemented the path language as  JSONPATH data type, since other
approaches are not friendly to planner and executor.

I was a bit sad to discover that I can't
PREPARE jsq AS SELECT JSON_QUERY('{}', $1);
I assume because of this part of the updated grammar:
json_path_specification:
 Sconst { $$ = $1; }
;

Would it make sense, fundamentally, to allow variables there? After
Andrew Gierth's analysis of this grammar problem, I understand that it's
not reasonable to expect JSON_TABLE() to support variable jsonpaths, but
maybe it would be feasible for everything else? From Andrew's changes to
the new grammar (see attached) it seems to me that at least that part is
possible. Or should I forget about trying to implement the other part?

By standard only string literals can be used in JSON path specifications.
But of course it is possible to allow to use variable jsonpath 
expressions in

SQL/JSON functions.

Attached patch implements this feature for JSON query functions, 
JSON_TABLE is

not supported now because it needs some refactoring.

I have pushed this commit to the separate branch because it is not 
finished yet:

https://github.com/postgrespro/sqljson/tree/sqljson_variable_json_path

--
Nikita Glukhov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
>From cba58ec1410eb99fc974d8a46013ce7331c93377 Mon Sep 17 00:00:00 2001
From: Nikita Glukhov <n.glu...@postgrespro.ru>
Date: Fri, 3 Nov 2017 14:15:51 +0300
Subject: [PATCH] Allow variable jsonpath specifications

---
 src/backend/executor/execExpr.c   |  7 +++
 src/backend/executor/execExprInterp.c |  5 ++---
 src/backend/nodes/copyfuncs.c |  2 +-
 src/backend/parser/gram.y | 11 ++-
 src/backend/parser/parse_clause.c | 35 ++-
 src/backend/parser/parse_expr.c   | 19 +--
 src/backend/utils/adt/ruleutils.c | 11 +--
 src/include/executor/execExpr.h   |  3 ++-
 src/include/nodes/parsenodes.h|  2 +-
 src/include/nodes/primnodes.h |  2 +-
 10 files changed, 72 insertions(+), 25 deletions(-)

diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
index 3cc8e12..86030b9 100644
--- a/src/backend/executor/execExpr.c
+++ b/src/backend/executor/execExpr.c
@@ -2042,6 +2042,13 @@ ExecInitExprRec(Expr *node, PlanState *parent, ExprState *state,
 _expr->value,
 _expr->isnull);
 
+scratch.d.jsonexpr.pathspec =
+	palloc(sizeof(*scratch.d.jsonexpr.pathspec));
+
+ExecInitExprRec((Expr *) jexpr->path_spec, parent, state,
+>value,
+>isnull);
+
 scratch.d.jsonexpr.formatted_expr =
 		ExecInitExpr((Expr *) jexpr->formatted_expr, parent);
 
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index b9d719d..36ef0fd 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -3897,7 +3897,7 @@ ExecEvalJson(ExprState *state, ExprEvalStep *op, ExprContext *econtext)
 	*op->resnull = true;		/* until we get a result */
 	*op->resvalue = (Datum) 0;
 
-	if (op->d.jsonexpr.raw_expr->isnull)
+	if (op->d.jsonexpr.raw_expr->isnull || op->d.jsonexpr.pathspec->isnull)
 	{
 		/* execute domain checks for NULLs */
 		(void) ExecEvalJsonExprCoercion(op, econtext, res, op->resnull, isjsonb);
@@ -3905,8 +3905,7 @@ ExecEvalJson(ExprState *state, ExprEvalStep *op, ExprContext *econtext)
 	}
 
 	item = op->d.jsonexpr.raw_expr->value;
-
-	path = DatumGetJsonPath(jexpr->path_spec->constvalue);
+	path = DatumGetJsonPath(op->d.jsonexpr.pathspec->value);
 
 	/* reset JSON path variable contexts */
 	foreach(lc, op->d.jsonexpr.args)
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 80c0e48..34fed1d 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -2332,7 +2332,7 @@ _copyJsonCommon(const JsonCommon *from)
 	JsonCommon	   *newnode = makeNode(JsonCommon);
 
 	COPY_NODE_FIELD(expr);
-	COPY_STRING_FIELD(pathspec);
+	COPY_NODE_FIELD(pathspec);
 	COPY_STRING_FIELD(pathname);
 	COPY_NODE_FIELD(passing);
 	COPY_LOCATION_FIELD(location);
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index adfe9b1..71a59d1 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -624,6 +624,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 	json_table_plan_cross
 	json_table_plan_primary
 	json_table_default_plan
+	json_path_specification
 
 %type 		json_arguments
 	json_passing_clause_opt
@@ -635,8 +636,7 @@ static Node *makeRecursiveViewSelect(char 

Re: [HACKERS] SQL/JSON in PostgreSQL

2017-10-30 Thread Nikita Glukhov

Hi, hackers!

I have a question about transformation of JSON constructors into executor nodes.

In first letter in this thread we wrote:
   JSON_OBJECT(), JSON_ARRAY() constructors and IS JSON predicate are
   transformed into raw function calls.

Here is an example explaining what it means:

=# CREATE VIEW json_object_view AS
SELECT JSON_OBJECT('foo': 1, 'bar': '[1,2]' FORMAT JSON RETURNING text);
CREATE VIEW
=# \sv json_object_view
CREATE OR REPLACE VIEW public.json_object_view AS
 SELECT json_build_object_ext(false, false, 'foo', 1, 'bar', 
'[1,2]'::text::json)::text

As you can see JSON_OBJECT() was transformed into a call on new function
json_build_object_ext(), which shares a code with existing json_build_object()
but differs from it only by two additional boolean parameters for
representation of  {WITH|WITHOUT} UNIQUE [KEYS] and {NULL|ABSENT} ON NULL
clauses.  Information about FORMAT, RETURNING clauses was lost, since they
were transformed into casts.

Other constructors are transformed similary:
JSON_ARRAY() => json[b]_build_array_ext(boolean, VARIADIC any)
JSON_OBJECTAGG() => json[b]_objectagg(any, any, boolean, boolean)
JSON_ARRAYAGG()  => json[b]_agg[_strict](any)

Also there is a variant of JSON_ARRAY() with subquery which transformed into a
subselect with json[b]_agg():
=# CREATE VIEW json_array_view AS SELECT JSON_ARRAY(SELECT 
generate_series(1,3));
CREATE VIEW
=# \sv json_array_view
CREATE OR REPLACE VIEW public.json_array_view AS
 SELECT ( SELECT json_agg_strict(q.a)
   FROM ( SELECT generate_series(1, 3) AS generate_series) q(a))



And here is my question: is it acceptable to do such transformations?
And if is not acceptable (it seemed unacceptable to us from the beginning,
but we did not have time for correct implementation), how should JSON
constructor nodes look like?


The simplest solution that I can propose is to save both transformed
expressions in existing JsonObjectCtor/JsonArrayCtor nodes which exist
now only in untransformed trees.  Whole untransformed JsonXxxCtor node
will be used for displaying, transformed expression -- for execution only.

But it will not work for aggregates, because they are transformed into a
Aggref/WindowFunc node.  Information needed for correct displaying should be
saved somewhere in these standard nodes.

And for subquery variant of JSON_ARRAY I can only offer to leave transformation
into a subselect with JSON_ARRAYAGG():
JSON_ARRAY(query) => (SELECT JSON_ARRAYAGG(bar) FROM (query) foo(bar))

--
Nikita Glukhov
Postgres Professional:http://www.postgrespro.com
The Russian Postgres Company



--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] SQL/JSON in PostgreSQL

2017-09-29 Thread Nikita Glukhov

On 29.09.2017 12:59, Pavel Stehule wrote:


Hi

2017-09-16 1:31 GMT+02:00 Nikita Glukhov <n.glu...@postgrespro.ru 
<mailto:n.glu...@postgrespro.ru>>:


On 15.09.2017 22:36, Oleg Bartunov wrote:

On Fri, Sep 15, 2017 at 7:31 PM, Robert Haas
<robertmh...@gmail.com <mailto:robertmh...@gmail.com>> wrote:

On Fri, Sep 15, 2017 at 10:10 AM, Daniel Gustafsson
<dan...@yesql.se <mailto:dan...@yesql.se>> wrote:

Can we expect a rebased version of this patch for this
commitfest?  Since it’s
a rather large feature it would be good to get it in
as early as we can in the
process.

Again, given that this needs a "major" rebase and hasn't
been updated
in a month, and given that the CF is already half over,
this should
just be bumped to the next CF.  We're supposed to be
trying to review
things that were ready to go by the start of the CF, not
the end.

We are supporting v10 branch in our github repository
https://github.com/postgrespro/sqljson/tree/sqljson_v10
<https://github.com/postgrespro/sqljson/tree/sqljson_v10>

Since the first post we made a lot of changes, mostly because of
better understanding the standard and availability of
technical report

(http://standards.iso.org/ittf/PubliclyAvailableStandards/c067367_ISO_IEC_TR_19075-6_2017.zip

<http://standards.iso.org/ittf/PubliclyAvailableStandards/c067367_ISO_IEC_TR_19075-6_2017.zip>).
Most important are:

1.We abandoned FORMAT support, which could confuse our users,
since we
have data types json[b].

2. We use XMLTABLE infrastructure, extended for JSON_TABLE
support.

3. Reorganize commits, so we could split one big patch by several
smaller patches, which could be reviewed independently.

4. The biggest problem is documentation, we are working on it.

Nikita will submit patches soon.


Attached archive with 9 patches rebased onto latest master.

0001-jsonpath-v02.patch:
 - jsonpath type
 - jsonpath execution on jsonb type
 - jsonpath operators for jsonb type
 - GIN support for jsonpath operators

0002-jsonpath-json-v02.patch:
 - jsonb-like iterators for json type
 - jsonpath execution on json type
 - jsonpath operators for json type

0003-jsonpath-extensions-v02.patch:
0004-jsonpath-extensions-tests-for-json-v02.patch:
 - some useful standard extensions with tests
 0005-sqljson-v02.patch:
 - SQL/JSON constructors (JSON_OBJECT[AGG], JSON_ARRAY[AGG])
 - SQL/JSON query functions (JSON_VALUE, JSON_QUERY, JSON_EXISTS)
 - IS JSON predicate

0006-sqljson-json-v02.patch:
 - SQL/JSON support for json type and tests

0007-json_table-v02.patch:
 - JSON_TABLE using XMLTABLE infrastructure

0008-json_table-json-v02.patch:
 - JSON_TABLE support for json type

0009-wip-extensions-v02.patch:
 - FORMAT JSONB
 - jsonb to/from bytea casts
 - jsonpath operators
 - some unfinished jsonpath extensions


Originally, JSON path was implemented only for jsonb type, and I
decided to
add jsonb-like iterators for json type for json support
implementation with
minimal changes in JSON path code.  This solution (see
jsonpath_json.c from
patch 0002) looks a little dubious to me, so I separated json
support into
independent patches.

The last WIP patch 0009 is unfinished and contains a lot of
FIXMEs.  But
the ability to use arbitrary Postgres operators in JSON path with
explicitly
specified  types is rather interesting, and I think it should be
shown now
to get a some kind of pre-review.

We are supporting v11 and v10 branches in our github repository:

https://github.com/postgrespro/sqljson/tree/sqljson
<https://github.com/postgrespro/sqljson/tree/sqljson>
https://github.com/postgrespro/sqljson/tree/sqljson_wip
<https://github.com/postgrespro/sqljson/tree/sqljson_wip>
https://github.com/postgrespro/sqljson/tree/sqljson_v10
<https://github.com/postgrespro/sqljson/tree/sqljson_v10>
https://github.com/postgrespro/sqljson/tree/sqljson_v10_wip
<https://github.com/postgrespro/sqljson/tree/sqljson_v10_wip>

Attached patches can be produced simply by combining groups of
consecutive
commits from these branches.


I have some free time now. Is it last version?

Regards

Pavel

Yes, this is still the latest version. Now I am working only on 
unfinished WIP

patch no. 9, but I think it should be reviewed the last.

--
Nikita Glukhov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company


Re: [HACKERS] compress method for spgist - 2

2017-09-21 Thread Nikita Glukhov

On 21.09.2017 02:27, Alexander Korotkov wrote:

On Thu, Sep 21, 2017 at 2:06 AM, Darafei "Komяpa" Praliaskouski 
<m...@komzpa.net <mailto:m...@komzpa.net>> wrote:


It is possible for bbox->low.x to be NaN when circle->center.x
is and
circle->radius are both +Infinity.


What is rationale behind this circle?


I would prefer to rather forbid any geometries with infs and nans.  
However, then upgrade process will suffer.  User with such geometries 
would get errors during dump/restore, pg_upgraded instances would 
still contain invalid values...


It seems to me that any circle with radius of any Infinity should
become a [-Infinity .. Infinity, -Infinity .. Infinity] box.Then
you won't have NaNs, and index structure shouldn't be broken.


We probably should produce [-Infinity .. Infinity, -Infinity .. 
Infinity] box for any geometry containing inf or nan.  That MBR would 
be founded for any query, saying: "index can't help you for this kind 
value, only recheck can deal with that".  Therefore, we would at least 
guarantee that results of sequential scan and index scan are the same.




I have looked at the GiST KNN code and found the same errors for NaNs,
infinities and NULLs as in my SP-GiST KNN patch.

Attached two patches:

1. Fix NaN-inconsistencies in circle's bounding boxes computed in GiST 
compress

method leading to the failed Assert(box->low.x <= box->high.x) in
computeDistance() from src/backend/access/gist/gistproc.c by 
transforming NaNs

into infinities (corresponding test case provided in the second patch).

2. Fix ordering of NULL, NaN and infinite distances by GiST.  This distance
values could be mixed because NULL distances were transformed into 
infinities,
and there was no special processing for NaNs in KNN queue's comparison 
function.

At first I tried just to set recheck flag for NULL distances, but it did not
work for index-only scans because they do not support rechecking. So I had
to add a special flag for NULL distances.


Should I start a separate thread for this issue and add patches to 
commitfest?


--
Nikita Glukhov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
>From 52ab493cfe1ec6260578054b71f2c48e77d4850a Mon Sep 17 00:00:00 2001
From: Nikita Glukhov <n.glu...@postgrespro.ru>
Date: Fri, 22 Sep 2017 02:06:48 +0300
Subject: [PATCH 1/2] Fix circle bounding box inconsistency in GiST compress
 method

---
 src/backend/access/gist/gistproc.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/src/backend/access/gist/gistproc.c b/src/backend/access/gist/gistproc.c
index 08990f5..2699304 100644
--- a/src/backend/access/gist/gistproc.c
+++ b/src/backend/access/gist/gistproc.c
@@ -1149,6 +1149,16 @@ gist_circle_compress(PG_FUNCTION_ARGS)
 		r->high.y = in->center.y + in->radius;
 		r->low.y = in->center.y - in->radius;
 
+		/* avoid box inconsistency by transforming NaNs into infinities */
+		if (isnan(r->high.x))
+			r->high.x = get_float8_infinity();
+		if (isnan(r->high.y))
+			r->high.y = get_float8_infinity();
+		if (isnan(r->low.x))
+			r->low.x = -get_float8_infinity();
+		if (isnan(r->low.y))
+			r->low.y = -get_float8_infinity();
+
 		retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
 		gistentryinit(*retval, PointerGetDatum(r),
 	  entry->rel, entry->page,
-- 
2.7.4

>From 066ad9104ec0e967e20e820977286f001e4055a4 Mon Sep 17 00:00:00 2001
From: Nikita Glukhov <n.glu...@postgrespro.ru>
Date: Thu, 21 Sep 2017 19:09:02 +0300
Subject: [PATCH 2/2] Fix GiST ordering by distance for NULLs and NaNs

---
 src/backend/access/gist/gistget.c  | 29 ++---
 src/backend/access/gist/gistscan.c | 36 +++--
 src/include/access/gist_private.h  | 13 ++--
 src/test/regress/expected/gist.out | 64 ++
 src/test/regress/sql/gist.sql  | 60 +++
 5 files changed, 184 insertions(+), 18 deletions(-)

diff --git a/src/backend/access/gist/gistget.c b/src/backend/access/gist/gistget.c
index 760ea0c..7fed700 100644
--- a/src/backend/access/gist/gistget.c
+++ b/src/backend/access/gist/gistget.c
@@ -132,7 +132,7 @@ gistindex_keytest(IndexScanDesc scan,
 	GISTSTATE  *giststate = so->giststate;
 	ScanKey		key = scan->keyData;
 	int			keySize = scan->numberOfKeys;
-	double	   *distance_p;
+	GISTDistance *distance_p;
 	Relation	r = scan->indexRelation;
 
 	*recheck_p = false;
@@ -150,7 +150,10 @@ gistindex_keytest(IndexScanDesc scan,
 		if (GistPageIsLeaf(page))	/* shouldn't happen */
 			elog(ERROR, "invalid GiST tuple found on leaf page");
 		for (i = 0; i < scan->numberOfOrderBys; i++)
-			so->distances[i] = -get_float8_infinity();
+		{
+			so->distances[i].value = -get_float8_infinity();
+			so->distances[i].isnull = false;
+		}
 		return true;
 	}
 
@@ -248,7 +251,8

Re: [HACKERS] compress method for spgist - 2

2017-09-20 Thread Nikita Glukhov

On 20.09.2017 23:19, Alexander Korotkov wrote:

On Wed, Sep 20, 2017 at 11:07 PM, Tom Lane <t...@sss.pgh.pa.us 
<mailto:t...@sss.pgh.pa.us>> wrote:


Darafei Praliaskouski <m...@komzpa.net <mailto:m...@komzpa.net>> writes:
> I have some questions about the circles example though.

>  * What is the reason for isnan check and swap of box ordinates
for circle? It wasn't in the code previously.

I hadn't paid any attention to this patch previously, but this comment
excited my curiosity, so I went and looked:

+       bbox->high.x = circle->center.x + circle->radius;
+       bbox->low.x = circle->center.x - circle->radius;
+       bbox->high.y = circle->center.y + circle->radius;
+       bbox->low.y = circle->center.y - circle->radius;
+
+       if (isnan(bbox->low.x))
+       {
+               double tmp = bbox->low.x;
+               bbox->low.x = bbox->high.x;
+               bbox->high.x = tmp;
+       }

Maybe I'm missing something, but it appears to me that it's
impossible for
bbox->low.x to be NaN unless circle->center.x and/or
circle->radius is a
NaN, in which case bbox->high.x would also have been computed as a
NaN,
making the swap entirely useless.  Likewise for the Y case.  There
may be
something useful to do about NaNs here, but this doesn't seem like it.

Yeah, +1.



It is possible for bbox->low.x to be NaN when circle->center.x is and
circle->radius are both +Infinity.  Without this float-order-preserving 
swapping
one regression test for KNN with ORDER BY index will be totally broken 
(you can
try it: https://github.com/glukhovn/postgres/tree/knn). Unfortunately, I 
do not
remember exactly why, but most likely because of the incorrect index 
structure.


--
Nikita Glukhov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company


Re: [HACKERS] SQL/JSON in PostgreSQL

2017-09-18 Thread Nikita Glukhov

On 18.09.2017 00:38, Alvaro Herrera wrote:


Nikita Glukhov wrote:


0007-json_table-v02.patch:
  - JSON_TABLE using XMLTABLE infrastructure

0008-json_table-json-v02.patch:
  - JSON_TABLE support for json type

I'm confused ... why are these two patches and not a single one?


As I sad before, json support in jsonpath looks a bit dubious to me.  So if
patch no. 2 will not be accepted, then patches no. 4, 6, 8 should also be
simply skipped.  But, of course, patches 1 and 2, 3 and 4, 5 and 6, 7 and 8
can be combined.

--
Nikita Glukhov
Postgres Professional:http://www.postgrespro.com
The Russian Postgres Company



--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] PATCH: recursive json_populate_record()

2017-05-30 Thread Nikita Glukhov

On 30.05.2017 02:31, Tom Lane wrote:


Nikita Glukhov <n.glu...@postgrespro.ru> writes:

2. Also I've found a some kind of thinko in JsGetObjectSize() macro, but it
seems that this obvious mistake can not lead to incorrect behavior.

Hm, I think it actually was wrong for the case of jsonb_cont == NULL,
wasn't it?


Yes, JsObjectIsEmpty() returned false negative answer for jsonb_cont == NULL,
but this could only lead to suboptimal behavior of populate_record() when the
default record value was given.


But maybe that case didn't arise for the sole call site.

It also seems to me that populate_record() can't be called now with
jsonb_cont == NULL from the existing call sites.

--
Nikita Glukhov
Postgres Professional:http://www.postgrespro.com
The Russian Postgres Company



--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] PATCH: recursive json_populate_record()

2017-05-22 Thread Nikita Glukhov

Attached two small fixes for the previous committed patch:

1. I've noticed a difference in behavior between json_populate_record()
and  jsonb_populate_record() if we are trying to populate record from a
non-JSON-object: json function throws an error but jsonb function returns
a record with NULL fields. So I think it would be better to throw an error
in jsonb case too, but I'm not sure.

2. Also I've found a some kind of thinko in JsGetObjectSize() macro, but it
seems that this obvious mistake can not lead to incorrect behavior.

--
Nikita Glukhov
Postgres Professional:http://www.postgrespro.com
The Russian Postgres Company

diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c
index ab9a745..a98742f 100644
--- a/src/backend/utils/adt/jsonfuncs.c
+++ b/src/backend/utils/adt/jsonfuncs.c
@@ -308,12 +308,11 @@ typedef struct JsObject
 	((jsv)->is_json ? (jsv)->val.json.type == JSON_TOKEN_STRING \
 		: ((jsv)->val.jsonb && (jsv)->val.jsonb->type == jbvString))
 
-#define JsObjectSize(jso) \
+#define JsObjectIsEmpty(jso) \
 	((jso)->is_json \
-		? hash_get_num_entries((jso)->val.json_hash) \
-		: !(jso)->val.jsonb_cont || JsonContainerSize((jso)->val.jsonb_cont))
-
-#define JsObjectIsEmpty(jso) (JsObjectSize(jso) == 0)
+		? hash_get_num_entries((jso)->val.json_hash) == 0 \
+		: (!(jso)->val.jsonb_cont || \
+		   JsonContainerSize((jso)->val.jsonb_cont) == 0))
 
 #define JsObjectFree(jso) do { \
 		if ((jso)->is_json) \
diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c
index a98742f..083982c 100644
--- a/src/backend/utils/adt/jsonfuncs.c
+++ b/src/backend/utils/adt/jsonfuncs.c
@@ -2683,7 +2683,20 @@ JsValueToJsObject(JsValue *jsv, JsObject *jso)
 			JsonContainerIsObject(jbv->val.binary.data))
 			jso->val.jsonb_cont = jbv->val.binary.data;
 		else
+		{
+			bool		is_scalar = IsAJsonbScalar(jbv) ||
+(jbv->type == jbvBinary &&
+ JsonContainerIsScalar(jbv->val.binary.data));
+
+			ereport(ERROR,
+	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+	 errmsg(is_scalar
+			? "cannot call %s on a scalar"
+			: "cannot call %s on an array",
+			"populate_composite")));
+
 			jso->val.jsonb_cont = NULL;
+		}
 	}
 }
 
diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out
index f199eb4..7954703 100644
--- a/src/test/regress/expected/jsonb.out
+++ b/src/test/regress/expected/jsonb.out
@@ -2276,17 +2276,9 @@ SELECT jsa FROM jsonb_populate_record(NULL::jsbrec, '{"jsa": ["aaa", null, [1, 2
 (1 row)
 
 SELECT rec FROM jsonb_populate_record(NULL::jsbrec, '{"rec": 123}') q;
- rec  
---
- (,,)
-(1 row)
-
+ERROR:  cannot call populate_composite on a scalar
 SELECT rec FROM jsonb_populate_record(NULL::jsbrec, '{"rec": [1, 2]}') q;
- rec  
---
- (,,)
-(1 row)
-
+ERROR:  cannot call populate_composite on an array
 SELECT rec FROM jsonb_populate_record(NULL::jsbrec, '{"rec": {"a": "abc", "c": "01.02.2003", "x": 43.2}}') q;
 rec
 ---
@@ -2303,11 +2295,7 @@ SELECT reca FROM jsonb_populate_record(NULL::jsbrec, '{"reca": 123}') q;
 ERROR:  expected json array
 HINT:  see the value of key "reca"
 SELECT reca FROM jsonb_populate_record(NULL::jsbrec, '{"reca": [1, 2]}') q;
-  reca   
--
- {"(,,)","(,,)"}
-(1 row)
-
+ERROR:  cannot call populate_composite on a scalar
 SELECT reca FROM jsonb_populate_record(NULL::jsbrec, '{"reca": [{"a": "abc", "b": 456}, null, {"c": "01.02.2003", "x": 43.2}]}') q;
   reca  
 

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Fix freeing of dangling IndexScanDesc.xs_hitup in GiST

2017-05-04 Thread Nikita Glukhov

On 04.05.2017 22:16, Tom Lane wrote:


Nikita Glukhov <n.glu...@postgrespro.ru> writes:

In gistrescan() IndexScanDesc.xs_hitup is not reset after MemoryContextReset() 
of
so->queueCxt in which xs_hitup was allocated, then getNextNearest() tries to 
pfree()
dangling xs_hitup, which results in the reuse of this pointer and the 
subsequent crash.

Right.  I already did something about this, about an hour ago --- a
bit differently from your patch, but same idea.

regards, tom lane


Sorry that I'm not monitoring pgsql-bugs.

It might be interesting that I found this bug back in July 2016 when I
was experimenting with my KNN-btree implementation, but I failed to report
it because I could reproduce it only manually by a calling in a loop
gistrescan() and gistgettuple().

--
Nikita Glukhov
Postgres Professional:http://www.postgrespro.com
The Russian Postgres Company



--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


[HACKERS] Fix freeing of dangling IndexScanDesc.xs_hitup in GiST

2017-05-04 Thread Nikita Glukhov

Hello, hackers!

The last query in the following script crashes Postgres:

create table t (id serial, amount int);
insert into t (amount) select random() * 1000 from generate_series(1, 100);
create extension btree_gist;
create index t_gist_idx on t using gist(id, amount);

select p.id, p.amount, s.nearest
from t as p left join lateral
(
  select p.id, array_agg(l.id) as nearest from (
select id from t order by amount <-> p.amount limit 10
  ) l
) s using(id);

In gistrescan() IndexScanDesc.xs_hitup is not reset after MemoryContextReset() 
of
so->queueCxt in which xs_hitup was allocated, then getNextNearest() tries to 
pfree()
dangling xs_hitup, which results in the reuse of this pointer and the 
subsequent crash.

Attached patches fix this bug introduced in commit
d04c8ed9044eccebce043143a930617e3998c005 "Add support for index-only scans in 
GiST".
The bug is present in v9.5, v9.6, v10.0.

--
Nikita Glukhov
Postgres Professional:http://www.postgrespro.com
The Russian Postgres Company

diff --git a/src/backend/access/gist/gistscan.c b/src/backend/access/gist/gistscan.c
index 2526a39..580b6ac 100644
--- a/src/backend/access/gist/gistscan.c
+++ b/src/backend/access/gist/gistscan.c
@@ -148,6 +148,11 @@ gistrescan(IndexScanDesc scan, ScanKey key, int nkeys,
 		/* third or later time through */
 		MemoryContextReset(so->queueCxt);
 		first_time = false;
+		/*
+		 * scan->xs_itup is allocated in so->queueCxt and now it is invalid,
+		 * so we need to reset it to prevent it from freeing in getNextNearest().
+		 */
+		scan->xs_itup = NULL;
 	}
 
 	/*
diff --git a/src/backend/access/gist/gistscan.c b/src/backend/access/gist/gistscan.c
index 81ff8fc..5e10ada 100644
--- a/src/backend/access/gist/gistscan.c
+++ b/src/backend/access/gist/gistscan.c
@@ -148,6 +148,11 @@ gistrescan(IndexScanDesc scan, ScanKey key, int nkeys,
 		/* third or later time through */
 		MemoryContextReset(so->queueCxt);
 		first_time = false;
+		/*
+		 * scan->xs_hitup is allocated in so->queueCxt and now it is invalid,
+		 * so we need to reset it to prevent it from freeing in getNextNearest().
+		 */
+		scan->xs_hitup = NULL;
 	}
 
 	/*

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] PATCH: recursive json_populate_record()

2017-03-22 Thread Nikita Glukhov

On 22.03.2017 00:26, David Steele wrote:


On 3/21/17 2:31 PM, Andrew Dunstan wrote:

On 03/21/2017 01:37 PM, David Steele wrote:

>>

This thread has been idle for months since Tom's review.

The submission has been marked "Returned with Feedback". Please feel
free to resubmit to a future commitfest.


Please revive. I am planning to look at this later this week.


Revived in "Waiting on Author" state.



Here is updated v05 version of this patch:
  * rebased to the latest master
  * one patch is completely removed because it is unnecessary now
  * added some macros for JsValue fields access
  * added new JsObject structure for passing json/jsonb objects
  * refactoring patch is not yet simplified (not broken into a 
step-by-step sequence)


Also I must notice that json branch of this code is not as optimal as it 
might be:

there could be repetitive parsing passes for nested json objects/arrays
instead of a single parsing pass.

--
Nikita Glukhov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c
index bf2c91f..da29dab 100644
--- a/src/backend/utils/adt/jsonfuncs.c
+++ b/src/backend/utils/adt/jsonfuncs.c
@@ -109,6 +109,7 @@ typedef struct JhashState
 	HTAB	   *hash;
 	char	   *saved_scalar;
 	char	   *save_json_start;
+	JsonTokenType saved_token_type;
 } JHashState;
 
 /* hashtable element */
@@ -116,26 +117,49 @@ typedef struct JsonHashEntry
 {
 	char		fname[NAMEDATALEN];		/* hash key (MUST BE FIRST) */
 	char	   *val;
-	char	   *json;
-	bool		isnull;
+	JsonTokenType type;
 } JsonHashEntry;
 
-/* these two are stolen from hstore / record_out, used in populate_record* */
-typedef struct ColumnIOData
+/* structure to cache type I/O metadata needed for populate_scalar() */
+typedef struct ScalarIOData
 {
-	Oid			column_type;
-	Oid			typiofunc;
 	Oid			typioparam;
-	FmgrInfo	proc;
-} ColumnIOData;
+	FmgrInfo	typiofunc;
+} ScalarIOData;
+
+typedef struct ColumnIOData ColumnIOData;
+typedef struct RecordIOData RecordIOData;
 
-typedef struct RecordIOData
+/* structure to cache metadata needed for populate_composite() */
+typedef struct CompositeIOData
+{
+	/*
+	 * We use pointer to a RecordIOData here because variable-length
+	 * struct RecordIOData can't be used directly in ColumnIOData.io union
+	 */
+	RecordIOData   *record_io;	/* metadata cache for populate_record() */
+	TupleDesc		tupdesc;	/* cached tuple descriptor */
+} CompositeIOData;
+
+/* these two are stolen from hstore / record_out, used in populate_record* */
+
+/* structure to cache record metadata needed for populate_record_field() */
+struct ColumnIOData
+{
+	Oid			typid;		/* column type id */
+	int32		typmod;		/* column type modifier */
+	ScalarIOData scalar_io;	/* metadata cache for directi conversion
+			 * through input function */
+};
+
+/* structure to cache record metadata needed for populate_record() */
+struct RecordIOData
 {
 	Oid			record_type;
 	int32		record_typmod;
 	int			ncolumns;
 	ColumnIOData columns[FLEXIBLE_ARRAY_MEMBER];
-} RecordIOData;
+};
 
 /* state for populate_recordset */
 typedef struct PopulateRecordsetState
@@ -145,10 +169,11 @@ typedef struct PopulateRecordsetState
 	HTAB	   *json_hash;
 	char	   *saved_scalar;
 	char	   *save_json_start;
+	JsonTokenType saved_token_type;
 	Tuplestorestate *tuple_store;
 	TupleDesc	ret_tdesc;
 	HeapTupleHeader rec;
-	RecordIOData *my_extra;
+	RecordIOData **my_extra;
 	MemoryContext fn_mcxt;		/* used to stash IO funcs */
 } PopulateRecordsetState;
 
@@ -160,6 +185,55 @@ typedef struct StripnullState
 	bool		skip_next_null;
 } StripnullState;
 
+/* structure for generalized json/jsonb value passing */
+typedef struct JsValue
+{
+	bool is_json;/* json/jsonb */
+	union
+	{
+		struct
+		{
+			char   *str;		/* json string */
+			int		len;		/* json string length or -1 if null-terminated */
+			JsonTokenType type;	/* json type */
+		} json;	/* json value */
+
+		JsonbValue *jsonb;		/* jsonb value */
+	} val;
+} JsValue;
+
+typedef struct JsObject
+{
+	bool		is_json;		/* json/jsonb */
+	union
+	{
+		HTAB		   *json_hash;
+		JsonbContainer *jsonb_cont;
+	} val;
+} JsObject;
+
+#define JsValueIsNull(jsv) \
+	((jsv)->is_json ?  \
+		(!(jsv)->val.json.str || (jsv)->val.json.type == JSON_TOKEN_NULL) : \
+		(!(jsv)->val.jsonb || (jsv)->val.jsonb->type == jbvNull))
+
+#define JsValueIsString(jsv) \
+	((jsv)->is_json ? (jsv)->val.json.type == JSON_TOKEN_STRING \
+		: ((jsv)->val.jsonb && (jsv)->val.jsonb->type == jbvString))
+
+#define JsObjectSize(jso) \
+	((jso)->is_json \
+		? hash_get_num_entries((jso)->val.json_hash) \
+		: !(jso)->val.jsonb_cont || JsonContainerSize((jso)->val.jsonb_cont))
+
+#define JsObjectIsEmpty(jso) (JsObjectSize(jso) == 0)
+
+#define JsObjectFree(jso) do { \
+		if ((jso)->is_json) \
+			hash_destroy((jso)->val.json_hash); \
+	} while (0)
+
+
 /* semanti

Re: [HACKERS] [PATCH]: fix bug in SP-GiST box_ops

2017-03-10 Thread Nikita Glukhov

On 10.03.2017 02:13, Tels wrote:


I can't comment on the code, but the grammar on the comments caught my eye:

+/* Can any range from range_box does not extend higher than this argument? */
+static bool
+overLower2D(RangeBox *range_box, Range *query)
+{
+   return FPle(range_box->left.low, query->high) &&
+   FPle(range_box->right.low, query->high);
+}

The sentence sounds quite garbled in English. I'm not entirely sure what
it should be, but given the comment below "/* Can any range from range_box
to be higher than this argument? */" maybe something like:

/* Does any range from range_box extend to the right side of the query? */

If used, an analog wording should be used for overHigher2D's comment like:

/* Does any range from range_box extend to the left side of the query? */


I've changed comments as you proposed, but I've added missing "not" and left 
"Can":

/* Can any range from range_box not extend to the right side of the query? */
/* Can any range from range_box not extend to the left side of the query? */

See also comments on overhigher and overlower operators from documentation:

&<Does not extend to the right of?
&>Does not extend to the left of?


Another question: Does it make sense to add the "minimal bad example for
the '&<' case" as test case, too? After all, it should pass the test after
the patch.


Yes, it will make sense, but the Kyotaro's test case doesn't work for me and
I still don't know how to force SP-GiST to create inner leaves without
inserting hundreds of rows.

--
Nikita Glukhov
Postgres Professional:http://www.postgrespro.com
The Russian Postgres Company

diff --git a/src/backend/utils/adt/geo_spgist.c b/src/backend/utils/adt/geo_spgist.c
index aacb340..c95ec1c 100644
--- a/src/backend/utils/adt/geo_spgist.c
+++ b/src/backend/utils/adt/geo_spgist.c
@@ -286,6 +286,14 @@ lower2D(RangeBox *range_box, Range *query)
 		FPlt(range_box->right.low, query->low);
 }
 
+/* Can any range from range_box not extend to the right side of the query? */
+static bool
+overLower2D(RangeBox *range_box, Range *query)
+{
+	return FPle(range_box->left.low, query->high) &&
+		FPle(range_box->right.low, query->high);
+}
+
 /* Can any range from range_box to be higher than this argument? */
 static bool
 higher2D(RangeBox *range_box, Range *query)
@@ -294,6 +302,14 @@ higher2D(RangeBox *range_box, Range *query)
 		FPgt(range_box->right.high, query->high);
 }
 
+/* Can any range from range_box not extend to the left side of the query? */
+static bool
+overHigher2D(RangeBox *range_box, Range *query)
+{
+	return FPge(range_box->left.high, query->low) &&
+		FPge(range_box->right.high, query->low);
+}
+
 /* Can any rectangle from rect_box be left of this argument? */
 static bool
 left4D(RectBox *rect_box, RangeBox *query)
@@ -305,7 +321,7 @@ left4D(RectBox *rect_box, RangeBox *query)
 static bool
 overLeft4D(RectBox *rect_box, RangeBox *query)
 {
-	return lower2D(_box->range_box_x, >right);
+	return overLower2D(_box->range_box_x, >left);
 }
 
 /* Can any rectangle from rect_box be right of this argument? */
@@ -319,7 +335,7 @@ right4D(RectBox *rect_box, RangeBox *query)
 static bool
 overRight4D(RectBox *rect_box, RangeBox *query)
 {
-	return higher2D(_box->range_box_x, >right);
+	return overHigher2D(_box->range_box_x, >left);
 }
 
 /* Can any rectangle from rect_box be below of this argument? */
@@ -333,7 +349,7 @@ below4D(RectBox *rect_box, RangeBox *query)
 static bool
 overBelow4D(RectBox *rect_box, RangeBox *query)
 {
-	return lower2D(_box->range_box_y, >left);
+	return overLower2D(_box->range_box_y, >right);
 }
 
 /* Can any rectangle from rect_box be above of this argument? */
@@ -347,7 +363,7 @@ above4D(RectBox *rect_box, RangeBox *query)
 static bool
 overAbove4D(RectBox *rect_box, RangeBox *query)
 {
-	return higher2D(_box->range_box_y, >right);
+	return overHigher2D(_box->range_box_y, >right);
 }
 
 /*
diff --git a/src/test/regress/expected/box.out b/src/test/regress/expected/box.out
index 5f8b945..251df93 100644
--- a/src/test/regress/expected/box.out
+++ b/src/test/regress/expected/box.out
@@ -455,3 +455,108 @@ EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 ~= '(20,20),(40,40)';
 
 RESET enable_seqscan;
 DROP INDEX box_spgist;
+--
+-- Test the SP-GiST index on the larger volume of data
+--
+CREATE TEMPORARY TABLE quad_box_tbl (b box);
+INSERT INTO quad_box_tbl
+	SELECT box(point(x * 10, y * 10), point(x * 10 + 5, y * 10 + 5))
+	FROM generate_series(1, 100) x,
+		 generate_series(1, 100) y;
+-- insert repeating data to test allTheSame
+INSERT INTO quad_box_tbl
+	SELECT '((200, 300),(210, 310))'
+	FROM generate_series(1, 1000);
+INSERT INTO quad_box_tbl
+	VALUES
+		(NULL),
+		(NULL),
+		('((-infinity,-infinity),(infinity,infinity))'),
+		('((-infinity,100),(-infinit

Re: [HACKERS] [PATCH] kNN for SP-GiST

2017-02-06 Thread Nikita Glukhov

Attached v02 version (rebased to HEAD).

--
Nikita Glukhov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c
index f92baed..1dfaba2 100644
--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
@@ -23,6 +23,7 @@
 #include "storage/lmgr.h"
 #include "utils/builtins.h"
 #include "utils/syscache.h"
+#include "utils/lsyscache.h"
 
 
 /*
@@ -851,12 +852,6 @@ gistproperty(Oid index_oid, int attno,
 			 IndexAMProperty prop, const char *propname,
 			 bool *res, bool *isnull)
 {
-	HeapTuple	tuple;
-	Form_pg_index rd_index PG_USED_FOR_ASSERTS_ONLY;
-	Form_pg_opclass rd_opclass;
-	Datum		datum;
-	bool		disnull;
-	oidvector  *indclass;
 	Oid			opclass,
 opfamily,
 opcintype;
@@ -890,49 +885,28 @@ gistproperty(Oid index_oid, int attno,
 	}
 
 	/* First we need to know the column's opclass. */
-
-	tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(index_oid));
-	if (!HeapTupleIsValid(tuple))
+	opclass = get_index_column_opclass(index_oid, attno);
+	if (!OidIsValid(opclass))
 	{
 		*isnull = true;
 		return true;
 	}
-	rd_index = (Form_pg_index) GETSTRUCT(tuple);
-
-	/* caller is supposed to guarantee this */
-	Assert(attno > 0 && attno <= rd_index->indnatts);
-
-	datum = SysCacheGetAttr(INDEXRELID, tuple,
-			Anum_pg_index_indclass, );
-	Assert(!disnull);
-
-	indclass = ((oidvector *) DatumGetPointer(datum));
-	opclass = indclass->values[attno - 1];
-
-	ReleaseSysCache(tuple);
 
 	/* Now look up the opclass family and input datatype. */
-
-	tuple = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclass));
-	if (!HeapTupleIsValid(tuple))
+	if (!get_opclass_opfamily_and_input_type(opclass, , ))
 	{
 		*isnull = true;
 		return true;
 	}
-	rd_opclass = (Form_pg_opclass) GETSTRUCT(tuple);
-
-	opfamily = rd_opclass->opcfamily;
-	opcintype = rd_opclass->opcintype;
-
-	ReleaseSysCache(tuple);
 
 	/* And now we can check whether the function is provided. */
-
 	*res = SearchSysCacheExists4(AMPROCNUM,
  ObjectIdGetDatum(opfamily),
  ObjectIdGetDatum(opcintype),
  ObjectIdGetDatum(opcintype),
  Int16GetDatum(procno));
+	*isnull = false;
+
 	return true;
 }
 
diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c
index 1b04c09..1d479fe 100644
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -1050,6 +1050,32 @@ get_opclass_input_type(Oid opclass)
 	return result;
 }
 
+/*
+ * get_opclass_family_and_input_type
+ *
+ *		Returns the OID of the operator family the opclass belongs to,
+ *the OID of the datatype the opclass indexes
+ */
+bool
+get_opclass_opfamily_and_input_type(Oid opclass, Oid *opfamily, Oid *opcintype)
+{
+	HeapTuple	tp;
+	Form_pg_opclass cla_tup;
+
+	tp = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclass));
+	if (!HeapTupleIsValid(tp))
+		return false;
+
+	cla_tup = (Form_pg_opclass) GETSTRUCT(tp);
+
+	*opfamily = cla_tup->opcfamily;
+	*opcintype = cla_tup->opcintype;
+
+	ReleaseSysCache(tp);
+
+	return true;
+}
+
 /*-- OPERATOR CACHE --	 */
 
 /*
@@ -3061,3 +3087,45 @@ get_range_subtype(Oid rangeOid)
 	else
 		return InvalidOid;
 }
+
+/*-- PG_INDEX CACHE -- */
+
+/*
+ * get_index_column_opclass
+ *
+ *		Given the index OID and column number,
+ *		return opclass of the index column
+ *			or InvalidOid if the index was not found.
+ */
+Oid
+get_index_column_opclass(Oid index_oid, int attno)
+{
+	HeapTuple	tuple;
+	Form_pg_index rd_index PG_USED_FOR_ASSERTS_ONLY;
+	Datum		datum;
+	bool		isnull;
+	oidvector  *indclass;
+	Oid			opclass;
+
+	/* First we need to know the column's opclass. */
+
+	tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(index_oid));
+	if (!HeapTupleIsValid(tuple))
+		return InvalidOid;
+
+	rd_index = (Form_pg_index) GETSTRUCT(tuple);
+
+	/* caller is supposed to guarantee this */
+	Assert(attno > 0 && attno <= rd_index->indnatts);
+
+	datum = SysCacheGetAttr(INDEXRELID, tuple,
+			Anum_pg_index_indclass, );
+	Assert(!isnull);
+
+	indclass = ((oidvector *) DatumGetPointer(datum));
+	opclass = indclass->values[attno - 1];
+
+	ReleaseSysCache(tuple);
+
+	return opclass;
+}
diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h
index b6d1fca..618c4e8 100644
--- a/src/include/utils/lsyscache.h
+++ b/src/include/utils/lsyscache.h
@@ -73,6 +73,8 @@ extern char *get_constraint_name(Oid conoid);
 extern char *get_language_name(Oid langoid, bool missing_ok);
 extern Oid	get_opclass_family(Oid opclass);
 extern Oid	get_opclass_input_type(Oid opclass);
+extern bool get_opclass_opfamily_and_input_type(Oid opclass,
+	Oid *opfamily, Oid *opcintype);
 extern RegProcedure get_opcode(Oid opno);
 extern char *get_opname(Oid opno);
 extern Oid	get_op_rettype(Oid opno);
@@ -159,6 +161,7 @@ ext

Re: [HACKERS] Cast jsonb to numeric, int, float, bool

2017-02-01 Thread Nikita Glukhov

On 02.02.2017 01:07, Jim Nasby wrote:

On 2/1/17 8:26 AM, Nikita Glukhov wrote:

Some comments about the code: I think it would be better to
 * add function for extraction of scalars from pseudo-arrays
 * iterate until WJB_DONE to pfree iterator


I'm not sure what your intent here is, but if the idea is that a json array
would magically cast to a bool or a number data type I think that's a bad idea.


My idea, of course, is not about casting any json array to a scalar.  It is only
about helper subroutine for extraction of top-level jsonb scalars that are 
always
stored as one-element pseudo-arrays with special flag F_SCALAR in the array 
header.

--
Nikita Glukhov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company



--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Cast jsonb to numeric, int, float, bool

2017-02-01 Thread Nikita Glukhov

On 01.02.2017 14:21,Anastasia Lubennikova wrote:

Now the simplest way to extract booleans and numbers from json/jsonb is
to cast it to text and then cast to the appropriate type: ...
This patch implements direct casts from jsonb numeric (jbvNumeric) to
numeric, int4 and float8, and from jsonb bool (jbvBool) to bool.


Thank you for this patch. I always wanted to add such casts by myself.



If you find it useful, I can also add support of json and other types,
such as smallint and bigint.


Yes, I'd like to have support for other types and maybe for json.


Some comments about the code: I think it would be better to
 * add function for extraction of scalars from pseudo-arrays
 * iterate until WJB_DONE to pfree iterator

Example:

static bool
JsonbGetScalar(Jsonb *jb, JsonbValue *v)
{
JsonbIterator *it;
JsonbIteratorToken tok;
JsonbValue jbv;

if (!JB_ROOT_IS_SCALAR(jb))
return false;

/*
 * A root scalar is stored as an array of one element, so we get the
 * array and then its first (and only) member.
 */
it = JsonbIteratorInit(>root);

tok = JsonbIteratorNext(, , true);
Assert(tok == WJB_BEGIN_ARRAY);

tok = JsonbIteratorNext(, v, true);
Assert(tok == WJB_ELEM);

tok = JsonbIteratorNext(, , true);
Assert(tok == WJB_END_ARRAY);

tok = JsonbIteratorNext(, , true);
Assert(tok == WJB_DONE);

return true;
}

Datum
jsonb_int4(PG_FUNCTION_ARGS)
{
Jsonb  *in = PG_GETARG_JSONB(0);
JsonbValue  v;

if (!JsonbGetScalar(in, ) || v.type != jbvNumeric)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 errmsg("key value must be json numeric")));

PG_RETURN_INT32(DatumGetInt32(DirectFunctionCall1(numeric_int4,
  
NumericGetDatum(v.val.numeric;
}

--
Nikita Glukhov
Postgres Professional:http://www.postgrespro.com
The Russian Postgres Company


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [PATCH]: fix bug in SP-GiST box_ops

2017-01-31 Thread Nikita Glukhov

On 31.01.2017 13:04, Kyotaro HORIGUCHI wrote:

The following comment,


/* Can any range from range_box to be overlower than this argument? */


This might be better to be using the same wording to its users,
for example the comment for overLeft4D is the following.

| /* Can any rectangle from rect_box does not extend the right of this 
argument? */

And the documentation uses the following sentence,

https://www.postgresql.org/docs/current/static/functions-geometry.html
| Does not extend to the right of?

So I think "Can any range from range_box does not extend the
upper of this argument?" is better than the overlower. What do
you think?


I think "does not extend the upper" is better than unclear "overlower" too.
So I've corrected this comment in the attached v03 patch.



I confirmed other functions in geo_spgist but found no bugs like this.


I found no bugs in other functions too.



The minimal bad example for the '&<' case is the following.

=# create table t (b box);
=# create index on t using spgist(b);
=# insert into t values ('((215, 305),(210,300))'::box);
=# select * from t where b &< '((100,200),(300,400))'::box;
  b
-
 (215,305),(210,300)
(1 row)

=# set enable_seqscan to false;
=# select * from t where b &< '((100,200),(300,400))'::box;
 b
---
(0 rows)


I don't know how you were able to reproduce this bug in 
spg_box_quad_inner_consistent()
using only one box because index tree must have at least one inner node (or 157 
boxes)
in order to spg_box_quad_inner_consistent() began to be called.  That's why 
existing
tests for box_ops didn't detect this bug: box_temp table has only 56 rows.

--
Nikita Glukhov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

diff --git a/src/backend/utils/adt/geo_spgist.c b/src/backend/utils/adt/geo_spgist.c
index aacb340..c95ec1c 100644
--- a/src/backend/utils/adt/geo_spgist.c
+++ b/src/backend/utils/adt/geo_spgist.c
@@ -286,6 +286,14 @@ lower2D(RangeBox *range_box, Range *query)
 		FPlt(range_box->right.low, query->low);
 }
 
+/* Can any range from range_box does not extend higher than this argument? */
+static bool
+overLower2D(RangeBox *range_box, Range *query)
+{
+	return FPle(range_box->left.low, query->high) &&
+		FPle(range_box->right.low, query->high);
+}
+
 /* Can any range from range_box to be higher than this argument? */
 static bool
 higher2D(RangeBox *range_box, Range *query)
@@ -294,6 +302,14 @@ higher2D(RangeBox *range_box, Range *query)
 		FPgt(range_box->right.high, query->high);
 }
 
+/* Can any range from range_box does not extend lower than this argument? */
+static bool
+overHigher2D(RangeBox *range_box, Range *query)
+{
+	return FPge(range_box->left.high, query->low) &&
+		FPge(range_box->right.high, query->low);
+}
+
 /* Can any rectangle from rect_box be left of this argument? */
 static bool
 left4D(RectBox *rect_box, RangeBox *query)
@@ -305,7 +321,7 @@ left4D(RectBox *rect_box, RangeBox *query)
 static bool
 overLeft4D(RectBox *rect_box, RangeBox *query)
 {
-	return lower2D(_box->range_box_x, >right);
+	return overLower2D(_box->range_box_x, >left);
 }
 
 /* Can any rectangle from rect_box be right of this argument? */
@@ -319,7 +335,7 @@ right4D(RectBox *rect_box, RangeBox *query)
 static bool
 overRight4D(RectBox *rect_box, RangeBox *query)
 {
-	return higher2D(_box->range_box_x, >right);
+	return overHigher2D(_box->range_box_x, >left);
 }
 
 /* Can any rectangle from rect_box be below of this argument? */
@@ -333,7 +349,7 @@ below4D(RectBox *rect_box, RangeBox *query)
 static bool
 overBelow4D(RectBox *rect_box, RangeBox *query)
 {
-	return lower2D(_box->range_box_y, >left);
+	return overLower2D(_box->range_box_y, >right);
 }
 
 /* Can any rectangle from rect_box be above of this argument? */
@@ -347,7 +363,7 @@ above4D(RectBox *rect_box, RangeBox *query)
 static bool
 overAbove4D(RectBox *rect_box, RangeBox *query)
 {
-	return higher2D(_box->range_box_y, >right);
+	return overHigher2D(_box->range_box_y, >right);
 }
 
 /*
diff --git a/src/test/regress/expected/box.out b/src/test/regress/expected/box.out
index 5f8b945..251df93 100644
--- a/src/test/regress/expected/box.out
+++ b/src/test/regress/expected/box.out
@@ -455,3 +455,108 @@ EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 ~= '(20,20),(40,40)';
 
 RESET enable_seqscan;
 DROP INDEX box_spgist;
+--
+-- Test the SP-GiST index on the larger volume of data
+--
+CREATE TEMPORARY TABLE quad_box_tbl (b box);
+INSERT INTO quad_box_tbl
+	SELECT box(point(x * 10, y * 10), point(x * 10 + 5, y * 10 + 5))
+	FROM generate_series(1, 100) x,
+		 generate_series(1, 100) y;
+-- insert repeating data to test allTheSame
+INSERT INTO quad_box_tbl
+	SELECT '((200, 300),(210, 310))'
+	FROM generate_series(1, 1000);
+INSERT INTO quad_box_tbl
+	VALUES
+		(NULL),
+		(NULL),
+		

[HACKERS] [PATCH] kNN for SP-GiST

2017-01-30 Thread Nikita Glukhov

Hello hackers,

I'd like to present a series of patches which is a continuation of
Vlad Sterzhanov's work on kNN for SP-GiST that was done for GSOC'14.

Original thread: "KNN searches support for SP-GiST [GSOC'14]"
https://www.postgresql.org/message-id/cak2ajc0-jhrb3ujozl+arbchotvwbejvprn-jofenn0va6+...@mail.gmail.com


I've done the following:
  * rebased onto HEAD
  * fixed several bugs
  * fixed indentation and unnecessary whitespace changes
  * implemented logic for order-by in spgvalidate() and spgproperty()
  * used pairing_heap instead of RBTree for the priority queue
(as it was done earlier in GiST)
  * used existing traversalValue* fields instead of the newly added suppValue* 
fields
  * added caching of support functions infos
  * added recheck support for distances
  * refactored code
 - simplified some places
 - some functions were moved from spgproc.c to spgscan.c
   (now spgproc.c contains only one public function spg_point_distance()
that can be moved somewhere and this file can be removed then)
 - extracted functions spgTestLeafTuple(), spgMakeInnerItem()
  * added new opclasses for circles and polygons with order-by-distance support
   (it was originally intended for kNN recheck testing)
  * improved tests for point_ops


Below is a very brief description of the patches:
1. Extracted two subroutines from GiST code (patch is the same as in "kNN for 
btree").
2. Exported two box functions that are used in patch 5.
3. Extracted subroutine from GiST code that is used in patch 4.
4. Main patch: added kNN support to SP-GiST.
5. Added ordering operators to kd_point_ops and quad_point_ops.
6. Added new SP-GiST support function spg_compress (gist_compress analogue)
for compressed (lossy) representation of types in SP-GiST, used in patch 7.
7. Added new SP-GiST quad-tree opclasses for circles and polygons
(reused existing quad-tree box_ops).

If you want to see the step-by-step sequence of changes applied to the original 
patch,
you can see it on my github: 
https://github.com/glukhovn/postgres/commits/knn_spgist

Please note that the tests for circle_ops and poly_ops will not work until
the а bug found in SP-GiST box_ops will not be fixed
(see 
https://www.postgresql.org/message-id/9ea5b157-478c-8874-bc9b-050076b7d042%40postgrespro.ru).

--
Nikita Glukhov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company


diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c
index f92baed..1f6b671 100644
--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
@@ -23,6 +23,7 @@
 #include "storage/lmgr.h"
 #include "utils/builtins.h"
 #include "utils/syscache.h"
+#include "utils/lsyscache.h"
 
 
 /*
@@ -851,12 +852,6 @@ gistproperty(Oid index_oid, int attno,
 			 IndexAMProperty prop, const char *propname,
 			 bool *res, bool *isnull)
 {
-	HeapTuple	tuple;
-	Form_pg_index rd_index PG_USED_FOR_ASSERTS_ONLY;
-	Form_pg_opclass rd_opclass;
-	Datum		datum;
-	bool		disnull;
-	oidvector  *indclass;
 	Oid			opclass,
 opfamily,
 opcintype;
@@ -890,49 +885,28 @@ gistproperty(Oid index_oid, int attno,
 	}
 
 	/* First we need to know the column's opclass. */
-
-	tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(index_oid));
-	if (!HeapTupleIsValid(tuple))
+	opclass = get_index_column_opclass(index_oid, attno);
+	if (!OidIsValid(opclass))
 	{
 		*isnull = true;
 		return true;
 	}
-	rd_index = (Form_pg_index) GETSTRUCT(tuple);
-
-	/* caller is supposed to guarantee this */
-	Assert(attno > 0 && attno <= rd_index->indnatts);
-
-	datum = SysCacheGetAttr(INDEXRELID, tuple,
-			Anum_pg_index_indclass, );
-	Assert(!disnull);
-
-	indclass = ((oidvector *) DatumGetPointer(datum));
-	opclass = indclass->values[attno - 1];
-
-	ReleaseSysCache(tuple);
 
 	/* Now look up the opclass family and input datatype. */
-
-	tuple = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclass));
-	if (!HeapTupleIsValid(tuple))
+	if (!get_opclass_opfamily_and_input_type(opclass, , ))
 	{
 		*isnull = true;
 		return true;
 	}
-	rd_opclass = (Form_pg_opclass) GETSTRUCT(tuple);
-
-	opfamily = rd_opclass->opcfamily;
-	opcintype = rd_opclass->opcintype;
-
-	ReleaseSysCache(tuple);
 
 	/* And now we can check whether the function is provided. */
-
 	*res = SearchSysCacheExists4(AMPROCNUM,
  ObjectIdGetDatum(opfamily),
  ObjectIdGetDatum(opcintype),
  ObjectIdGetDatum(opcintype),
  Int16GetDatum(procno));
+	isnull = false;
+
 	return true;
 }
 
diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c
index aff88a5..26c81c9 100644
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -1050,6 +1050,32 @@ get_opclass_input_type(Oid opclass)
 	return result;
 }
 
+/*
+ * get_opclass_family_and_input_type
+ *
+ *		Returns the OID of the operator family

Re: [HACKERS] [PATCH]: fix bug in SP-GiST box_ops

2017-01-30 Thread Nikita Glukhov

On 30.01.2017 12:04, Kyotaro HORIGUCHI wrote:

Hello,

At Mon, 30 Jan 2017 07:12:03 +0300, Nikita Glukhov <n.glu...@postgrespro.ru> 
wrote
in <9ea5b157-478c-8874-bc9b-050076b7d...@postgrespro.ru>:

Working on the tests for new SP-GiST opclasses for polygons and
circles, I've found a bug in the SP-GiST box_ops (added in 9.6): some operators
(&<, &>, $<|, |&>) have wrong tests in spg_box_quad_inner_consistent().
This obviously leads to incorrect results of a SP-GiST index scan (see tests
in the attached patch, their results were taken from a sequential scan).


Your problem is not necessarily evident for others.  Perhaps you
have to provide an explanation and/or a test case that describes
what is wrong for you so that others can get a clue for this
problem. Simpler test is better.


The problem is that there are mixed low/high values for x/y axes.  For example,
overLeft4D() compares x-RangeBox rect_box->range_box_x with y-Range 
>right,
and also lower2D() here must use query->high instead of query->low.

The corresponding test is very simple: insert 1 nearly arbitrary boxes and
see the difference between results of sequential scan and results of index scan.

regression=# CREATE TEMPORARY TABLE quad_box_tbl (b box);

regression=# INSERT INTO quad_box_tbl
SELECT box(point(x * 10, y * 10), point(x * 10 + 5, y * 10 + 5))
FROM generate_series(1, 100) x,
 generate_series(1, 100) y;

regression=# CREATE INDEX quad_box_tbl_idx ON quad_box_tbl USING spgist(b);

regression=# SET enable_seqscan = ON;
regression=# SET enable_indexscan = OFF;
regression=# SET enable_bitmapscan = OFF;

regression=# SELECT count(*) FROM quad_box_tbl WHERE b &<  box 
'((100,200),(300,500))';
 count
---
  2900
(1 row)

regression=# SELECT count(*) FROM quad_box_tbl WHERE b &>  box 
'((100,200),(300,500))';
 count
---
  9100
(1 row)

regression=# SELECT count(*) FROM quad_box_tbl WHERE b &<| box 
'((100,200),(300,500))';
 count
---
  4900
(1 row)

regression=# SELECT count(*) FROM quad_box_tbl WHERE b |&> box 
'((100,200),(300,500))';
 count
---
  8100
(1 row)

regression=# SET enable_seqscan = OFF;
regression=# SET enable_indexscan = ON;
regression=# SET enable_bitmapscan = ON;

regression=# SELECT count(*) FROM quad_box_tbl WHERE b &<  box 
'((100,200),(300,500))';
 count
---
  2430
(1 row)

regression=# SELECT count(*) FROM quad_box_tbl WHERE b &>  box 
'((100,200),(300,500))';
 count
---
  6208
(1 row)

regression=# SELECT count(*) FROM quad_box_tbl WHERE b &<| box 
'((100,200),(300,500))';
 count
---
  1048
(1 row)

regression=# SELECT count(*) FROM quad_box_tbl WHERE b |&> box 
'((100,200),(300,500))';
 count
---
  5084
(1 row)



The test,

| +INSERT INTO quad_box_tbl
| + SELECT i, box(point(x, y), point(x + w, y + h))
| + FROM (SELECT i,
| + random() * 1000 as x, random() * 1000 as y,
| + random() * 20 as w, random() * 20 as h

is inserting quad_boxes generated using random numbers then,

| +SELECT count(*) FROM quad_box_tbl WHERE b <<  box '((100,200),(300,500))';
| + count
| +---
| +   891

counting them in this way is unstable. Even though this were
stable due to a fixed initial, this would be unacceptable, I
think. This kind of test should use nonrandom numbers.


I have replaced random data in this test with stable uniformly distributed data.
I would also like to use existing box data from rect.data that is loaded to
slow_emp4000 table in copy.sql test, but box.sql test is executed before 
copy.sql.



Even though I don't understand this in depth, the following
change seems somewhat wrong in direction. Changing the second
argument type seems breaking the basis of the design.

| -lower2D(RangeBox *range_box, Range *query)
| +lower2D(RangeBox *range_box, double query)


Maybe.  I also thought that these changes are quite doubtful, so I decided to
replace lowerEqual2D()/higherEqual2D() with overlower2D()/overhigher2D() and
not to touch lower2D()/higher2D().

--
Nikita Glukhov
Postgres Professional:http://www.postgrespro.com
The Russian Postgres Company

diff --git a/src/backend/utils/adt/geo_spgist.c b/src/backend/utils/adt/geo_spgist.c
index aacb340..2dc5496 100644
--- a/src/backend/utils/adt/geo_spgist.c
+++ b/src/backend/utils/adt/geo_spgist.c
@@ -286,6 +286,14 @@ lower2D(RangeBox *range_box, Range *query)
 		FPlt(range_box->right.low, query->low);
 }
 
+/* Can any range from range_box to be overlower than this argument? */
+static bool
+overlower2D(RangeBox *range_box, Range *query)
+{
+	return FPle(range_box->left.low, query->high) &&
+		FPle(range_box->right.low, query->high);
+}
+
 /* Can any range from range_box to be higher than this argument? */
 static bool
 higher2D(RangeBox *range_box, Range *query)
@@ -294,6 +302,14 @@ higher2D(RangeBox *range_b

[HACKERS] [PATCH]: fix bug in SP-GiST box_ops

2017-01-29 Thread Nikita Glukhov

Working on the tests for new SP-GiST opclasses for polygons and circles, I've
found a bug in the SP-GiST box_ops (added in 9.6): some operators
(&<, &>, $<|, |&>) have wrong tests in spg_box_quad_inner_consistent().
This obviously leads to incorrect results of a SP-GiST index scan (see tests
in the attached patch, their results were taken from a sequential scan).

--
Nikita Glukhov
Postgres Professional:http://www.postgrespro.com
The Russian Postgres Company

diff --git a/src/backend/utils/adt/geo_spgist.c b/src/backend/utils/adt/geo_spgist.c
index aacb340..446aa38 100644
--- a/src/backend/utils/adt/geo_spgist.c
+++ b/src/backend/utils/adt/geo_spgist.c
@@ -280,74 +280,90 @@ contained4D(RectBox *rect_box, RangeBox *query)
 
 /* Can any range from range_box to be lower than this argument? */
 static bool
-lower2D(RangeBox *range_box, Range *query)
+lower2D(RangeBox *range_box, double query)
 {
-	return FPlt(range_box->left.low, query->low) &&
-		FPlt(range_box->right.low, query->low);
+	return FPlt(range_box->left.low, query) &&
+		FPlt(range_box->right.low, query);
+}
+
+/* Can any range from range_box to be lower or equal to this argument? */
+static bool
+lowerEqual2D(RangeBox *range_box, double query)
+{
+	return FPle(range_box->left.low, query) &&
+		FPle(range_box->right.low, query);
 }
 
 /* Can any range from range_box to be higher than this argument? */
 static bool
-higher2D(RangeBox *range_box, Range *query)
+higher2D(RangeBox *range_box, double query)
+{
+	return FPgt(range_box->left.high, query) &&
+		FPgt(range_box->right.high, query);
+}
+
+/* Can any range from range_box to be higher or equal to this argument? */
+static bool
+higherEqual2D(RangeBox *range_box, double query)
 {
-	return FPgt(range_box->left.high, query->high) &&
-		FPgt(range_box->right.high, query->high);
+	return FPge(range_box->left.high, query) &&
+		FPge(range_box->right.high, query);
 }
 
 /* Can any rectangle from rect_box be left of this argument? */
 static bool
 left4D(RectBox *rect_box, RangeBox *query)
 {
-	return lower2D(_box->range_box_x, >left);
+	return lower2D(_box->range_box_x, query->left.low);
 }
 
 /* Can any rectangle from rect_box does not extend the right of this argument? */
 static bool
 overLeft4D(RectBox *rect_box, RangeBox *query)
 {
-	return lower2D(_box->range_box_x, >right);
+	return lowerEqual2D(_box->range_box_x, query->left.high);
 }
 
 /* Can any rectangle from rect_box be right of this argument? */
 static bool
 right4D(RectBox *rect_box, RangeBox *query)
 {
-	return higher2D(_box->range_box_x, >left);
+	return higher2D(_box->range_box_x, query->left.high);
 }
 
 /* Can any rectangle from rect_box does not extend the left of this argument? */
 static bool
 overRight4D(RectBox *rect_box, RangeBox *query)
 {
-	return higher2D(_box->range_box_x, >right);
+	return higherEqual2D(_box->range_box_x, query->left.low);
 }
 
 /* Can any rectangle from rect_box be below of this argument? */
 static bool
 below4D(RectBox *rect_box, RangeBox *query)
 {
-	return lower2D(_box->range_box_y, >right);
+	return lower2D(_box->range_box_y, query->right.low);
 }
 
 /* Can any rectangle from rect_box does not extend above this argument? */
 static bool
 overBelow4D(RectBox *rect_box, RangeBox *query)
 {
-	return lower2D(_box->range_box_y, >left);
+	return lowerEqual2D(_box->range_box_y, query->right.high);
 }
 
 /* Can any rectangle from rect_box be above of this argument? */
 static bool
 above4D(RectBox *rect_box, RangeBox *query)
 {
-	return higher2D(_box->range_box_y, >right);
+	return higher2D(_box->range_box_y, query->right.high);
 }
 
 /* Can any rectangle from rect_box does not extend below of this argument? */
 static bool
 overAbove4D(RectBox *rect_box, RangeBox *query)
 {
-	return higher2D(_box->range_box_y, >right);
+	return higherEqual2D(_box->range_box_y, query->right.low);
 }
 
 /*
diff --git a/src/test/regress/expected/box.out b/src/test/regress/expected/box.out
index 5f8b945..09db8e8 100644
--- a/src/test/regress/expected/box.out
+++ b/src/test/regress/expected/box.out
@@ -455,3 +455,116 @@ EXPLAIN (COSTS OFF) SELECT * FROM box_temp WHERE f1 ~= '(20,20),(40,40)';
 
 RESET enable_seqscan;
 DROP INDEX box_spgist;
+--
+-- Test the SP-GiST index with big random data
+--
+CREATE TEMPORARY TABLE quad_box_tbl (id int, b box);
+SELECT setseed(0);
+ setseed 
+-
+ 
+(1 row)
+
+INSERT INTO quad_box_tbl
+	SELECT i, box(point(x, y), point(x + w, y + h))
+	FROM (SELECT i,
+random() * 1000 as x, random() * 1000 as y,
+random() * 20 as w, random() * 20 as h
+			FROM generate_series(1, 1) AS i) q;
+-- Insert repeating data to test allTheSame
+INSERT INTO quad_box_tbl
+	SELECT i, '((200, 300),(210, 310))'
+	FROM generate_series(10001, 11000) AS i;
+INSERT INTO quad_box_tbl
+	VA

Re: [HACKERS] PATCH: recursive json_populate_record()

2017-01-25 Thread Nikita Glukhov

On 25.01.2017 23:58, Tom Lane wrote:

I think you need to take a second look at the code you're producing
and realize that it's not so clean either.  This extract from
populate_record_field, for example, is pretty horrid:


But what if we introduce some helper macros like this:

#define JsValueIsNull(jsv) \
((jsv)->is_json ? !(jsv)->val.json.str \
: !(jsv)->val.jsonb || (jsv)->val.jsonb->type == jbvNull)

#define JsValueIsString(jsv) \
((jsv)->is_json ? (jsv)->val.json.type == JSON_TOKEN_STRING \
: (jsv)->val.jsonb && (jsv)->val.jsonb->type == jbvString)


 /* prepare column metadata cache for the given type */
 if (col->typid != typid || col->typmod != typmod)
 prepare_column_cache(col, typid, typmod, mcxt, jsv->is_json);

 *isnull = JsValueIsNull(jsv);

 typcat = col->typcat;

 /* try to convert json string to a non-scalar type through input function 
*/
 if (JsValueIsString(jsv) &&
 (typcat == TYPECAT_ARRAY || typcat == TYPECAT_COMPOSITE))
 typcat = TYPECAT_SCALAR;

 /* we must perform domain checks for NULLs */
 if (*isnull && typcat != TYPECAT_DOMAIN)
 return (Datum) 0;

--
Nikita Glukhov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company



--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] PATCH: recursive json_populate_record()

2017-01-24 Thread Nikita Glukhov
It was my mistake that I was not familiar message-error guidelines.  Now
ereport() is used and there is no message assembling, but I'm also not sure
that we need to report these details.



5. The business with having some arguments that are only for json and
others that are only for jsonb, eg in populate_scalar(), also makes me
itch.


I've refactored json/jsonb passing using new struct JsValue which contains
union for json/jsonb values.


I wonder whether this wouldn't all get a lot simpler and more
readable if we gave up on trying to share code between the two cases.
In populate_scalar(), for instance, the amount of code actually shared
between the two cases amounts to a whole two lines, which are dwarfed
by the amount of crud needed to deal with trying to serve both cases
in the one function.  There are places where there's more sharable
code than that, but it seems like a better design might be to refactor
the sharable code out into subroutines called by separate functions for
json and jsonb.


Maybe two separate families of functions like this

a_json(common_args, json_args)
{
b(common_args);
c_json(json_args);
d(common_args);
}

a_jsonb(common_args, jsonb_args)
{
b(common_args);
c_jsonb(jsonb_args);
d(common_args);
}

could slightly improve readability, but such code duplication (I believe it is
a duplicate code) would never be acceptable to me.

I can only offer to extract two subroutines from from populate_scalar():
populate_scalar_json() and populate_scalar_jsonb().  I think InputFunctionCall()
here should have exactly the one call site because semantically there is only
the one such call per scalar, regardless of its type.



6. I'm not too excited by your having invented JsonContainerSize,
JsonContainerIsObject, etc, and then using them in just this new
code.  That is not really improving readability, it's just creating
stylistic inconsistency between these functions and the rest of the
jsonb code.


These new macros were introduced because existing JB_XXX() macros work with
Jsonb struct and there were no analogous macros for JsonbContainer struct.
They were not invented specifically for this patch: they are the result of the
deep refactoring of json/jsonb code which was made in the process of working on
jsonb compression (I'm going to present this work here soon).  This refactoring
allows us to use a single generic interface to json, jsonb and jsonbc (new
compressed format) types using ExpandedObject representation, but direct access
to JsonbContainer fields becomes illegal.  So I'am trying not to create new
direct references to JsonbContainer fields.  Also I could offer to rename these
macros to JBC_XXX() or JB_CONTAINER_XXX() but it would lead to unnecessary
conflicts.


If you want such macros I think it would be better to submit a separate
cosmetic patch that tries to hide such bit-tests behind macros throughout
the jsonb code.


I've attached that patch, but not all the bit-tests were hidden: some of them
in jsonb_util.c still remain valid after upcoming refactoring because they
don't belong to generic code (there might be better to use JBC_XXX() macros).


Another problem is that these macros are coding hazards because they look like
they yield bool values but they don't really; assigning the results to bool
variables, for example, would fail on most platforms.  Safer coding for a
general-purpose macro would be like

#define JsonContainerIsObject(jc)   (((jc)->header & JB_FOBJECT) != 0)


Sorry for this obvious mistake.  But macros JB_ROOT_IS_XXX() also contain the
same hazard.


7. More generally, the patch is hard to review because it whacks the
existing code around so thoroughly that it's tough even to match up
old and new code to get a handle on what you changed functionally.
Maybe it would be good if you could separate it into a refactoring
patch that just moves existing code around without functional changes,
and then a patch on top of that that adds code and makes only localized
changes in what's already there.


I've split this patch into two patches as you asked.

--
Nikita Glukhov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

  

diff --git a/src/backend/utils/adt/jsonb_util.c b/src/backend/utils/adt/jsonb_util.c
index 11a1395..64fb1c9 100644
--- a/src/backend/utils/adt/jsonb_util.c
+++ b/src/backend/utils/adt/jsonb_util.c
@@ -328,7 +328,7 @@ findJsonbValueFromContainer(JsonbContainer *container, uint32 flags,
 			JsonbValue *key)
 {
 	JEntry	   *children = container->children;
-	int			count = (container->header & JB_CMASK);
+	int			count = JsonContainerSize(container);
 	JsonbValue *result;
 
 	Assert((flags & ~(JB_FARRAY | JB_FOBJECT)) == 0);
@@ -339,7 +339,7 @@ findJsonbValueFromContainer(JsonbContainer *container, uint32 flags,
 
 	result = palloc(sizeof(JsonbValue));
 
-	if (flags & JB_FARRAY & container->header)
+	if ((flags & JB_FARRAY) && Js

[HACKERS] [PATCH] fix typo in commit a4523c5

2017-01-18 Thread Nikita Glukhov

Attached patch fixes typo in regression test 
src/test/regress/sql/create_index.sql
that was introduced in commit a4523c5
("Improve planning of btree index scans using ScalarArrayOpExpr quals.").

In this commit the following lines were added to create_index.sql:

SET enable_indexonlyscan = OFF;
...
RESET enable_indexscan;


Obviously, the last line should be

RESET enable_indexonlyscan;

--
Nikita Glukhov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index c6dfb26..e519fdb 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -2940,7 +2940,7 @@ ORDER BY thousand;
 1 | 1001
 (2 rows)
 
-RESET enable_indexscan;
+RESET enable_indexonlyscan;
 --
 -- Check elimination of constant-NULL subexpressions
 --
diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql
index 822c34a..1648072 100644
--- a/src/test/regress/sql/create_index.sql
+++ b/src/test/regress/sql/create_index.sql
@@ -1001,7 +1001,7 @@ SELECT thousand, tenthous FROM tenk1
 WHERE thousand < 2 AND tenthous IN (1001,3000)
 ORDER BY thousand;
 
-RESET enable_indexscan;
+RESET enable_indexonlyscan;
 
 --
 -- Check elimination of constant-NULL subexpressions

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [PATCH] kNN for btree

2017-01-18 Thread Nikita Glukhov

Sorry for the broken formatting in my previous message.
Below is a corrected version of this message.


I'd like to present a series of patches that implements k-Nearest Neighbors
(kNN) search for btree, which can be used to speed up ORDER BY distance
queries like this:
SELECT * FROM events ORDER BY date <-> '2000-01-01'::date ASC LIMIT 100;

Now only GiST supports kNN, but kNN on btree can be emulated using
contrib/btree_gist.


Scanning algorithm
==

Algorithm is very simple: we use bidirectional B-tree index scan starting at
the point from which we measure the distance (target point).  At each step,
we advance this scan in the direction that has the  nearest point.  But when
the target point does not fall into the scanned range, we don't even need to
use a bidirectional scan here --- we can use ordinary unidirectional scan
in the right direction.


Performance results
===

Test database is taken from original kNN-GiST presentation (PGCon 2010).

Test query

SELECT * FROM events ORDER BY date <-> '1957-10-04'::date ASC LIMIT k;

can be optimized to the next rather complicated UNION form,
which no longer requires kNN:

WITH
t1 AS (SELECT * FROM events WHERE date >= '1957-10-04'::date
   ORDER BY date ASC  LIMIT k),
t2 AS (SELECT * FROM events WHERE date <  '1957-10-04'::date
   ORDER BY date DESC LIMIT k),
t  AS (SELECT * FROM t1 UNION SELECT * FROM t2)
SELECT * FROM t ORDER BY date <-> '1957-10-04'::date ASC LIMIT k;


In each cell of this table shown query execution time in milliseconds and
the number of accessed blocks:


 k  |  kNN-btree   |  kNN-GiST|  Opt. query   |  Seq. scan
|  | (btree_gist) |  with UNION   |  with sort
|--|--|---|
  1 |  0.041 4 |  0.079 4 |   0.060 8 |  41.1 1824
 10 |  0.048 7 |  0.091 9 |   0.09717 |  41.8 1824
100 |  0.10747 |  0.19252 |   0.342   104 |  42.3 1824
   1000 |  0.735   573 |  0.913   650 |   2.970  1160 |  43.5 1824
  1 |  5.070  5622 |  6.240  6760 |  36.300 11031 |  54.1 1824
 10 | 49.600 51608 | 61.900 64194 | 295.100 94980 | 115.0 1824


As you can see, kNN-btree can be two times faster than kNN-GiST (btree_gist)
when k < 1000, but the number of blocks read is roughly the same.


Implementation details
==

A brief description is given below for each of the patches:

1. Introduce amcanorderbyop() function

This patch transforms existing boolean AM property amcanorderbyop into a method
(function pointer).  This is necessary because, unlike GiST, kNN for btree
supports only a one ordering operator on the first index column and we need a
different pathkey matching logic for btree (there was a corresponding comment
in match_pathkeys_to_index()).  GiST-specific logic has been moved from
match_pathkeys_to_index() to gistcanorderbyop().

2. Extract substructure BTScanState from BTScanOpaque

This refactoring is necessary for bidirectional kNN-scan implementation.
Now, BTScanOpaque's substructure BTScanState containing only the fields
related to scan position is passed to some functions where the whole
BTScanOpaque was passed previously.

3. Extract get_index_column_opclass(), get_opclass_opfamily_and_input_type().

Extracted two simple common functions used in gistproperty() and
btproperty() (see the next patch).

4. Add kNN support to btree

  * Added additional optional BTScanState to BTScanOpaque for
bidirectional kNN scan.
  * Implemented bidirectional kNN scan.
  * Implemented logic for selecting kNN strategy
  * Implemented btcanorderbyop(), updated btproperty() and btvalidate()

B-tree user interface functions have not been altered because ordering
operators are used directly.

5. Add distance operators for some types

These operators for integer, float, date, time, timestamp, interval, cash and
oid types have been copied from contrib/btree_gist and added to the existing
btree opclasses as ordering operators.  Their btree_gist duplicates are removed
in the next patch.

6. Remove duplicate distance operators from contrib/btree_gist.

References to their own distance operators in btree_gist opclasses are
replaced with references to the built-in operators and than duplicate
operators are dropped.  But if the user is using somewhere these operators,
upgrade of btree_gist from 1.3 to 1.4 would fail.

7. Add regression tests for btree kNN.

Tests were added only after the built-in distance operators were added.


--
Nikita Glukhov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company




--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] PATCH: recursive json_populate_record()

2017-01-11 Thread Nikita Glukhov

On 01/08/2017 09:52 PM, Tom Lane wrote:


The example you quoted at the start of the thread doesn't fail for me
in HEAD, so I surmise that it's falling foul of some assertion you added
in the 0001 patch, but if so I think that assertion is wrong.  attndims
is really syntactic sugar only and doesn't affect anything meaningful
semantically.  Here is an example showing why it shouldn't:

regression=# create table foo (d0 _int4, d1 int[], d2 int[3][4]);
CREATE TABLE
regression=# select attname,atttypid,attndims from pg_attribute where attrelid = 
'foo'::regclass and attnum > 0;
  attname | atttypid | attndims
-+--+--
  d0  | 1007 |0
  d1  | 1007 |1
  d2  | 1007 |2
(3 rows)

Columns d0,d1,d2 are really all of the same type, and any code that
treats d0 and d1 differently is certainly broken.



Thank you for this example with raw _int4 type.  I didn't expect that
attndims can legally be zero.  There was really wrong assertion "ndims > 0"
that was necessary because I used attndims for verification of a
number of dimensions of a populated json array.

I have fixed the first patch: when the number of dimensionsis unknown
we determine it simply by the number of successive opening brackets at
the start of a json array.  But I'm still using for verification non-zero
ndims values that can be get from composite type attribute (attndims) or
from domain array type (typndims) through specially added function 
get_type_ndims().


On 01/08/2017 09:52 PM, Tom Lane wrote:


I do not see the point of the second one of these, and it adds no test
case showing why it would be needed.

I also have added special test cases for json_to_record() showing difference
in behavior that the second patch brings in (see changes in json.out and 
jsonb.out).


--
Nikita Glukhov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 10e3186..55cacfb 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -11249,12 +11249,12 @@ table2-mapping
  whose columns match the record type defined by base
  (see note below).

-   select * from json_populate_record(null::myrowtype, '{"a":1,"b":2}')
+   select * from json_populate_record(null::myrowtype, '{"a": 1, "b": ["2", "a b"], "c": {"d": 4, "e": "a  b c"}}')

 
- a | b
+---
- 1 | 2
+ a |   b   |  c
+---+---+-
+ 1 | {2,"a b"} | (4,"a b c")
 

   
@@ -11343,12 +11343,12 @@ table2-mapping
  explicitly define the structure of the record with an AS
  clause.

-   select * from json_to_record('{"a":1,"b":[1,2,3],"c":"bar"}') as x(a int, b text, d text) 
+   select * from json_to_record('{"a":1,"b":[1,2,3],"c":[1,2,3],"e":"bar","r": {"a": 123, "b": "a b c"}}') as x(a int, b text, c int[], d text, r myrowtype) 

 
- a |b| d
+-+---
- 1 | [1,2,3] |
+ a |b|c| d |   r
+---+-+-+---+---
+ 1 | [1,2,3] | {1,2,3} |   | (123,"a b c")
 

   
diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c
index 17ee4e4..04959cb 100644
--- a/src/backend/utils/adt/jsonfuncs.c
+++ b/src/backend/utils/adt/jsonfuncs.c
@@ -93,7 +93,7 @@ static void elements_array_element_end(void *state, bool isnull);
 static void elements_scalar(void *state, char *token, JsonTokenType tokentype);
 
 /* turn a json object into a hash table */
-static HTAB *get_json_object_as_hash(text *json, const char *funcname);
+static HTAB *get_json_object_as_hash(char *json, int len, const char *funcname);
 
 /* common worker for populate_record and to_record */
 static Datum populate_record_worker(FunctionCallInfo fcinfo, const char *funcname,
@@ -149,6 +149,33 @@ static void setPathArray(JsonbIterator **it, Datum *path_elems,
 			 int level, Jsonb *newval, uint32 nelems, int op_type);
 static void addJsonbToParseState(JsonbParseState **jbps, Jsonb *jb);
 
+/* helper functions for populate_record[set] */
+typedef struct ColumnIOData ColumnIOData;
+typedef struct RecordIOData RecordIOData;
+
+static HeapTupleHeader
+populate_record(TupleDesc		tupdesc,
+RecordIOData  **record_info,
+HeapTupleHeader	template,
+MemoryContext	mcxt,
+Oidjtype,
+HTAB	 	   *json_hash,
+JsonbContainer *cont);
+
+static Datum
+populate_record_field(ColumnIOData *col,
+	  Oid			typid,
+	  int32			typmod,
+	  int32			ndims,
+	  const char   *colname,
+	  MemoryContext	mcxt,
+	  Datum			defaultval,
+	  Oid			jtype,
+	  char		   *json,
+	  bool			json_is

Re: [HACKERS] PATCH: recursive json_populate_record()

2016-12-27 Thread Nikita Glukhov

> I've noticed that this patch is on CF and needs a reviewer so I decided
> to take a look. Code looks good to me in general, it's well covered by
> tests and passes `make check-world`.

Thanks for your review.

> However it would be nice to have a little more comments. In my opinion
> every procedure have to have at least a short description - what it
> does, what arguments it receives and what it returns, even if it's a
> static procedure. Same applies for structures and their fields.

I have added some comments for functions and structures in the second 
version of this patch.


> Another thing that bothers me is a FIXME comment:
>
> ```
> tupdesc = lookup_rowtype_tupdesc(type, typmod); /* FIXME cache */
> ```
>
> I suggest remove it or implement a caching here as planned.

I have implemented tuple descriptor caching here in populate_composite() 
and also in populate_record_worker() (by using populate_composite() 
instead of populate_record()). These improvements can speed up bulk 
jsonb conversion by 15-20% in the simple test with two nested records.


--
Nikita Glukhov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 10e3186..55cacfb 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -11249,12 +11249,12 @@ table2-mapping
  whose columns match the record type defined by base
  (see note below).

-   select * from json_populate_record(null::myrowtype, '{"a":1,"b":2}')
+   select * from json_populate_record(null::myrowtype, '{"a": 1, "b": ["2", "a b"], "c": {"d": 4, "e": "a  b c"}}')

 
- a | b
+---
- 1 | 2
+ a |   b   |  c
+---+---+-
+ 1 | {2,"a b"} | (4,"a b c")
 

   
@@ -11343,12 +11343,12 @@ table2-mapping
  explicitly define the structure of the record with an AS
  clause.

-   select * from json_to_record('{"a":1,"b":[1,2,3],"c":"bar"}') as x(a int, b text, d text) 
+   select * from json_to_record('{"a":1,"b":[1,2,3],"c":[1,2,3],"e":"bar","r": {"a": 123, "b": "a b c"}}') as x(a int, b text, c int[], d text, r myrowtype) 

 
- a |b| d
+-+---
- 1 | [1,2,3] |
+ a |b|c| d |   r
+---+-+-+---+---
+ 1 | [1,2,3] | {1,2,3} |   | (123,"a b c")
 

   
diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c
index 17ee4e4..bb72b8e 100644
--- a/src/backend/utils/adt/jsonfuncs.c
+++ b/src/backend/utils/adt/jsonfuncs.c
@@ -93,7 +93,7 @@ static void elements_array_element_end(void *state, bool isnull);
 static void elements_scalar(void *state, char *token, JsonTokenType tokentype);
 
 /* turn a json object into a hash table */
-static HTAB *get_json_object_as_hash(text *json, const char *funcname);
+static HTAB *get_json_object_as_hash(char *json, int len, const char *funcname);
 
 /* common worker for populate_record and to_record */
 static Datum populate_record_worker(FunctionCallInfo fcinfo, const char *funcname,
@@ -149,6 +149,33 @@ static void setPathArray(JsonbIterator **it, Datum *path_elems,
 			 int level, Jsonb *newval, uint32 nelems, int op_type);
 static void addJsonbToParseState(JsonbParseState **jbps, Jsonb *jb);
 
+/* helper functions for populate_record[set] */
+typedef struct ColumnIOData ColumnIOData;
+typedef struct RecordIOData RecordIOData;
+
+static HeapTupleHeader
+populate_record(TupleDesc		tupdesc,
+RecordIOData  **record_info,
+HeapTupleHeader	template,
+MemoryContext	mcxt,
+Oidjtype,
+HTAB	 	   *json_hash,
+JsonbContainer *cont);
+
+static Datum
+populate_record_field(ColumnIOData *col,
+	  Oid			typid,
+	  int32			typmod,
+	  int32			ndims,
+	  const char   *colname,
+	  MemoryContext	mcxt,
+	  Datum			defaultval,
+	  Oid			jtype,
+	  char		   *json,
+	  bool			json_is_string,
+	  JsonbValue   *jval,
+	  bool		   *isnull);
+
 /* state for json_object_keys */
 typedef struct OkeysState
 {
@@ -216,6 +243,7 @@ typedef struct JhashState
 	HTAB	   *hash;
 	char	   *saved_scalar;
 	char	   *save_json_start;
+	bool		saved_scalar_is_string;
 } JHashState;
 
 /* hashtable element */
@@ -223,26 +251,67 @@ typedef struct JsonHashEntry
 {
 	char		fname[NAMEDATALEN];		/* hash key (MUST BE FIRST) */
 	char	   *val;
-	char	   *json;
 	bool		isnull;
+	bool		isstring;
 } JsonHashEntry;
 
-/* these two are stolen from hstore / record_out, used in populate_record* */
-typedef struct ColumnIOData
+/* structure to cache type I/O metadata needed for populate_scalar() */
+typedef struct ScalarIODat

[HACKERS] PATCH: recursive json_populate_record()

2016-12-12 Thread Nikita Glukhov

Hi.

The first attached patch implements recursive processing of nested 
objects and arrays in json[b]_populate_record[set](), 
json[b]_to_record[set](). See regression tests for examples.


It also fixes the following errors/inconsistencies caused by lost 
quoting of string json values:


[master]=# select * from json_to_record('{"js": "a"}') as rec(js json);
ERROR:  invalid input syntax for type json
DETAIL:  Token "a" is invalid.
CONTEXT:  JSON data, line 1: a

[master]=# select * from json_to_record('{"js": "true"}') as rec(js json);
  js
--
true

[patched]=# select * from json_to_record('{"js": "a"}') as rec(js json);
  js
-
 "a"

[patched]=# select * from json_to_record('{"js": "true"}') as rec(js json);
   js

 "true"


The second patch adds assignment of correct ndims to array fields of 
RECORD function result types.
Without this patch, attndims in tuple descriptors of RECORD types is 
always 0 and the corresponding assertion fails in the next test:


[patched]=# select json_to_record('{"a": [1, 2, 3]}') as rec(a int[]);


Should I submit these patches to commitfest?

--
Nikita Glukhov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index eca98df..12049a4 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -11249,12 +11249,12 @@ table2-mapping
  whose columns match the record type defined by base
  (see note below).

-   select * from json_populate_record(null::myrowtype, '{"a":1,"b":2}')
+   select * from json_populate_record(null::myrowtype, '{"a": 1, "b": ["2", "a b"], "c": {"d": 4, "e": "a  b c"}}')

 
- a | b
+---
- 1 | 2
+ a |   b   |  c
+---+---+-
+ 1 | {2,"a b"} | (4,"a b c")
 

   
@@ -11343,12 +11343,12 @@ table2-mapping
  explicitly define the structure of the record with an AS
  clause.

-   select * from json_to_record('{"a":1,"b":[1,2,3],"c":"bar"}') as x(a int, b text, d text) 
+   select * from json_to_record('{"a":1,"b":[1,2,3],"c":[1,2,3],"e":"bar","r": {"a": 123, "b": "a b c"}}') as x(a int, b text, c int[], d text, r myrowtype) 

 
- a |b| d
+-+---
- 1 | [1,2,3] |
+ a |b|c| d |   r
+---+-+-+---+---
+ 1 | [1,2,3] | {1,2,3} |   | (123,"a b c")
 

   
diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c
index 17ee4e4..729826c 100644
--- a/src/backend/utils/adt/jsonfuncs.c
+++ b/src/backend/utils/adt/jsonfuncs.c
@@ -93,7 +93,7 @@ static void elements_array_element_end(void *state, bool isnull);
 static void elements_scalar(void *state, char *token, JsonTokenType tokentype);
 
 /* turn a json object into a hash table */
-static HTAB *get_json_object_as_hash(text *json, const char *funcname);
+static HTAB *get_json_object_as_hash(char *json, int len, const char *funcname);
 
 /* common worker for populate_record and to_record */
 static Datum populate_record_worker(FunctionCallInfo fcinfo, const char *funcname,
@@ -149,6 +149,33 @@ static void setPathArray(JsonbIterator **it, Datum *path_elems,
 			 int level, Jsonb *newval, uint32 nelems, int op_type);
 static void addJsonbToParseState(JsonbParseState **jbps, Jsonb *jb);
 
+/* helper functions for populate_record[set] */
+typedef struct ColumnIOData ColumnIOData;
+typedef struct RecordIOData RecordIOData;
+
+static HeapTupleHeader
+populate_record(TupleDesc		tupdesc,
+RecordIOData  **record_info,
+HeapTupleHeader	template,
+MemoryContext	mcxt,
+Oidjtype,
+HTAB	 	   *json_hash,
+JsonbContainer *cont);
+
+static Datum
+populate_record_field(ColumnIOData *col,
+	  Oid			type,
+	  int32			typmod,
+	  int32			ndims,
+	  const char   *colname,
+	  MemoryContext	mcxt,
+	  Datum			defaultval,
+	  Oid			jtype,
+	  char		   *json,
+	  bool			json_is_string,
+	  JsonbValue   *jval,
+	  bool		   *isnull);
+
 /* state for json_object_keys */
 typedef struct OkeysState
 {
@@ -216,6 +243,7 @@ typedef struct JhashState
 	HTAB	   *hash;
 	char	   *saved_scalar;
 	char	   *save_json_start;
+	bool		saved_scalar_is_string;
 } JHashState;
 
 /* hashtable element */
@@ -223,26 +251,55 @@ typedef struct JsonHashEntry
 {
 	char		fname[NAMEDATALEN];		/* hash key (MUST BE FIRST) */
 	char	   *val;
-	char	   *json;
 	bool		isnull;
+	bool		isstring;
 } JsonHashEntry;
 
-/* these two are stolen from hstore / record_out, used in populate_record* */
-type

Re: [HACKERS] Bug in comparison of empty jsonb arrays to scalars

2016-11-10 Thread Nikita Glukhov

On 10.11.2016 09:54, Michael Paquier wrote:


Yes, definitely.
=# create table json_data (a jsonb);
CREATE TABLE
=# INSERT INTO json_data values ('{}'::jsonb), ('[]'::jsonb),
('null'::jsonb), ('true'::jsonb), ('1'::jsonb), ('""'::jsonb);
INSERT 0 6
=# SELECT * FROM json_data ORDER BY 1 DESC;
   a
--
  {}
  true
  1
  ""
  null
  []
(6 rows)
So that's object > boolean > integer > string > NULL > array.

And attached is a patch.


Perhaps I did not explain it clearly enough, but only *empty top-level* 
arrays are out of the correct order.

See complete example:

=# SELECT * FROM (VALUES
('null'::jsonb), ('0'), ('""'), ('true'), ('[]'), ('{}'),
('[null]'), ('[0]'), ('[""]'), ('[true]'), ('[[]]'), ('[{}]'),
('{"a": null}'), ('{"a": 0}'), ('{"a": ""}'), ('{"a": true}'), 
('{"a": []}'), ('{"a": {}}')

) valsORDER BY 1;
   column1
-
 []
 null
 ""
 0
 true
 [null]
 [""]
 [0]
 [true]
 [[]]
 [{}]
 {}
 {"a": null}
 {"a": ""}
 {"a": 0}
 {"a": true}
 {"a": []}
 {"a": {}}
(18 rows)


--
Nikita Glukhov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


[HACKERS] Bug in comparison of empty jsonb arrays to scalars

2016-11-08 Thread Nikita Glukhov

Hi hackers.

While working on jsonbstatistics, I found the following bug:
an empty jsonb array is considered to be lesser than any scalar,
but it is expected that objects > arrays > scalars.

# select '[]'::jsonb < 'null'::jsonb;
 ?column?
--
 t
(1 row)

Attached patch contains:
 1. bug fix (added the missing "else" in compareJsonbContainers())
 2. regression test
 3. pg_upgrade: invalidation of btree indexes on jsonb columns and 
REINDEX-script generation


--
Nikita Glukhov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
diff --git a/src/backend/utils/adt/jsonb_util.c b/src/backend/utils/adt/jsonb_util.c
index ddc34ce..43934bf 100644
--- a/src/backend/utils/adt/jsonb_util.c
+++ b/src/backend/utils/adt/jsonb_util.c
@@ -232,7 +232,7 @@ compareJsonbContainers(JsonbContainer *a, JsonbContainer *b)
 		 */
 		if (va.val.array.rawScalar != vb.val.array.rawScalar)
 			res = (va.val.array.rawScalar) ? -1 : 1;
-		if (va.val.array.nElems != vb.val.array.nElems)
+		else if (va.val.array.nElems != vb.val.array.nElems)
 			res = (va.val.array.nElems > vb.val.array.nElems) ? 1 : -1;
 		break;
 	case jbvObject:
diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c
index 42bf499..81c1616 100644
--- a/src/bin/pg_upgrade/check.c
+++ b/src/bin/pg_upgrade/check.c
@@ -115,6 +115,11 @@ check_and_dump_old_cluster(bool live_check)
 	if (GET_MAJOR_VERSION(old_cluster.major_version) <= 804)
 		new_9_0_populate_pg_largeobject_metadata(_cluster, true);
 
+	/* Pre-PG 10.0 had bug in jsonb comparison operator  */
+	if (GET_MAJOR_VERSION(old_cluster.major_version) <= 906 &&
+		GET_MAJOR_VERSION(old_cluster.major_version) >= 904)
+		old_9_6_invalidate_jsonb_btree_indexes(_cluster, true);
+
 	/*
 	 * While not a check option, we do this now because this is the only time
 	 * the old server is running.
@@ -166,11 +171,26 @@ report_clusters_compatible(void)
 void
 issue_warnings(void)
 {
-	/* Create dummy large object permissions for old < PG 9.0? */
-	if (GET_MAJOR_VERSION(old_cluster.major_version) <= 804)
+	bool need_new_9_0_populate_pg_largeobject_metadata =
+			GET_MAJOR_VERSION(old_cluster.major_version) <= 804;
+
+	bool need_old_9_6_invalidate_jsonb_btree_indexes =
+			GET_MAJOR_VERSION(old_cluster.major_version) <= 906 &&
+			GET_MAJOR_VERSION(old_cluster.major_version) >= 904;
+
+	if (need_new_9_0_populate_pg_largeobject_metadata ||
+		need_old_9_6_invalidate_jsonb_btree_indexes)
 	{
 		start_postmaster(_cluster, true);
-		new_9_0_populate_pg_largeobject_metadata(_cluster, false);
+
+		/* Create dummy large object permissions for old < PG 9.0? */
+		if (need_new_9_0_populate_pg_largeobject_metadata)
+			new_9_0_populate_pg_largeobject_metadata(_cluster, false);
+
+		/* invalidate jsonb btree indexes for old < PG 10.0  */
+		if (need_old_9_6_invalidate_jsonb_btree_indexes)
+			old_9_6_invalidate_jsonb_btree_indexes(_cluster, false);
+
 		stop_postmaster(false);
 	}
 }
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index 19dca83..07e0ca6 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -442,6 +442,8 @@ void		pg_putenv(const char *var, const char *val);
 void new_9_0_populate_pg_largeobject_metadata(ClusterInfo *cluster,
 		 bool check_mode);
 void		old_9_3_check_for_line_data_type_usage(ClusterInfo *cluster);
+void old_9_6_invalidate_jsonb_btree_indexes(ClusterInfo *cluster,
+			bool check_mode);
 
 /* parallel.c */
 void parallel_exec_prog(const char *log_file, const char *opt_log_file,
diff --git a/src/bin/pg_upgrade/version.c b/src/bin/pg_upgrade/version.c
index 3c7c5fa..b1a3b89 100644
--- a/src/bin/pg_upgrade/version.c
+++ b/src/bin/pg_upgrade/version.c
@@ -10,6 +10,7 @@
 #include "postgres_fe.h"
 
 #include "pg_upgrade.h"
+#include "catalog/pg_type.h"
 #include "fe_utils/string_utils.h"
 
 
@@ -185,3 +186,116 @@ old_9_3_check_for_line_data_type_usage(ClusterInfo *cluster)
 	else
 		check_ok();
 }
+
+/*
+ * old_9_6_invalidate_jsonb_btree_indexes()
+ *	9.4-9.6 -> 10.0
+ *	Btree index ordering for jsonb had been fixed in 10.0
+ */
+void
+old_9_6_invalidate_jsonb_btree_indexes(ClusterInfo *cluster, bool check_mode)
+{
+	int			dbnum;
+	FILE	   *script = NULL;
+	bool		found = false;
+	char		output_path[MAXPGPATH];
+
+	prep_status("Checking for jsonb btree indexes existence");
+
+	snprintf(output_path, sizeof(output_path), "reindex_jsonb_btree.sql");
+
+	for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++)
+	{
+		PGresult   *res;
+		bool		db_used = false;
+		int			ntups;
+		int			rowno;
+		int			i_nspname,
+	i_relname;
+		DbInfo	   *active_db = >dbarr.dbs[dbnum];
+		PGconn	   *conn = connectToServer(cluster, active_db->db_name);
+
+		/* find jsonb btree indexes */
+		res = executeQueryOrDie(conn,
+"