I wrote:
> I have a few more things in mind I'm planning to look into:
>
> - I'm not convinced that there's enough sanity checking in the input
> functions. I think you can send queries into the server using the binary
> protocol that you couldn't produce otherwise. For example, queries with
> multiple VAL nodes, with no operator to connect them. Does that wreak
> havoc in any of the functions?
I added code to check that the query tree is well-formed. It was indeed
possible to send malformed queries in binary mode, which produced all
kinds of strange results.
> And the left-field of QueryOperator is an
> int16, what happens if you have query that exceeds
> that?
Apparently the field wraps around and you get a backend crash when it
tries to do access an array using a negative index. You need to have a
large stack (and increase max_stack_depth now that we have the check for
that in there) to reproduce it. Not sure if it's an exploitable security
vulnerability, but it might be.
I fixed this by making the left-field a uint32. There's no reason to
arbitrarily limit it to 16-bits, and it won't increase the disk/memory
footprint either now that QueryOperator and QueryOperand are separate
structs.
> And parse_query always produces trees that are in prefix notation,
> so the left-field is really redundant, but using tsqueryrecv, you could
> inject queries that are not in prefix notation; is there anything in the
> code that depends on that?
At least infix-function seems to depend on it. By checking that the tree
is well-formed, and the fact that the left operand is implicitly the
next node in the array, we know that it is in prefix notation.
> - There's many internal intermediate representations of a query:
> TSQuery, a QTNode-tree, NODE-tree (in tsquery_cleanup.c), prefix
> notation stack of QueryItems (in parser), infix-tree. Could we remove
> some of these?
I haven't looked into this yet. Might leave it for 8.4.
> - There's a lot of recursive functions. Are they all using
> check_stack_depth?
I added check_stack_depth() call to all recursive functions I found.
Some of them might have a natural limit so that you can't force
arbitrarily deep recursions, but check_stack_depth() is cheap enough
that seems best to just stick it into anything that might be a problem.
Patch attached. It's on top of the tsearch-refactor-2.patch I posted
earlier.
--
Heikki Linnakangas
EnterpriseDB http://www.enterprisedb.com
*** ../pgsql.tsearch-1/src/backend/utils/adt/tsquery.c 2007-09-03 11:12:17.000000000 +0100
--- ./src/backend/utils/adt/tsquery.c 2007-09-05 11:59:09.000000000 +0100
***************
*** 21,26 ****
--- 21,27 ----
#include "tsearch/ts_utils.h"
#include "utils/memutils.h"
#include "utils/pg_crc.h"
+ #include "nodes/bitmapset.h"
struct TSQueryParserStateData
***************
*** 388,394 ****
* QueryItems must be in polish (prefix) notation.
*/
static void
! findoprnd(QueryItem *ptr, int *pos)
{
/* since this function recurses, it could be driven to stack overflow. */
check_stack_depth();
--- 389,395 ----
* QueryItems must be in polish (prefix) notation.
*/
static void
! findoprnd(QueryItem *ptr, uint32 *pos)
{
/* since this function recurses, it could be driven to stack overflow. */
check_stack_depth();
***************
*** 451,457 ****
TSQuery query;
int commonlen;
QueryItem *ptr;
! int pos = 0;
ListCell *cell;
/* init state */
--- 452,458 ----
TSQuery query;
int commonlen;
QueryItem *ptr;
! uint32 pos = 0;
ListCell *cell;
/* init state */
***************
*** 792,797 ****
--- 793,799 ----
QueryItem *item;
int datalen = 0;
char *ptr;
+ Bitmapset *parentset = NULL;
size = pq_getmsgint(buf, sizeof(uint32));
if (size < 0 || size > (MaxAllocSize / sizeof(QueryItem)))
***************
*** 814,819 ****
--- 816,830 ----
item->operand.valcrc = (int32) pq_getmsgint(buf, sizeof(int32));
item->operand.length = pq_getmsgint(buf, sizeof(int16));
+ /* Check that the weight bitmap is valid */
+ if (item->operand.weight < 0 || item->operand.weight > 0xF)
+ elog(ERROR, "invalid weight bitmap");
+
+ /* XXX: We don't check that the CRC is valid. Actually, if we
+ * bothered to calculate it to verify, there would be no need
+ * to transfer it.
+ */
+
/*
* Check that datalen doesn't grow too large. Without the
* check, a malicious client could induce a buffer overflow
***************
*** 828,834 ****
elog(ERROR, "invalid tsquery; total operand length exceeded");
/* We can calculate distance from datalen, no need to send it
! * through the wire. If we did, we would have to check that
* it's valid anyway.
*/
item->operand.distance = datalen;
--- 839,845 ----
elog(ERROR, "invalid tsquery; total operand length exceeded");
/* We can calculate distance from datalen, no need to send it
! * across the wire. If we did, we would have to check that
* it's valid anyway.
*/
item->operand.distance = datalen;
***************
*** 842,864 ****
item->operator.oper != OP_OR &&
item->operator.oper != OP_AND)
elog(ERROR, "unknown operator type %d", (int) item->operator.oper);
if(item->operator.oper != OP_NOT)
{
! item->operator.left = (int16) pq_getmsgint(buf, sizeof(int16));
/*
! * Sanity checks
*/
! if (item->operator.left <= 0 || i + item->operator.left >= size)
elog(ERROR, "invalid pointer to left operand");
! /* XXX: Though there's no way to construct a TSQuery that's
! * not in polish notation, we don't enforce that for
! * queries received from client in binary mode. Is there
! * anything that relies on it?
! *
! * XXX: The tree could be malformed in other ways too,
! * a node could have two parents, for example.
*/
}
if (i == size - 1)
--- 853,890 ----
item->operator.oper != OP_OR &&
item->operator.oper != OP_AND)
elog(ERROR, "unknown operator type %d", (int) item->operator.oper);
+
+ /*
+ * Check that no previous operator node points to the right
+ * operand. That would mean that the operand node
+ * has two parents.
+ */
+ if (bms_is_member(i + 1, parentset))
+ elog(ERROR, "malformed query tree");
+
+ parentset = bms_add_member(parentset, i + 1);
+
if(item->operator.oper != OP_NOT)
{
! uint32 left = (uint32) pq_getmsgint(buf, sizeof(uint32));
!
/*
! * Right operand is implicitly at "this+1". Don't allow
! * left to point to the right operand, or to self.
*/
! if (left <= 1 || i + left >= size)
elog(ERROR, "invalid pointer to left operand");
! /*
! * Check that no previous operator node points to the left
! * operand.
*/
+ if (bms_is_member(i + left, parentset))
+ elog(ERROR, "malformed query tree");
+
+ parentset = bms_add_member(parentset, i + left);
+
+ item->operator.left = left;
}
if (i == size - 1)
***************
*** 871,876 ****
--- 897,907 ----
item++;
}
+ /* Now check that each node, except the root, has a parent. We
+ * already checked above that no node has more than one parent. */
+ if (bms_num_members(parentset) != size - 1 && size != 0)
+ elog(ERROR, "malformed query tree");
+
query = (TSQuery) repalloc(query, len + datalen);
item = GETQUERY(query);
diff -c -r ../pgsql.tsearch-1/src/backend/utils/adt/tsquery_cleanup.c ./src/backend/utils/adt/tsquery_cleanup.c
*** ../pgsql.tsearch-1/src/backend/utils/adt/tsquery_cleanup.c 2007-08-31 19:34:58.000000000 +0100
--- ./src/backend/utils/adt/tsquery_cleanup.c 2007-09-05 12:14:43.000000000 +0100
***************
*** 57,62 ****
--- 57,65 ----
static void
plainnode(PLAINTREE * state, NODE * node)
{
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
if (state->cur == state->len)
{
state->len *= 2;
***************
*** 107,112 ****
--- 110,118 ----
static void
freetree(NODE * node)
{
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
if (!node)
return;
if (node->left)
***************
*** 125,130 ****
--- 131,139 ----
static NODE *
clean_NOT_intree(NODE * node)
{
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
if (node->valnode->type == QI_VAL)
return node;
***************
*** 203,208 ****
--- 212,220 ----
char lresult = V_UNKNOWN,
rresult = V_UNKNOWN;
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
if (node->valnode->type == QI_VAL)
return node;
else
*** ../pgsql.tsearch-1/src/backend/utils/adt/tsquery_rewrite.c 2007-08-31 22:08:41.000000000 +0100
--- ./src/backend/utils/adt/tsquery_rewrite.c 2007-09-05 12:18:46.000000000 +0100
***************
*** 22,27 ****
--- 22,30 ----
static int
addone(int *counters, int last, int total)
{
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
counters[last]++;
if (counters[last] >= total)
{
***************
*** 173,178 ****
--- 176,184 ----
static QTNode *
dofindsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
{
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
root = findeq(root, ex, subs, isfind);
if (root && (root->flags & QTN_NOCHANGE) == 0 && root->valnode->type == QI_OPR)
*** ../pgsql.tsearch-1/src/backend/utils/adt/tsquery_util.c 2007-08-31 22:15:56.000000000 +0100
--- ./src/backend/utils/adt/tsquery_util.c 2007-09-05 12:21:29.000000000 +0100
***************
*** 22,27 ****
--- 22,30 ----
{
QTNode *node = (QTNode *) palloc0(sizeof(QTNode));
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
node->valnode = in;
if (in->type == QI_OPR)
***************
*** 53,58 ****
--- 56,64 ----
if (!in)
return;
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
if (in->valnode->type == QI_VAL && in->word && (in->flags & QTN_WORDFREE) != 0)
pfree(in->word);
***************
*** 79,84 ****
--- 85,93 ----
int
QTNodeCompare(QTNode * an, QTNode * bn)
{
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
if (an->valnode->type != bn->valnode->type)
return (an->valnode->type > bn->valnode->type) ? -1 : 1;
***************
*** 133,138 ****
--- 142,150 ----
{
int i;
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
if (in->valnode->type != QI_OPR)
return;
***************
*** 165,170 ****
--- 177,185 ----
{
int i;
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
if (in->valnode->type != QI_OPR)
return;
***************
*** 205,210 ****
--- 220,228 ----
{
int i;
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
if (in->valnode->type != QI_OPR)
return;
***************
*** 244,249 ****
--- 262,270 ----
static void
cntsize(QTNode * in, int *sumlen, int *nnode)
{
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
*nnode += 1;
if (in->valnode->type == QI_OPR)
{
***************
*** 268,273 ****
--- 289,297 ----
static void
fillQT(QTN2QTState *state, QTNode *in)
{
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
if (in->valnode->type == QI_VAL)
{
memcpy(state->curitem, in->valnode, sizeof(QueryOperand));
***************
*** 325,331 ****
QTNode *
QTNCopy(QTNode *in)
{
! QTNode *out = (QTNode *) palloc(sizeof(QTNode));
*out = *in;
out->valnode = (QueryItem *) palloc(sizeof(QueryItem));
--- 349,360 ----
QTNode *
QTNCopy(QTNode *in)
{
! QTNode *out;
!
! /* since this function recurses, it could be driven to stack overflow. */
! check_stack_depth();
!
! out = (QTNode *) palloc(sizeof(QTNode));
*out = *in;
out->valnode = (QueryItem *) palloc(sizeof(QueryItem));
*** ../pgsql.tsearch-1/src/backend/utils/adt/tsrank.c 2007-08-31 22:24:34.000000000 +0100
--- ./src/backend/utils/adt/tsrank.c 2007-09-05 12:24:27.000000000 +0100
***************
*** 508,513 ****
--- 508,517 ----
int i;
bool found = false;
+ /* since this function recurses, it could be driven to stack overflow.
+ * (though any decent compiler will optimize away the tail-recursion. */
+ check_stack_depth();
+
reset_istrue_flag(query);
ext->p = 0x7fffffff;
*** ../pgsql.tsearch-1/src/include/tsearch/ts_type.h 2007-09-03 11:10:37.000000000 +0100
--- ./src/include/tsearch/ts_type.h 2007-09-05 12:17:02.000000000 +0100
***************
*** 159,165 ****
typedef struct
{
QueryItemType type; /* operand or kind of operator (ts_tokentype) */
! int8 weight; /* weights of operand to search. Is this a bitmask? checkclass_str seems to think so */
int32 valcrc; /* XXX: pg_crc32 would be a more appropriate data type, but we use comparisons to signed integers in the code. They would need to be changed as well. */
/* pointer to text value of operand, must correlate with WordEntry */
--- 159,172 ----
typedef struct
{
QueryItemType type; /* operand or kind of operator (ts_tokentype) */
!
! /* Weights of operand to search. This is a bitmap:
! * A: 1<<3
! * B: 1<<2
! * C: 1<<1
! * D: 1<<0
! */
! int8 weight;
int32 valcrc; /* XXX: pg_crc32 would be a more appropriate data type, but we use comparisons to signed integers in the code. They would need to be changed as well. */
/* pointer to text value of operand, must correlate with WordEntry */
***************
*** 179,185 ****
{
QueryItemType type;
int8 oper; /* see above */
! int16 left; /* pointer to left operand. Right operand is
* item + 1, left operand is placed
* item+item->left */
} QueryOperator;
--- 186,192 ----
{
QueryItemType type;
int8 oper; /* see above */
! uint32 left; /* pointer to left operand. Right operand is
* item + 1, left operand is placed
* item+item->left */
} QueryOperator;
---------------------------(end of broadcast)---------------------------
TIP 4: Have you searched our list archives?
http://archives.postgresql.org