On Mon, Jun 26, 2023 at 4:36 AM Joel Jacobson <[email protected]> wrote:
>
> On Sun, Jun 25, 2023, at 11:42, Joel Jacobson wrote:
> > SELECT hashset_contains('{}'::int4hashset, NULL::int);
> >
> > would be False, according to the General Rules.
> >
> ...
> > Applying the same rules, we'd have to return Unknown (which we represent as
> > null) for:
> >
> > SELECT hashset_contains('{null}'::int4hashset, NULL::int);
> >
>
> Aha! I just discovered to my surprise that the corresponding array
> queries give the same results:
>
> SELECT NULL = ANY(ARRAY[]::int[]);
> ?column?
> ----------
> f
> (1 row)
>
> SELECT NULL = ANY(ARRAY[NULL]::int[]);
> ?column?
> ----------
>
> (1 row)
>
> I have no more objections; let's stick to the same null semantics as
> arrays and multisets.
>
> /Joel
Can you try gluing the attached code into the hashset data type's input function?
The attached code parses a cstring whose elements may or may not be
double-quoted, so '{1,2,3}' == '{"1","2","3"}'. Quoting preserves the inner
string as is. Currently int4hashset input is delimited by commas, so if you
want to deal with ranges you need to escape the comma.
/*
gcc -I/home/jian/postgres/2023_05_25_beta5421/include/server -fPIC -c /home/jian/Desktop/regress_pgsql/input_validate.c
gcc -shared -o /home/jian/Desktop/regress_pgsql/input_validate.so /home/jian/Desktop/regress_pgsql/input_validate.o
CREATE OR REPLACE FUNCTION str_delim_count_validate(cstring) RETURNS BOOL SET search_path from current
AS '/home/jian/Desktop/regress_pgsql/input_validate', 'str_delim_count_validate'
LANGUAGE C IMMUTABLE;
select str_delim_count_validate('{"23890","2","3", "a",1,2,3,4,NULL,2022-01-01,"[1,2]"}');
select str_delim_count_validate('{"3 ", }'); --fail
select str_delim_count_validate('{"3 " }'); --ok
select str_delim_count_validate('{"""23890"}'); --fail.
select str_delim_count_validate('{}'); --ok
select str_delim_count_validate('}'); --fail.
select str_delim_count_validate('{'); --fail.
select str_delim_count_validate('{{}}'); --fail.
select str_delim_count_validate('{{}}'); --fail.
select str_delim_count_validate('{"22022-01-01,[1,2]}'); --fail.
select str_delim_count_validate('{" 2022-01-01 "}'); --ok
select str_delim_count_validate('{ 2022-01-01 }'); --ok
select str_delim_count_validate('{ 2022-01-01 ,"[1,2]"} '); --ok
select str_delim_count_validate('{ 2023-06-26 16:45:02.454293+08 ,"2","3"}'); --ok.
select str_delim_count_validate('{"\\t"}'); --ok
*/
#include "postgres.h"
#include "access/htup_details.h"
#include "catalog/pg_type.h"
#include "utils/builtins.h"
#include "utils/numeric.h"
#include "funcapi.h"
#include "utils/lsyscache.h"
#include "utils/fmgrprotos.h"
#include "common/hashfn.h"
/* Module magic block and V1 call-convention record for the SQL-callable entry point. */
PG_MODULE_MAGIC;
PG_FUNCTION_INFO_V1(str_delim_count_validate);
/* Forward declarations of the file-local helpers defined further down. */
/* SetCount: validates a set literal and returns its element count, or -1 on a soft error. */
static int SetCount(const char *str, char typdelim, Node *escontext);
/* ReadSetStr: splits a set literal into elements, rewriting arrayStr in place. */
static bool ReadSetStr(char *arrayStr,const char *origStr, char typdelim, Node *escontext);
/* set_isspace: hard-wired ASCII whitespace test. */
static bool set_isspace(char ch);
Datum
str_delim_count_validate(PG_FUNCTION_ARGS)
{
char *string = PG_GETARG_CSTRING(0);
char *string_save;
char *p;
/* Make a modifiable copy of the input */
string_save = pstrdup(string);
char typdelim = ',';
p = string_save;
int nitems;
nitems = SetCount(p,typdelim, fcinfo->context);
if (!ReadSetStr(p, string,typdelim,fcinfo->context))
elog(INFO,"delimuite str failed");
elog(INFO,"line %d nitems=%d",__LINE__,nitems);
PG_RETURN_BOOL(true);
}
/*
 * set_isspace() --- locale-independent replacement for isspace()
 *
 * Reports whether ch is one of the six traditional ASCII whitespace
 * characters: space, tab, newline, carriage return, vertical tab or form
 * feed.  The list is hard-wired on purpose: relying on isspace() could make
 * the same literal parse differently depending on the locale setting.
 */
static bool
set_isspace(char ch)
{
    switch (ch)
    {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
        case '\v':
        case '\f':
            return true;
        default:
            return false;
    }
}
/*
 * ReadSetStr
 *      Split a set literal into its elements, de-quoting each one in place.
 *
 * arrayStr  - modifiable copy of the literal; it is overwritten as the scan
 *             proceeds, so the caller must not need its contents afterwards.
 * origStr   - the untouched literal, used only in error messages.
 * typdelim  - the element delimiter character.
 * escontext - handed to ereturn() for error reporting.
 *
 * Every element found is reported with an INFO message.  Returns true once
 * an unquoted right brace has been consumed; text after that brace is not
 * examined here.  On premature end of input the error is reported through
 * ereturn() with a result of false.
 */
static bool
ReadSetStr(char *arrayStr, const char *origStr, char typdelim, Node *escontext)
{
    char       *srcptr;
    bool        in_quotes = false;
    bool        eoArray = false;

    /*
     * We have to remove " and \ characters to create a clean item value.  We
     * overwrite each item value in-place within arrayStr to do this.  srcptr
     * is the current scan point, and dstptr is where we are copying to.
     *
     * We also want to suppress leading and trailing unquoted whitespace.  We
     * use the leadingspace flag to suppress leading space.  Trailing space is
     * tracked by using dstendptr to point just past the last significant
     * output character.
     *
     * The error checking in this routine is mostly pro-forma, since we expect
     * that SetCount() already validated the string.  So we don't bother with
     * errdetail messages.
     */
    srcptr = arrayStr;
    while (!eoArray)
    {
        bool        itemdone = false;
        bool        leadingspace = true;

        /*
         * NOTE: hasquoting is recorded for each item but is not consumed
         * anywhere in this function yet.
         */
        bool        hasquoting = false;
        char       *itemstart;
        char       *dstptr;
        char       *dstendptr;

        /* The cleaned item is written over the raw text it came from. */
        itemstart = dstptr = dstendptr = srcptr;

        while (!itemdone)
        {
            switch (*srcptr)
            {
                case '\0':
                    /* Signal a premature end of the string */
                    ereturn(escontext, false,
                            (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                             errmsg("malformed array literal: \"%s\"",
                                    origStr)));
                    break;
                case '\\':
                    /* Skip backslash, copy next character as-is. */
                    srcptr++;
                    if (*srcptr == '\0')
                        ereturn(escontext, false,
                                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                 errmsg("malformed array literal: \"%s\"",
                                        origStr)));
                    *dstptr++ = *srcptr++;
                    /* Treat the escaped character as non-whitespace */
                    leadingspace = false;
                    dstendptr = dstptr;
                    hasquoting = true;  /* can't be a NULL marker */
                    break;
                case '"':
                    in_quotes = !in_quotes;
                    if (in_quotes)
                        leadingspace = false;
                    else
                    {
                        /*
                         * Advance dstendptr when we exit in_quotes; this
                         * saves having to do it in all the other in_quotes
                         * cases.
                         */
                        dstendptr = dstptr;
                    }
                    hasquoting = true;  /* can't be a NULL marker */
                    srcptr++;
                    break;
                case '{':
                    /* An unquoted left brace is structure, not data: drop it. */
                    if (!in_quotes)
                    {
                        srcptr++;
                    }
                    else
                        *dstptr++ = *srcptr++;
                    break;
                case '}':
                    /* An unquoted right brace ends both the item and the set. */
                    if (!in_quotes)
                    {
                        eoArray = itemdone = true;
                        srcptr++;
                    }
                    else
                        *dstptr++ = *srcptr++;
                    break;
                default:
                    if (in_quotes)
                        *dstptr++ = *srcptr++;
                    else if (*srcptr == typdelim)
                    {
                        /* Unquoted delimiter: this item is complete. */
                        itemdone = true;
                        srcptr++;
                    }
                    else if (set_isspace(*srcptr))
                    {
                        /*
                         * If leading space, drop it immediately.  Else, copy
                         * but don't advance dstendptr.
                         */
                        if (leadingspace)
                            srcptr++;
                        else
                            *dstptr++ = *srcptr++;
                    }
                    else
                    {
                        *dstptr++ = *srcptr++;
                        leadingspace = false;
                        dstendptr = dstptr;
                    }
                    break;
            }
        }

        /*
         * The terminating delimiter or brace was consumed without being
         * copied, so the write position must trail the read position.
         */
        Assert(dstptr < srcptr);
        /* Terminate at dstendptr, which cuts off trailing unquoted space. */
        *dstendptr = '\0';
        elog(INFO,"line [%04d] itemstart:|%s|",__LINE__,itemstart);
    }
    return true;
}
/*
 * Parser states used by SetCount() while scanning a set literal.  Each state
 * records what kind of token was seen most recently, which determines the
 * characters that may legally follow.
 */
typedef enum
{
/* Initial state: no left brace has been seen yet. */
SET_NO_LEVEL,
/* An unquoted left brace was just processed. */
SET_LEVEL_STARTED,
/* Inside an unquoted element (entered on a backslash or non-space character). */
SET_ELEM_STARTED,
/* Accepted before a delimiter or right brace; SetCount() never assigns it. */
SET_ELEM_COMPLETED,
/* An opening double quote was seen; scanning inside a quoted element. */
SET_QUOTED_ELEM_STARTED,
/* The closing double quote of a quoted element was seen. */
SET_QUOTED_ELEM_COMPLETED,
/* A delimiter was seen after an element. */
SET_ELEM_DELIMITED,
/* An unquoted right brace was just processed. */
SET_LEVEL_COMPLETED,
/* A delimiter was seen after a completed brace level. */
SET_LEVEL_DELIMITED
} SetParseState;
/*
 * SetCount
 * Validates the syntax of a set literal and counts its elements.
 *
 * str is the literal to scan (it is not modified), typdelim is the element
 * delimiter character, and escontext is handed to ereturn() for error
 * reporting.
 *
 * Returns the number of elements, computed as one more than the number of
 * unquoted delimiters seen, or 0 when no element was ever started between
 * the braces. Only a single brace level is accepted: a second unquoted left
 * brace is rejected, and only whitespace may follow the closing brace.
 *
 * If we detect an error, fill *escontext with error details and return -1
 * (unless escontext isn't provided, in which case errors will be thrown).
 */
static int
SetCount(const char *str, char typdelim, Node *escontext)
{
int nest_level = 0,
nelems = 1;
bool in_quotes = false;
bool eoArray = false;
bool empty_array = true;
const char *ptr;
SetParseState parse_state = SET_NO_LEVEL;
ptr = str;
/* Outer loop: one pass per element, until the closing brace is consumed. */
while (!eoArray)
{
bool itemdone = false;
/* Inner loop: one pass per character of the current element. */
while (!itemdone)
{
/* Once any element has been started, the set is not empty. */
if (parse_state == SET_ELEM_STARTED ||
parse_state == SET_QUOTED_ELEM_STARTED)
empty_array = false;
switch (*ptr)
{
case '\0':
/* Signal a premature end of the string */
ereturn(escontext, -1,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("malformed array literal: \"%s\"", str),
errdetail("Unexpected end of input.")));
case '\\':
/*
* An escape must be after a level start, after an element
* start, or after an element delimiter. In any case we
* now must be past an element start.
*/
if (parse_state != SET_LEVEL_STARTED &&
parse_state != SET_ELEM_STARTED &&
parse_state != SET_QUOTED_ELEM_STARTED &&
parse_state != SET_ELEM_DELIMITED)
ereturn(escontext, -1,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("malformed array literal: \"%s\"", str),
errdetail("Unexpected \"%c\" character.",
'\\')));
/* Inside quotes the state is left alone; otherwise an element has begun. */
if (parse_state != SET_QUOTED_ELEM_STARTED)
parse_state = SET_ELEM_STARTED;
/* skip the escaped character */
if (*(ptr + 1))
ptr++;
else
ereturn(escontext, -1,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("malformed array literal: \"%s\"", str),
errdetail("Unexpected end of input.")));
break;
case '"':
/*
* A quote must be after a level start, after a quoted
* element start, or after an element delimiter. In any
* case we now must be past an element start.
*/
if (parse_state != SET_LEVEL_STARTED &&
parse_state != SET_QUOTED_ELEM_STARTED &&
parse_state != SET_ELEM_DELIMITED)
ereturn(escontext, -1,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("malformed array literal: \"%s\"", str),
errdetail("Unexpected array element.")));
/* Toggle quoting: an opening quote starts an element, a closing one completes it. */
in_quotes = !in_quotes;
if (in_quotes)
parse_state = SET_QUOTED_ELEM_STARTED;
else
parse_state = SET_QUOTED_ELEM_COMPLETED;
break;
case '{':
/* Inside quotes a left brace is ordinary data and needs no checks. */
if (!in_quotes)
{
/*
* A left brace can occur if no nesting has occurred
* yet, after a level start, or after a level
* delimiter.
*/
if (parse_state != SET_NO_LEVEL &&
parse_state != SET_LEVEL_STARTED &&
parse_state != SET_LEVEL_DELIMITED)
ereturn(escontext, -1,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("malformed array literal: \"%s\"", str),
errdetail("Unexpected \"%c\" character.",
'{')));
parse_state = SET_LEVEL_STARTED;
/* Only one brace level is allowed, so a second left brace is an error. */
if (nest_level >= 1)
ereturn(escontext, -1,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
nest_level + 1, 1)));
nest_level++;
}
break;
case '}':
/* Inside quotes a right brace is ordinary data and needs no checks. */
if (!in_quotes)
{
/*
* A right brace can occur after an element start, an
* element completion, a quoted element completion, or
* a level completion. It may also directly follow the
* opening brace at nesting level 1, which is the empty
* set "{}".
*/
if (parse_state != SET_ELEM_STARTED &&
parse_state != SET_ELEM_COMPLETED &&
parse_state != SET_QUOTED_ELEM_COMPLETED &&
parse_state != SET_LEVEL_COMPLETED &&
!(nest_level == 1 && parse_state == SET_LEVEL_STARTED))
ereturn(escontext, -1,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("malformed array literal: \"%s\"", str),
errdetail("Unexpected \"%c\" character.",
'}')));
parse_state = SET_LEVEL_COMPLETED;
if (nest_level == 0)
ereturn(escontext, -1,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("malformed array literal: \"%s\"", str),
errdetail("Unmatched \"%c\" character.", '}')));
nest_level--;
/* Closing the outermost level ends the scan of both loops. */
if (nest_level == 0)
eoArray = itemdone = true;
}
break;
default:
/* Quoted characters are plain data; only unquoted ones affect the state. */
if (!in_quotes)
{
if (*ptr == typdelim)
{
/*
* Delimiters can occur after an element start, an
* element completion, a quoted element
* completion, or a level completion.
*/
if (parse_state != SET_ELEM_STARTED &&
parse_state != SET_ELEM_COMPLETED &&
parse_state != SET_QUOTED_ELEM_COMPLETED &&
parse_state != SET_LEVEL_COMPLETED)
ereturn(escontext, -1,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("malformed array literal: \"%s\"", str),
errdetail("Unexpected \"%c\" character.",
typdelim)));
if (parse_state == SET_LEVEL_COMPLETED)
parse_state = SET_LEVEL_DELIMITED;
else
parse_state = SET_ELEM_DELIMITED;
/* Each delimiter ends the current element and implies one more. */
itemdone = true;
nelems++;
}
else if (!set_isspace(*ptr))
{
/*
* Other non-space characters must be after a
* level start, after an element start, or after
* an element delimiter. In any case we now must
* be past an element start.
*/
if (parse_state != SET_LEVEL_STARTED &&
parse_state != SET_ELEM_STARTED &&
parse_state != SET_ELEM_DELIMITED)
ereturn(escontext, -1,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("malformed array literal: \"%s\"", str),
errdetail("Unexpected array element.")));
parse_state = SET_ELEM_STARTED;
}
}
break;
}
/* Advance within the element; the terminating character is skipped below. */
if (!itemdone)
ptr++;
}
/* Step past the delimiter or closing brace that ended this element. */
ptr++;
}
/* only whitespace is allowed after the closing brace */
while (*ptr)
{
if (!set_isspace(*ptr++))
ereturn(escontext, -1,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("malformed array literal: \"%s\"", str),
errdetail("Junk after closing right brace.")));
}
/* special case for an empty set: nelems starts at 1, so report 0 explicitly */
if (empty_array)
return 0;
else
return nelems;
}