Hello, I'd like to contribute a "regexp_match()" function as discussed in bug #5469 [1] The aim is to overcome the limitation outlined in the thread above <http://archives.postgresql.org/pgsql-bugs/2010-05/msg00227.php>.
PostgreSQL currently offers the function regexp_matches(), a SRF (which, unless invoked with the 'g' flag, returns at most one result). An user interested in the "nonglob" behaviour, i.e. expecting at most 1 match group, has to adopt special care to avoid records to be dropped from the target list in case no match is found. Being this a rather common use case, I'd like to provide a function with a less astonishing behaviour, i.e. returning a text[] instead of a set of text[] and returning NULL in case no match is found (the 'g' flag wouldn't be supported). The proposed name is regexp_match(), to underline the semantics very similar to regexp_matches() but returning a single value as result. While the symmetry between the names is a pro, an excessive similarity may result confusing, so I wouldn't be surprised if a better name is found. The implementation of the function is very simple, given the infrastructure already available for the other regexp-related functions. I've actually already implemented it (mostly to check how easy or hard it would have been: I had never written a C procedure for PG before): a preliminary patch is attached. If the idea is accepted I will submit a complete patch including documentation and tests. Best regards, -- Daniele
diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c index c3fd23e..422a94d 100644 --- a/src/backend/utils/adt/regexp.c +++ b/src/backend/utils/adt/regexp.c @@ -817,6 +817,42 @@ regexp_matches_no_flags(PG_FUNCTION_ARGS) } /* + * regexp_match() + * Return the first match of a pattern within a string. + */ +Datum +regexp_match(PG_FUNCTION_ARGS) +{ + regexp_matches_ctx *matchctx; + + matchctx = setup_regexp_matches(PG_GETARG_TEXT_PP(0), + PG_GETARG_TEXT_PP(1), + PG_GETARG_TEXT_PP_IF_EXISTS(2), + true, false, true); + + if (matchctx->nmatches) + { + ArrayType *result_ary; + + matchctx->elems = (Datum *) palloc(sizeof(Datum) * matchctx->npatterns); + matchctx->nulls = (bool *) palloc(sizeof(bool) * matchctx->npatterns); + result_ary = build_regexp_matches_result(matchctx); + PG_RETURN_ARRAYTYPE_P(result_ary); + } + else + { + PG_RETURN_NULL(); + } +} + +/* This is separate to keep the opr_sanity regression test from complaining */ +Datum +regexp_match_no_flags(PG_FUNCTION_ARGS) +{ + return regexp_match(fcinfo); +} + +/* * setup_regexp_matches --- do the initial matching for regexp_matches() * or regexp_split() * diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index f2751a4..970036a 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -2241,6 +2241,10 @@ DATA(insert OID = 2763 ( regexp_matches PGNSP PGUID 12 1 1 0 f f f t t i 2 0 DESCR("return all match groups for regexp"); DATA(insert OID = 2764 ( regexp_matches PGNSP PGUID 12 1 10 0 f f f t t i 3 0 1009 "25 25 25" _null_ _null_ _null_ _null_ regexp_matches _null_ _null_ _null_ )); DESCR("return all match groups for regexp"); +DATA(insert OID = 3778 ( regexp_match PGNSP PGUID 12 1 1 0 f f f t f i 2 0 1009 "25 25" _null_ _null_ _null_ _null_ regexp_match_no_flags _null_ _null_ _null_ )); +DESCR("return first match group for regexp"); +DATA(insert OID = 3779 ( regexp_match PGNSP PGUID 12 1 1 0 f f f t f i 3 0 1009 "25 25 25" _null_ _null_ _null_ _null_ regexp_match _null_ _null_ _null_ )); +DESCR("return first match group for regexp"); DATA(insert OID = 2088 ( split_part PGNSP PGUID 12 1 0 0 f f f t f i 3 0 25 "25 25 23" _null_ _null_ _null_ _null_ split_text _null_ _null_ _null_ )); DESCR("split string by field_sep and return field_num"); DATA(insert OID = 2765 ( regexp_split_to_table PGNSP PGUID 12 1 1000 0 f f f t t i 2 0 25 "25 25" _null_ _null_ _null_ _null_ regexp_split_to_table_no_flags _null_ _null_ _null_ )); diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index e8f38a6..f6320f1 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -525,6 +525,8 @@ extern Datum textregexreplace(PG_FUNCTION_ARGS); extern Datum similar_escape(PG_FUNCTION_ARGS); extern Datum regexp_matches(PG_FUNCTION_ARGS); extern Datum regexp_matches_no_flags(PG_FUNCTION_ARGS); +extern Datum regexp_match(PG_FUNCTION_ARGS); +extern Datum regexp_match_no_flags(PG_FUNCTION_ARGS); extern Datum regexp_split_to_table(PG_FUNCTION_ARGS); extern Datum regexp_split_to_table_no_flags(PG_FUNCTION_ARGS); extern Datum regexp_split_to_array(PG_FUNCTION_ARGS);
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers