Hello,

I'd like to contribute a "regexp_match()" function as discussed in bug
#5469 [1] The aim is to overcome the limitation outlined in the thread
above <http://archives.postgresql.org/pgsql-bugs/2010-05/msg00227.php>.

PostgreSQL currently offers the function regexp_matches(), a SRF
(which, unless invoked with the 'g' flag, returns at most one result).
An user interested in the "nonglob" behaviour, i.e. expecting at most
1 match group, has to adopt special care to avoid records to be
dropped from the target list in case no match is found. Being this a
rather common use case, I'd like to provide a function with a less
astonishing behaviour, i.e. returning a text[] instead of a set of
text[] and returning NULL in case no match is found (the 'g' flag
wouldn't be supported).

The proposed name is regexp_match(), to underline the semantics very
similar to regexp_matches() but returning a single value as result.
While the symmetry between the names is a pro, an excessive similarity
may result confusing, so I wouldn't be surprised if a better name is
found. The implementation of the function is very simple, given the
infrastructure already available for the other regexp-related
functions. I've actually already implemented it (mostly to check how
easy or hard it would have been: I had never written a C procedure for
PG before): a preliminary patch is attached.

If the idea is accepted I will submit a complete patch including
documentation and tests.

Best regards,


-- Daniele
diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c
index c3fd23e..422a94d 100644
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -817,6 +817,42 @@ regexp_matches_no_flags(PG_FUNCTION_ARGS)
 }
 
 /*
+ * regexp_match()
+ *		Return the first match of a pattern within a string.
+ */
+Datum
+regexp_match(PG_FUNCTION_ARGS)
+{
+	regexp_matches_ctx *matchctx;
+
+	matchctx = setup_regexp_matches(PG_GETARG_TEXT_PP(0),
+									PG_GETARG_TEXT_PP(1),
+									PG_GETARG_TEXT_PP_IF_EXISTS(2),
+									true, false, true);
+
+	if (matchctx->nmatches)
+	{
+		ArrayType  *result_ary;
+
+		matchctx->elems = (Datum *) palloc(sizeof(Datum) * matchctx->npatterns);
+		matchctx->nulls = (bool *) palloc(sizeof(bool) * matchctx->npatterns);
+		result_ary = build_regexp_matches_result(matchctx);
+		PG_RETURN_ARRAYTYPE_P(result_ary);
+	}
+	else
+	{
+		PG_RETURN_NULL();
+	}
+}
+
+/* This is separate to keep the opr_sanity regression test from complaining */
+Datum
+regexp_match_no_flags(PG_FUNCTION_ARGS)
+{
+	return regexp_match(fcinfo);
+}
+
+/*
  * setup_regexp_matches --- do the initial matching for regexp_matches()
  *		or regexp_split()
  *
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index f2751a4..970036a 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -2241,6 +2241,10 @@ DATA(insert OID =  2763 ( regexp_matches   PGNSP PGUID 12 1 1 0 f f f t t i 2 0
 DESCR("return all match groups for regexp");
 DATA(insert OID =  2764 ( regexp_matches   PGNSP PGUID 12 1 10 0 f f f t t i 3 0 1009 "25 25 25" _null_ _null_ _null_ _null_ regexp_matches _null_ _null_ _null_ ));
 DESCR("return all match groups for regexp");
+DATA(insert OID =  3778 ( regexp_match     PGNSP PGUID 12 1 1 0 f f f t f i 2 0 1009 "25 25" _null_ _null_ _null_ _null_ regexp_match_no_flags _null_ _null_ _null_ ));
+DESCR("return first match group for regexp");
+DATA(insert OID =  3779 ( regexp_match     PGNSP PGUID 12 1 1 0 f f f t f i 3 0 1009 "25 25 25" _null_ _null_ _null_ _null_ regexp_match _null_ _null_ _null_ ));
+DESCR("return first match group for regexp");
 DATA(insert OID =  2088 ( split_part   PGNSP PGUID 12 1 0 0 f f f t f i 3 0 25 "25 25 23" _null_ _null_ _null_ _null_	split_text _null_ _null_ _null_ ));
 DESCR("split string by field_sep and return field_num");
 DATA(insert OID =  2765 ( regexp_split_to_table PGNSP PGUID 12 1 1000 0 f f f t t i 2 0 25 "25 25" _null_ _null_ _null_ _null_	regexp_split_to_table_no_flags _null_ _null_ _null_ ));
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index e8f38a6..f6320f1 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -525,6 +525,8 @@ extern Datum textregexreplace(PG_FUNCTION_ARGS);
 extern Datum similar_escape(PG_FUNCTION_ARGS);
 extern Datum regexp_matches(PG_FUNCTION_ARGS);
 extern Datum regexp_matches_no_flags(PG_FUNCTION_ARGS);
+extern Datum regexp_match(PG_FUNCTION_ARGS);
+extern Datum regexp_match_no_flags(PG_FUNCTION_ARGS);
 extern Datum regexp_split_to_table(PG_FUNCTION_ARGS);
 extern Datum regexp_split_to_table_no_flags(PG_FUNCTION_ARGS);
 extern Datum regexp_split_to_array(PG_FUNCTION_ARGS);
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to