Re: [PATCH] proposal for regexp_count, regexp_instr, regexp_substr and regexp_replace

Gilles Darold Wed, 03 Mar 2021 01:22:35 -0800

My apologies for the links in the head, the email formatting and the
missing patch, I accidently send the email too early.


--

Gilles

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index bf99f82149..88e08b40d2 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -3097,6 +3097,47 @@ repeat('Pg', 4) <returnvalue>PgPgPgPg</returnvalue>
        </para></entry>
       </row>
 
+      <row>
+       <entry role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>regexp_count</primary>
+        </indexterm>
+        <function>regexp_count</function> ( <parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type> [, <parameter>position</parameter> <type>integer</type> ] [, <parameter>flags</parameter> <type>text</type> ] )
+        <returnvalue>integer</returnvalue>
+       </para>
+       <para>
+        Return the number of times a pattern occurs for a match of a POSIX
+        regular expression to the <parameter>string</parameter>; see
+        <xref linkend="functions-posix-regexp"/>.
+       </para>
+       <para>
+        <literal>regexp_count('123456789012', '\d{3}', 3)</literal>
+        <returnvalue>3</returnvalue>
+       </para></entry>
+      </row>
+
+      <row>
+       <entry role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>regexp_instr</primary>
+        </indexterm>
+        <function>regexp_instr</function> ( <parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type> [, <parameter>position</parameter> <type>integer</type> ] [, <parameter>occurence</parameter> <type>integer</type> ] [, <parameter>returnopt</parameter> <type>integer</type> ] [, <parameter>flags</parameter> <type>text</type> ] [, <parameter>group</parameter> <type>integer</type> ] )
+        <returnvalue>integer</returnvalue>
+       </para>
+       <para>
+       Return the position within <parameter>string</parameter> where the
+       match of a POSIX regular expression occurs. It returns an integer
+       indicating the beginning or ending position of the matched substring,
+       depending on the value of the <parameter>returnopt</parameter> argument
+       (default beginning). If no match is found, then the function returns 0;
+       see <xref linkend="functions-posix-regexp"/>.
+       </para>
+       <para>
+        <literal>regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 4)</literal>
+        <returnvalue>7</returnvalue>
+       </para></entry>
+      </row>
+
       <row>
        <entry role="func_table_entry"><para role="func_signature">
         <indexterm>
@@ -3157,6 +3198,24 @@ repeat('Pg', 4) <returnvalue>PgPgPgPg</returnvalue>
        </para></entry>
       </row>
 
+      <row>
+       <entry role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>regexp_substr</primary>
+        </indexterm>
+        <function>regexp_substr</function> ( <parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type> [, <parameter>position</parameter> <type>integer</type> ] [, <parameter>occurence</parameter> <type>integer</type> ] [, <parameter>flags</parameter> <type>text</type> ] [, <parameter>group</parameter> <type>integer</type> ] )
+        <returnvalue>text</returnvalue>
+       </para>
+       <para>
+       Return the substring within <parameter>string</parameter> corresponding to the
+       match of a POSIX regular expression; see <xref linkend="functions-posix-regexp"/>.
+       </para>
+       <para>
+        <literal>regexp_substr('1234567890 1234557890', '(123)(4(5[56])(78))', 1, 2, 'i', 3)</literal>
+        <returnvalue>55</returnvalue>
+       </para></entry>
+      </row>
+
       <row>
        <entry role="func_table_entry"><para role="func_signature">
         <indexterm>
@@ -5295,6 +5354,15 @@ substring('foobar' similar '#"o_b#"%' escape '#')    <lineannotation>NULL</linea
    <indexterm>
     <primary>regexp_split_to_array</primary>
    </indexterm>
+   <indexterm>
+    <primary>regexp_count</primary>
+   </indexterm>
+   <indexterm>
+    <primary>regexp_instr</primary>
+   </indexterm>
+   <indexterm>
+    <primary>regexp_substr</primary>
+   </indexterm>
 
    <para>
     <xref linkend="functions-posix-table"/> lists the available
@@ -5669,6 +5737,132 @@ SELECT foo FROM regexp_split_to_table('the quick brown fox', '\s*') AS foo;
     in practice.  Other software systems such as Perl use similar definitions.
    </para>
 
+    <para>
+     The <function>regexp_count</function> function returns the number of
+     captured substring(s) resulting from matching a POSIX regular
+     expression pattern to a string.  It has the syntax <function>regexp_replace</function>(
+     <replaceable>string</replaceable>, <replaceable>pattern</replaceable> <optional>,
+     <replaceable>position</replaceable> </optional> <optional>, <replaceable>flags</replaceable> </optional>).
+     <replaceable>pattern</replaceable> is searched in <replaceable>string</replaceable> starting
+     from an optional <replaceable>position</replaceable> or from the beginning of <replaceable>string</replaceable>
+     by default.  The <replaceable>flags</replaceable> parameter is an optional text string
+     containing zero or more single-letter flags that change the function's behavior.
+     <function>regexp_count</function> accepts all the flags
+     shown in <xref linkend="posix-embedded-options-table"/>.
+     The <literal>g</literal> flag is forced internally to count all matches.
+     This function returns 0 if there is no match or the number of match as
+     an integer.
+    </para>
+
+   <para>
+    Some examples:
+<programlisting>
+SELECT regexp_count('123123123123', '\d{3}', 1);
+ regexp_count 
+--------------
+            4
+(1 row)
+
+SELECT regexp_count('Hello'||CHR(10)||'world!', '^world!$', 1, 'm');
+ regexp_count 
+--------------
+            1
+(1 row)
+</programlisting>
+   </para>
+
+    <para>
+     The <function>regexp_instr</function> function returns the beginning or ending
+     position of the matched substring resulting from matching a POSIX regular
+     expression pattern to a string.  It has the syntax <function>regexp_instr</function>(
+     <replaceable>string</replaceable>, <replaceable>pattern</replaceable> <optional>,
+     <replaceable>position</replaceable> </optional> <optional>, <replaceable>occurrence</replaceable> </optional>
+     <optional>, <replaceable>returnopt</replaceable> </optional>
+     <optional>, <replaceable>flags</replaceable> </optional>
+     <optional>, <replaceable>group</replaceable> </optional>).
+     <replaceable>pattern</replaceable> is searched in <replaceable>string</replaceable> starting
+     from an optional <replaceable>position</replaceable> or from the beginning
+     of <replaceable>string</replaceable> by default.  <replaceable>occurrence</replaceable>
+     indicates which occurrence of <replaceable>pattern</replaceable> in <replaceable>string</replaceable>
+     should be searched.  When <replaceable>returnopt</replaceable> is set to 0 (default) the function
+     returns the position of the first character of the occurrence.  When set to 1 returns the position
+     of the character after the occurrence.
+     The <replaceable>flags</replaceable> parameter is an optional text string
+     containing zero or more single-letter flags that change the function's behavior.
+     <function>regexp_count</function> accepts all the flags
+     shown in <xref linkend="posix-embedded-options-table"/>.
+     The <literal>g</literal> flag is forced internally to track all matches.
+     For a pattern with capture groups, <replaceable>group</replaceable> is an integer indicating
+     which capture in pattern is the target of the function.  A capture group is a part of the pattern
+     enclosed in parentheses. Capture groups can be nested.  They are numbered in order in which their
+     left parentheses appear in pattern. If <replaceable>group</replaceable> is zero, then the position
+     of the entire substring that matches the pattern is returned. If <replaceable>pattern</replaceable>
+     does not have at least <replaceable>group</replaceable> capture group, the function returns zero.
+     This function returns 0 if there is no match or the starting or ending position
+     of match as an integer.
+    </para>
+
+   <para>
+    Some examples:
+<programlisting>
+SELECT regexp_instr('number of your street, zipcode thetown, FR', '[^ ]+', 1, 6);
+ regexp_instr 
+--------------
+           32
+(1 row)
+
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 3);
+ regexp_instr 
+--------------
+            5
+(1 row)
+</programlisting>
+   </para>
+
+    <para>
+     The <function>regexp_substr</function> function returns the
+     matched substring resulting from matching a POSIX regular
+     expression pattern to a string.  It has the syntax <function>regexp_substr</function>(
+     <replaceable>string</replaceable>, <replaceable>pattern</replaceable> <optional>,
+     <replaceable>position</replaceable> </optional> <optional>, <replaceable>occurrence</replaceable> </optional>
+     <optional>, <replaceable>flags</replaceable> </optional>
+     <optional>, <replaceable>group</replaceable> </optional>).
+     <replaceable>pattern</replaceable> is searched in <replaceable>string</replaceable> starting
+     from an optional <replaceable>position</replaceable> or from the beginning
+     of <replaceable>string</replaceable> by default.  <replaceable>occurrence</replaceable>
+     indicates which occurrence of <replaceable>pattern</replaceable> in <replaceable>string</replaceable>
+     should be searched.  The <replaceable>flags</replaceable> parameter is an optional text string
+     containing zero or more single-letter flags that change the function's behavior.
+     <function>regexp_count</function> accepts all the flags
+     shown in <xref linkend="posix-embedded-options-table"/>.
+     The <literal>g</literal> flag is forced internally to track all matches.
+     For a pattern with capture groups, <replaceable>group</replaceable> is an integer indicating
+     which capture in pattern is the target of the function.  A capture group is a part of the pattern
+     enclosed in parentheses. Capture groups can be nested.  They are numbered in order in which their
+     left parentheses appear in pattern. If <replaceable>group</replaceable> is zero, then the position
+     of the entire substring that matches the pattern is returned. If <replaceable>pattern</replaceable>
+     does not have at least <replaceable>group</replaceable> capture group, the function returns zero.
+     This function returns NULL if there is no match or the substring of match.
+    </para>
+
+   <para>
+    Some examples:
+<programlisting>
+SELECT regexp_substr('number of your street, zipcode town, FR', ',[^,]+', 1, 2);
+ regexp_substr 
+---------------
+ , FR
+(1 row)
+
+SELECT regexp_substr('1234567890 1234557890', '(123)(4(5[56])(78))', 1, 2, 'i', 3);
+ regexp_substr 
+---------------
+ 55
+(1 row)
+</programlisting>
+   </para>
+
+
 <!-- derived from the re_syntax.n man page -->
 
    <sect3 id="posix-syntax-details">
diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c
index a32c5c82ab..42c25cdd75 100644
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -120,6 +120,7 @@ static regexp_matches_ctx *setup_regexp_matches(text *orig_str, text *pattern,
 static ArrayType *build_regexp_match_result(regexp_matches_ctx *matchctx);
 static Datum build_regexp_split_result(regexp_matches_ctx *splitctx);
 
+static text *enclose_with_parenthesis(text *str);
 
 /*
  * RE_compile_and_cache - compile a RE, caching if possible
@@ -555,7 +556,6 @@ texticregexne(PG_FUNCTION_ARGS)
 										   0, NULL));
 }
 
-
 /*
  * textregexsubstr()
  *		Return a substring matched by a regular expression.
@@ -1063,6 +1063,350 @@ regexp_matches_no_flags(PG_FUNCTION_ARGS)
 	return regexp_matches(fcinfo);
 }
 
+/*
+ * regexp_count()
+ *		Return the number of matches of a pattern within a string.
+ */
+Datum
+regexp_count(PG_FUNCTION_ARGS)
+{
+	text	   *orig_str = NULL;
+	text	   *pattern = PG_GETARG_TEXT_PP(1);
+	int	   start = 1;
+	text	   *flags = PG_GETARG_TEXT_PP_IF_EXISTS(3);
+
+	pg_re_flags re_flags;
+	regexp_matches_ctx *matchctx;
+
+	if (PG_NARGS() > 2)
+		start = PG_GETARG_INT32(2);
+
+	if (start < 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("negative start position not allowed.")));
+
+	/* regexp_count(string, pattern, start[, flags]) */
+	if (start > 1)
+		orig_str = DatumGetTextPP(DirectFunctionCall2(text_substr_no_len,
+									   PG_GETARG_DATUM(0),
+									   Int32GetDatum(start)
+									   ));
+	else
+		orig_str = PG_GETARG_TEXT_PP(0);
+
+	/* Determine options */
+	parse_re_flags(&re_flags, flags);
+
+	/* this function require flag 'g' */
+	re_flags.glob = true;
+
+	matchctx = setup_regexp_matches(orig_str, pattern, &re_flags,
+									PG_GET_COLLATION(), true, false, false);
+
+	PG_RETURN_INT32(matchctx->nmatches);
+}
+
+/* This is separate to keep the opr_sanity regression test from complaining */
+Datum
+regexp_count_no_start(PG_FUNCTION_ARGS)
+{
+	return regexp_count(fcinfo);
+}
+
+
+/* This is separate to keep the opr_sanity regression test from complaining */
+Datum
+regexp_count_no_flags(PG_FUNCTION_ARGS)
+{
+	return regexp_count(fcinfo);
+}
+
+/*
+ * Return text string between parenthesis
+ */
+static text *
+enclose_with_parenthesis(text *str)
+{
+	int        len = VARSIZE_ANY_EXHDR(str);
+	text       *result;
+	char       *ptr;
+
+	result = palloc(len + VARHDRSZ + 2);
+	SET_VARSIZE(result, len + VARHDRSZ + 2);
+	ptr = VARDATA(result);
+	memcpy(ptr, "(", 1);
+	memcpy(ptr+1, VARDATA_ANY(str), len);
+	memcpy(ptr+len+1, ")", 1);
+
+	return result;
+}
+
+/*
+ * regexp_instr()
+ *		Return the position within the string where the match was located
+ */
+Datum
+regexp_instr(PG_FUNCTION_ARGS)
+{
+	text	   *orig_str = NULL;
+	text	   *pattern = NULL;
+	text	   *flags = PG_GETARG_TEXT_PP_IF_EXISTS(5);
+	int        start = 1;
+	int        occurrence = 1;
+	int        return_opt = 0;
+	int        subexpr = 0;
+	int        pos;
+
+	pg_re_flags re_flags;
+	regexp_matches_ctx *matchctx;
+
+	/* regexp_instr(string, pattern, start) */
+	if (PG_NARGS() > 2)
+		start = PG_GETARG_INT32(2);
+
+	if (start < 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("negative start position not allowed.")));
+
+	if (start > 1)
+		orig_str = DatumGetTextPP(DirectFunctionCall2(text_substr_no_len,
+									   PG_GETARG_DATUM(0),
+									   Int32GetDatum(start)
+									   ));
+	else
+		/* regexp_instr(string, pattern) */
+		orig_str = PG_GETARG_TEXT_PP(0);
+
+	if (orig_str == NULL)
+		PG_RETURN_NULL();
+
+	/* regexp_instr(string, pattern, start, occurrence) */
+	if (PG_NARGS() > 3)
+		occurrence = PG_GETARG_INT32(3);
+
+	if (occurrence <= 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("occurrence value must be greater than 0.")));
+
+	/* regexp_instr(string, pattern, start, occurrence, return_opt) */
+	if (PG_NARGS() > 4)
+		return_opt = PG_GETARG_INT32(4);
+
+	if (return_opt != 0 && return_opt != 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("return option value must be 0 or 1.")));
+
+	/* regexp_instr(string, pattern, start, occurrence, return_opt, flags, subexpr) */
+	if (PG_NARGS() > 6)
+		subexpr = PG_GETARG_INT32(6);
+	if (subexpr < 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("number of capture group must be a positive value.")));
+
+	/*
+	 * If subexpr is zero (default), then the position of the entire
+	 * substring that matches the pattern is returned. Otherwise we
+	 * will exactly register the subexpressions given in the pattern.
+	 * Enclose pattern between parenthesis to register the position
+	 * of the entire substring.
+	 */
+	pattern = enclose_with_parenthesis(PG_GETARG_TEXT_PP(1));
+
+	/* Determine options */
+	parse_re_flags(&re_flags, flags);
+	/* this function require flag 'g' */
+	re_flags.glob = true;
+
+	matchctx = setup_regexp_matches(orig_str, pattern, &re_flags,
+									PG_GET_COLLATION(), true, false, false);
+
+	/* If no match is found, then the function returns 0 */
+	if (matchctx->nmatches == 0)
+		PG_RETURN_INT32(0);
+
+	/* When occurrence exceed matches return 0 */
+	if (occurrence > matchctx->nmatches)
+		PG_RETURN_INT32(0);
+
+	/* When subexpr exceed number of subexpression return 0 */
+	if (subexpr > matchctx->npatterns - 1)
+		PG_RETURN_INT32(0);
+
+	/*
+	 * Returns the position of the first character of the occurrence
+	 * or for subexpression in this occurrence.
+	 */
+	pos = (occurrence*matchctx->npatterns*2)-((matchctx->npatterns-subexpr)*2);
+	if (return_opt == 1)
+		pos += 1;
+
+	PG_RETURN_INT32(matchctx->match_locs[pos]+start);
+}
+
+/* This is separate to keep the opr_sanity regression test from complaining */
+Datum
+regexp_instr_no_start(PG_FUNCTION_ARGS)
+{
+	return regexp_instr(fcinfo);
+}
+
+/* This is separate to keep the opr_sanity regression test from complaining */
+Datum
+regexp_instr_no_occurrence(PG_FUNCTION_ARGS)
+{
+	return regexp_instr(fcinfo);
+}
+
+/* This is separate to keep the opr_sanity regression test from complaining */
+Datum
+regexp_instr_no_return_opt(PG_FUNCTION_ARGS)
+{
+	return regexp_instr(fcinfo);
+}
+
+/* This is separate to keep the opr_sanity regression test from complaining */
+Datum
+regexp_instr_no_flags(PG_FUNCTION_ARGS)
+{
+	return regexp_instr(fcinfo);
+}
+
+/* This is separate to keep the opr_sanity regression test from complaining */
+Datum
+regexp_instr_no_subexpr(PG_FUNCTION_ARGS)
+{
+	return regexp_instr(fcinfo);
+}
+
+/*
+ * regexp_substr()
+ *		Return the substring within the string that match a regular
+ *		expression pattern
+ */
+Datum
+regexp_substr(PG_FUNCTION_ARGS)
+{
+	text	   *orig_str = NULL;
+	text	   *pattern = NULL;
+	text	   *flags = PG_GETARG_TEXT_PP_IF_EXISTS(4);
+	int        start = 1;
+	int        occurrence = 1;
+	int        subexpr = 0;
+	int        so, eo, pos;
+
+	pg_re_flags re_flags;
+	regexp_matches_ctx *matchctx;
+
+	if (PG_NARGS() > 2)
+		start = PG_GETARG_INT32(2);
+
+	if (start < 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("negative start position not allowed.")));
+
+	/* regexp_substr(string, pattern, start) */
+	if (start > 1)
+		orig_str = DatumGetTextPP(DirectFunctionCall2(text_substr_no_len,
+									   PG_GETARG_DATUM(0),
+									   Int32GetDatum(start)
+									   ));
+	else
+		/* regexp_substr(string, pattern) */
+		orig_str = PG_GETARG_TEXT_PP(0);
+
+	if (orig_str == NULL)
+		PG_RETURN_NULL();
+
+	/* regexp_substr(string, pattern, start, occurrence) */
+	if (PG_NARGS() > 3)
+		occurrence = PG_GETARG_INT32(3);
+	if (occurrence <= 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("occurrence value must be greater than 0.")));
+
+	/* regexp_substr(string, pattern, start, occurrence, flags, subexpr) */
+	if (PG_NARGS() > 5)
+		subexpr = PG_GETARG_INT32(5);
+	if (subexpr < 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("number of capture group must be a positive value.")));
+
+	/*
+	 * If subexpr is zero (default), then the position of the entire
+	 * substring that matches the pattern is returned. Otherwise we
+	 * will exactly register the subexpressions given in the pattern.
+	 * Enclose pattern between parenthesis to register the position
+	 * of the entire substring.
+	 */
+	pattern = enclose_with_parenthesis(PG_GETARG_TEXT_PP(1));
+
+	/* Determine options */
+	parse_re_flags(&re_flags, flags);
+	/* this function require flag 'g' */
+	re_flags.glob = true;
+
+	matchctx = setup_regexp_matches(orig_str, pattern, &re_flags,
+									PG_GET_COLLATION(), true, false, false);
+
+	/* If no match is found, then the function returns NULL */
+	if (matchctx->nmatches == 0)
+		PG_RETURN_NULL();
+
+	/* When occurrence exceed matches return NULL */
+	if (occurrence > matchctx->nmatches)
+		PG_RETURN_NULL();
+
+	/* When subexpr exceed number of subexpression return NULL */
+	if (subexpr > matchctx->npatterns - 1)
+		PG_RETURN_NULL();
+
+	/* Returns the substring corresponding to the occurrence. */
+	pos = (occurrence*matchctx->npatterns*2)-((matchctx->npatterns-subexpr)*2);
+	so = matchctx->match_locs[pos]+1;
+	eo = matchctx->match_locs[pos+1]+1;
+
+	 PG_RETURN_DATUM(DirectFunctionCall3(text_substr,
+									   PointerGetDatum(matchctx->orig_str),
+									   Int32GetDatum(so),
+									   Int32GetDatum(eo - so)));
+}
+
+/* This is separate to keep the opr_sanity regression test from complaining */
+Datum
+regexp_substr_no_start(PG_FUNCTION_ARGS)
+{
+	return regexp_substr(fcinfo);
+}
+
+/* This is separate to keep the opr_sanity regression test from complaining */
+Datum
+regexp_substr_no_occurrence(PG_FUNCTION_ARGS)
+{
+	return regexp_substr(fcinfo);
+}
+
+/* This is separate to keep the opr_sanity regression test from complaining */
+Datum
+regexp_substr_no_flags(PG_FUNCTION_ARGS)
+{
+	return regexp_substr(fcinfo);
+}
+
+/* This is separate to keep the opr_sanity regression test from complaining */
+Datum
+regexp_substr_no_subexpr(PG_FUNCTION_ARGS)
+{
+	return regexp_substr(fcinfo);
+}
+
 /*
  * setup_regexp_matches --- do the initial matching for regexp_match
  *		and regexp_split functions
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 3d3974f467..aae9226ec5 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -3557,6 +3557,48 @@
   proname => 'regexp_matches', prorows => '10', proretset => 't',
   prorettype => '_text', proargtypes => 'text text text',
   prosrc => 'regexp_matches' },
+{ oid => '9614', descr => 'count match(es) for regexp',
+  proname => 'regexp_count', prorettype => 'int4', proargtypes => 'text text',
+  prosrc => 'regexp_count_no_start' },
+{ oid => '9615', descr => 'count match(es) for regexp',
+  proname => 'regexp_count', prorettype => 'int4', proargtypes => 'text text int4',
+  prosrc => 'regexp_count_no_flags' },
+{ oid => '9616', descr => 'count match(es) for regexp',
+  proname => 'regexp_count', prorettype => 'int4',
+  proargtypes => 'text text int4 text', prosrc => 'regexp_count' },
+{ oid => '9617', descr => 'position where the match for regexp was located',
+  proname => 'regexp_instr', prorettype => 'int4', proargtypes => 'text text',
+  prosrc => 'regexp_instr_no_start' },
+{ oid => '9618', descr => 'position where the match for regexp was located',
+  proname => 'regexp_instr', prorettype => 'int4', proargtypes => 'text text int4',
+  prosrc => 'regexp_instr_no_occurrence' },
+{ oid => '9619', descr => 'position where the match for regexp was located',
+  proname => 'regexp_instr', prorettype => 'int4', proargtypes => 'text text int4 int4',
+  prosrc => 'regexp_instr_no_return_opt' },
+{ oid => '9620', descr => 'position where the match for regexp was located',
+  proname => 'regexp_instr', prorettype => 'int4', proargtypes => 'text text int4 int4 int4',
+  prosrc => 'regexp_instr_no_flags' },
+{ oid => '9621', descr => 'position where the match for regexp was located',
+  proname => 'regexp_instr', prorettype => 'int4', proargtypes => 'text text int4 int4 int4 text',
+  prosrc => 'regexp_instr_no_subexpr' },
+{ oid => '9622', descr => 'position where the match for regexp was located',
+  proname => 'regexp_instr', prorettype => 'int4', proargtypes => 'text text int4 int4 int4 text int4',
+  prosrc => 'regexp_instr' },
+{ oid => '9623', descr => 'substring that match the regexp pattern',
+  proname => 'regexp_substr', prorettype => 'text', proargtypes => 'text text',
+  prosrc => 'regexp_substr_no_start' },
+{ oid => '9624', descr => 'substring that match the regexp pattern',
+  proname => 'regexp_substr', prorettype => 'text', proargtypes => 'text text int4',
+  prosrc => 'regexp_substr_no_occurrence' },
+{ oid => '9625', descr => 'substring that match the regexp pattern',
+  proname => 'regexp_substr', prorettype => 'text', proargtypes => 'text text int4 int4',
+  prosrc => 'regexp_substr_no_flags' },
+{ oid => '9626', descr => 'substring that match the regexp pattern',
+  proname => 'regexp_substr', prorettype => 'text', proargtypes => 'text text int4 int4 text',
+  prosrc => 'regexp_substr_no_subexpr' },
+{ oid => '9627', descr => 'substring that match the regexp pattern',
+  proname => 'regexp_substr', prorettype => 'text', proargtypes => 'text text int4 int4 text int4',
+  prosrc => 'regexp_substr' },
 { oid => '2088', descr => 'split string by field_sep and return field_num',
   proname => 'split_part', prorettype => 'text',
   proargtypes => 'text text int4', prosrc => 'split_part' },
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index fb4573d85f..99372be6f0 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -905,6 +905,393 @@ SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPs ov
 ERROR:  regexp_split_to_table() does not support the "global" option
 SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'g');
 ERROR:  regexp_split_to_array() does not support the "global" option
+--  count all matches for regexp
+SELECT regexp_count('123123123123123', '(12)3');
+ regexp_count 
+--------------
+            5
+(1 row)
+
+-- count all matches with start position
+SELECT regexp_count('123123123123', '123', 0);
+ regexp_count 
+--------------
+            4
+(1 row)
+
+SELECT regexp_count('123123123123', '123', 1);
+ regexp_count 
+--------------
+            4
+(1 row)
+
+SELECT regexp_count('123123123123', '123', 3);
+ regexp_count 
+--------------
+            3
+(1 row)
+
+SELECT regexp_count('123123123123', '123', -3);
+ERROR:  negative start position not allowed.
+-- count all matches in NULL string with a start position
+SELECT regexp_count(NULL, '123', 3);
+ regexp_count 
+--------------
+           \N
+(1 row)
+
+-- count all matches with a start position greater than string length
+SELECT regexp_count('123', '123', 10);
+ regexp_count 
+--------------
+            0
+(1 row)
+
+-- count all matches from different regexp
+SELECT regexp_count('ABC123', '[A-Z]'), regexp_count('A1B2C3', '[A-Z]');
+ regexp_count | regexp_count 
+--------------+--------------
+            3 |            3
+(1 row)
+
+SELECT regexp_count('ABC123', '[A-Z][0-9]'), regexp_count('A1B2C3', '[A-Z][0-9]');
+ regexp_count | regexp_count 
+--------------+--------------
+            1 |            3
+(1 row)
+
+SELECT regexp_count('ABC123', '[A-Z][0-9]{2}'), regexp_count('A1B2C3', '[A-Z][0-9]{2}');
+ regexp_count | regexp_count 
+--------------+--------------
+            1 |            0
+(1 row)
+
+SELECT regexp_count('ABC123', '([A-Z][0-9]){2}'), regexp_count('A1B2C3', '([A-Z][0-9]){2}');
+ regexp_count | regexp_count 
+--------------+--------------
+            0 |            1
+(1 row)
+
+SELECT regexp_count('ABC123A5', '^[A-Z][0-9]'), regexp_count('A1B2C3', '^[A-Z][0-9]');
+ regexp_count | regexp_count 
+--------------+--------------
+            0 |            1
+(1 row)
+
+SELECT regexp_count('ABC123', '[A-Z][0-9]{2}'), regexp_count('A1B2C34', '[A-Z][0-9]{2}');
+ regexp_count | regexp_count 
+--------------+--------------
+            1 |            1
+(1 row)
+
+-- count matches with newline case insensivity
+SELECT regexp_count('a'||CHR(10)||'d', 'a.d');
+ regexp_count 
+--------------
+            1
+(1 row)
+
+SELECT regexp_count('a'||CHR(10)||'d', 'a.d', 1, 's');
+ regexp_count 
+--------------
+            1
+(1 row)
+
+-- count matches with newline case sensivity
+SELECT regexp_count('a'||CHR(10)||'d', 'a.d', 1, 'n');
+ regexp_count 
+--------------
+            0
+(1 row)
+
+-- count not multiline matches
+SELECT regexp_count('a'||CHR(10)||'d', '^d$');
+ regexp_count 
+--------------
+            0
+(1 row)
+
+-- count multiline matches
+SELECT regexp_count('a'||CHR(10)||'d', '^d$', 1, 'm');
+ regexp_count 
+--------------
+            1
+(1 row)
+
+SELECT regexp_count('Hello'||CHR(10)||'world!', '^world!$', 1, 'm'); -- 1
+ regexp_count 
+--------------
+            1
+(1 row)
+
+DROP TABLE IF EXISTS regexp_temp;
+NOTICE:  table "regexp_temp" does not exist, skipping
+CREATE TABLE regexp_temp(fullname varchar(20), email varchar(20));
+INSERT INTO regexp_temp (fullname, email) VALUES ('John Doe', 'john...@example.com');
+INSERT INTO regexp_temp (fullname, email) VALUES ('Jane Doe', 'janedoe');
+-- count matches case sensitive
+SELECT fullname, regexp_count(fullname, 'e', 1, 'c') FROM regexp_temp;
+ fullname | regexp_count 
+----------+--------------
+ John Doe |            1
+ Jane Doe |            2
+(2 rows)
+
+SELECT fullname, regexp_count(fullname, 'D', 1, 'c') FROM regexp_temp;
+ fullname | regexp_count 
+----------+--------------
+ John Doe |            1
+ Jane Doe |            1
+(2 rows)
+
+SELECT fullname, regexp_count(fullname, 'd', 1, 'c') FROM regexp_temp;
+ fullname | regexp_count 
+----------+--------------
+ John Doe |            0
+ Jane Doe |            0
+(2 rows)
+
+-- count matches case insensitive
+SELECT fullname, regexp_count(fullname, 'E', 1, 'i') FROM regexp_temp;
+ fullname | regexp_count 
+----------+--------------
+ John Doe |            1
+ Jane Doe |            2
+(2 rows)
+
+SELECT fullname, regexp_count(fullname, 'do', 1, 'i') FROM regexp_temp;
+ fullname | regexp_count 
+----------+--------------
+ John Doe |            1
+ Jane Doe |            1
+(2 rows)
+
+-- return the start position of the 6th occurence starting at beginning of the string
+SELECT regexp_instr('number of your street, zipcode thetown, FR', '[^ ]+', 1, 6);
+ regexp_instr 
+--------------
+           32
+(1 row)
+
+-- return the start position of the 5th occurence starting after the first word
+SELECT regexp_instr('number of your street, zipcode thetown, FR', '[^ ]+', 7, 5, 0);
+ regexp_instr 
+--------------
+           32
+(1 row)
+
+-- return the ending position of the 5th occurence starting after the first word
+SELECT regexp_instr('number of your street, zipcode thetown, FR', '[^ ]+', 7, 5, 0);
+ regexp_instr 
+--------------
+           32
+(1 row)
+
+-- return the ending position of the 2nd occurence starting after the first word
+SELECT regexp_instr('number of your street, zipcode thetown, FR', '[T|Z|S][[:alpha:]]{5}', 7, 2, 1, 'i');
+ regexp_instr 
+--------------
+           30
+(1 row)
+
+-- return the starting position corresponding to the different capture group
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 0);
+ regexp_instr 
+--------------
+            1
+(1 row)
+
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 1);
+ regexp_instr 
+--------------
+            1
+(1 row)
+
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 2);
+ regexp_instr 
+--------------
+            4
+(1 row)
+
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 3);
+ regexp_instr 
+--------------
+            5
+(1 row)
+
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 4);
+ regexp_instr 
+--------------
+            7
+(1 row)
+
+-- return the starting position corresponding to a non existant capture group
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 5);
+ regexp_instr 
+--------------
+            0
+(1 row)
+
+-- Same but with the ending position
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 0);
+ regexp_instr 
+--------------
+            9
+(1 row)
+
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 1);
+ regexp_instr 
+--------------
+            4
+(1 row)
+
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 2);
+ regexp_instr 
+--------------
+            9
+(1 row)
+
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 3);
+ regexp_instr 
+--------------
+            7
+(1 row)
+
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 4);
+ regexp_instr 
+--------------
+            9
+(1 row)
+
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 5);
+ regexp_instr 
+--------------
+            0
+(1 row)
+
+-- start position of a valid email
+SELECT email, regexp_instr(email, '\w+@\w+(\.\w+)+') "valid_email" FROM regexp_temp;
+        email        | valid_email 
+---------------------+-------------
+ john...@example.com |           1
+ janedoe             |           0
+(2 rows)
+
+-- ending position of a valid email
+SELECT email, regexp_instr(email, '\w+@\w+(\.\w+)+', 1, 1, 1) "valid_email" FROM regexp_temp;
+        email        | valid_email 
+---------------------+-------------
+ john...@example.com |          20
+ janedoe             |           0
+(2 rows)
+
+-- start position of first capture group in the email (the dot part)
+SELECT email, regexp_instr(email, '\w+@\w+(\.\w+)+', 1, 1, 0, 'i', 1) FROM regexp_temp;
+        email        | regexp_instr 
+---------------------+--------------
+ john...@example.com |           16
+ janedoe             |            0
+(2 rows)
+
+-- ending position of first capture group in the email (the dot part)
+SELECT email, regexp_instr(email, '\w+@\w+(\.\w+)+', 1, 1, 1, 'i', 1) FROM regexp_temp;
+        email        | regexp_instr 
+---------------------+--------------
+ john...@example.com |           20
+ janedoe             |            0
+(2 rows)
+
+-- test negative values
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', -1, 1, 1, 'i', 1);
+ERROR:  negative start position not allowed.
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, -1, 1, 'i', 1);
+ERROR:  occurrence value must be greater than 0.
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, -1, 'i', 1);
+ERROR:  return option value must be 0 or 1.
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', -1);
+ERROR:  number of capture group must be a positive value.
+-- return the substring matching the regexp
+SELECT regexp_substr('number of your street, zipcode town, FR', ',[^,]+');
+ regexp_substr  
+----------------
+ , zipcode town
+(1 row)
+
+-- return the substring matching the regexp
+SELECT regexp_substr('number of your street, zipcode town, FR', ',[^,]+', 24);
+ regexp_substr 
+---------------
+ , FR
+(1 row)
+
+-- return the substring matching the regexp at the first occurrence
+SELECT regexp_substr('number of your street, zipcode town, FR', ',[^,]+', 1, 1);
+ regexp_substr  
+----------------
+ , zipcode town
+(1 row)
+
+-- return the substring matching the regexp at the second occurrence
+SELECT regexp_substr('number of your street, zipcode town, FR', ',[^,]+', 1, 2);
+ regexp_substr 
+---------------
+ , FR
+(1 row)
+
+-- case sensitivity substring search
+SELECT regexp_substr('number of your street, zipcode town, FR', ',\s+[Zf][^,]+', 1, 1);
+ regexp_substr 
+---------------
+ \N
+(1 row)
+
+SELECT regexp_substr('number of your street, zipcode town, FR', ',\s+[Zf][^,]+', 1, 1, 'i');
+ regexp_substr  
+----------------
+ , zipcode town
+(1 row)
+
+-- case sensitivity substring search with no capture group
+SELECT regexp_substr('number of your street, zipcode town, FR', ',\s+[Zf][^,]+', 1, 1, 'i', 0);
+ regexp_substr  
+----------------
+ , zipcode town
+(1 row)
+
+-- case sensitivity substring search with non existing capture group
+SELECT regexp_substr('number of your street, zipcode town, FR', ',\s+[Zf][^,]+', 1, 1, 'i', 0);
+ regexp_substr  
+----------------
+ , zipcode town
+(1 row)
+
+-- return the substring matching the regexp at different occurrence and capture group
+SELECT regexp_substr('1234567890 1234567890', '(123)(4(56)(78))', 1, 1, 'i', 4);
+ regexp_substr 
+---------------
+ 78
+(1 row)
+
+SELECT regexp_substr('1234567890 1234557890', '(123)(4(5[56])(78))', 1, 2, 'i', 3);
+ regexp_substr 
+---------------
+ 55
+(1 row)
+
+SELECT regexp_substr('1234567890 1234567890', '(123)(4(56)(78))', 1, 1, 'i', 0);
+ regexp_substr 
+---------------
+ 12345678
+(1 row)
+
+-- test negative values
+SELECT regexp_substr('1234567890 1234567890', '(123)(4(56)(78))', -1, 1, 'i', 4);
+ERROR:  negative start position not allowed.
+SELECT regexp_substr('1234567890 1234567890', '(123)(4(56)(78))', 1, -1, 'i', 4);
+ERROR:  occurrence value must be greater than 0.
+SELECT regexp_substr('1234567890 1234567890', '(123)(4(56)(78))', 1, 1, 'i', -4);
+ERROR:  number of capture group must be a positive value.
+DROP TABLE IF EXISTS regexp_temp;
 -- change NULL-display back
 \pset null ''
 -- E021-11 position expression
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index 57a48c9d0b..30d44cc6da 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -255,6 +255,110 @@ SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e',
 SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'g') AS foo;
 SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'g');
 
+--  count all matches for regexp
+SELECT regexp_count('123123123123123', '(12)3');
+-- count all matches with start position
+SELECT regexp_count('123123123123', '123', 0);
+SELECT regexp_count('123123123123', '123', 1);
+SELECT regexp_count('123123123123', '123', 3);
+SELECT regexp_count('123123123123', '123', -3);
+-- count all matches in NULL string with a start position
+SELECT regexp_count(NULL, '123', 3);
+-- count all matches with a start position greater than string length
+SELECT regexp_count('123', '123', 10);
+-- count all matches from different regexp
+SELECT regexp_count('ABC123', '[A-Z]'), regexp_count('A1B2C3', '[A-Z]');
+SELECT regexp_count('ABC123', '[A-Z][0-9]'), regexp_count('A1B2C3', '[A-Z][0-9]');
+SELECT regexp_count('ABC123', '[A-Z][0-9]{2}'), regexp_count('A1B2C3', '[A-Z][0-9]{2}');
+SELECT regexp_count('ABC123', '([A-Z][0-9]){2}'), regexp_count('A1B2C3', '([A-Z][0-9]){2}');
+SELECT regexp_count('ABC123A5', '^[A-Z][0-9]'), regexp_count('A1B2C3', '^[A-Z][0-9]');
+SELECT regexp_count('ABC123', '[A-Z][0-9]{2}'), regexp_count('A1B2C34', '[A-Z][0-9]{2}');
+-- count matches with newline case insensivity
+SELECT regexp_count('a'||CHR(10)||'d', 'a.d');
+SELECT regexp_count('a'||CHR(10)||'d', 'a.d', 1, 's');
+-- count matches with newline case sensivity
+SELECT regexp_count('a'||CHR(10)||'d', 'a.d', 1, 'n');
+-- count not multiline matches
+SELECT regexp_count('a'||CHR(10)||'d', '^d$');
+-- count multiline matches
+SELECT regexp_count('a'||CHR(10)||'d', '^d$', 1, 'm');
+SELECT regexp_count('Hello'||CHR(10)||'world!', '^world!$', 1, 'm'); -- 1
+
+DROP TABLE IF EXISTS regexp_temp;
+CREATE TABLE regexp_temp(fullname varchar(20), email varchar(20));
+INSERT INTO regexp_temp (fullname, email) VALUES ('John Doe', 'john...@example.com');
+INSERT INTO regexp_temp (fullname, email) VALUES ('Jane Doe', 'janedoe');
+-- count matches case sensitive
+SELECT fullname, regexp_count(fullname, 'e', 1, 'c') FROM regexp_temp;
+SELECT fullname, regexp_count(fullname, 'D', 1, 'c') FROM regexp_temp;
+SELECT fullname, regexp_count(fullname, 'd', 1, 'c') FROM regexp_temp;
+-- count matches case insensitive
+SELECT fullname, regexp_count(fullname, 'E', 1, 'i') FROM regexp_temp;
+SELECT fullname, regexp_count(fullname, 'do', 1, 'i') FROM regexp_temp;
+
+-- return the start position of the 6th occurence starting at beginning of the string
+SELECT regexp_instr('number of your street, zipcode thetown, FR', '[^ ]+', 1, 6);
+-- return the start position of the 5th occurence starting after the first word
+SELECT regexp_instr('number of your street, zipcode thetown, FR', '[^ ]+', 7, 5, 0);
+-- return the ending position of the 5th occurence starting after the first word
+SELECT regexp_instr('number of your street, zipcode thetown, FR', '[^ ]+', 7, 5, 0);
+-- return the ending position of the 2nd occurence starting after the first word
+SELECT regexp_instr('number of your street, zipcode thetown, FR', '[T|Z|S][[:alpha:]]{5}', 7, 2, 1, 'i');
+-- return the starting position corresponding to the different capture group
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 0);
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 1);
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 2);
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 3);
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 4);
+-- return the starting position corresponding to a non existant capture group
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 5);
+-- Same but with the ending position
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 0);
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 1);
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 2);
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 3);
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 4);
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 5);
+-- start position of a valid email
+SELECT email, regexp_instr(email, '\w+@\w+(\.\w+)+') "valid_email" FROM regexp_temp;
+-- ending position of a valid email
+SELECT email, regexp_instr(email, '\w+@\w+(\.\w+)+', 1, 1, 1) "valid_email" FROM regexp_temp;
+-- start position of first capture group in the email (the dot part)
+SELECT email, regexp_instr(email, '\w+@\w+(\.\w+)+', 1, 1, 0, 'i', 1) FROM regexp_temp;
+-- ending position of first capture group in the email (the dot part)
+SELECT email, regexp_instr(email, '\w+@\w+(\.\w+)+', 1, 1, 1, 'i', 1) FROM regexp_temp;
+-- test negative values
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', -1, 1, 1, 'i', 1);
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, -1, 1, 'i', 1);
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, -1, 'i', 1);
+SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', -1);
+
+-- return the substring matching the regexp
+SELECT regexp_substr('number of your street, zipcode town, FR', ',[^,]+');
+-- return the substring matching the regexp
+SELECT regexp_substr('number of your street, zipcode town, FR', ',[^,]+', 24);
+-- return the substring matching the regexp at the first occurrence
+SELECT regexp_substr('number of your street, zipcode town, FR', ',[^,]+', 1, 1);
+-- return the substring matching the regexp at the second occurrence
+SELECT regexp_substr('number of your street, zipcode town, FR', ',[^,]+', 1, 2);
+-- case sensitivity substring search
+SELECT regexp_substr('number of your street, zipcode town, FR', ',\s+[Zf][^,]+', 1, 1);
+SELECT regexp_substr('number of your street, zipcode town, FR', ',\s+[Zf][^,]+', 1, 1, 'i');
+-- case sensitivity substring search with no capture group
+SELECT regexp_substr('number of your street, zipcode town, FR', ',\s+[Zf][^,]+', 1, 1, 'i', 0);
+-- case sensitivity substring search with non existing capture group
+SELECT regexp_substr('number of your street, zipcode town, FR', ',\s+[Zf][^,]+', 1, 1, 'i', 0);
+-- return the substring matching the regexp at different occurrence and capture group
+SELECT regexp_substr('1234567890 1234567890', '(123)(4(56)(78))', 1, 1, 'i', 4);
+SELECT regexp_substr('1234567890 1234557890', '(123)(4(5[56])(78))', 1, 2, 'i', 3);
+SELECT regexp_substr('1234567890 1234567890', '(123)(4(56)(78))', 1, 1, 'i', 0);
+-- test negative values
+SELECT regexp_substr('1234567890 1234567890', '(123)(4(56)(78))', -1, 1, 'i', 4);
+SELECT regexp_substr('1234567890 1234567890', '(123)(4(56)(78))', 1, -1, 'i', 4);
+SELECT regexp_substr('1234567890 1234567890', '(123)(4(56)(78))', 1, 1, 'i', -4);
+
+DROP TABLE IF EXISTS regexp_temp;
+
 -- change NULL-display back
 \pset null ''

Re: [PATCH] proposal for regexp_count, regexp_instr, regexp_substr and regexp_replace

Reply via email to