hi. rebased, and rechecked it again.
seems no changes to the citext extension are required, since the citext data type does not define specialized substring/substr function. -- jian https://www.enterprisedb.com
From d30592f190860cc68d08f956ac6853aa2c60e1bf Mon Sep 17 00:00:00 2001 From: jian he <[email protected]> Date: Wed, 31 Dec 2025 15:26:31 +0800 Subject: [PATCH v10 1/2] Add argument names to the substr functions This change allows substr function to be called using named-argument notation, which can be helpful for readability, particularly for the ones with many arguments. No changes to the citext extension are required, since the citext data type does not define a specialized substr function. commitfest: https://commitfest.postgresql.org/patch/5524 Discussion: https://postgr.es/m/CACJufxHTBkymh06D4mGKNe1YfRNFN+gFBybmygWk=ptmqu0...@mail.gmail.com --- doc/src/sgml/func/func-binarystring.sgml | 4 ++-- doc/src/sgml/func/func-string.sgml | 4 ++-- src/include/catalog/pg_proc.dat | 4 ++++ 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/doc/src/sgml/func/func-binarystring.sgml b/doc/src/sgml/func/func-binarystring.sgml index b256381e01f..58051595126 100644 --- a/doc/src/sgml/func/func-binarystring.sgml +++ b/doc/src/sgml/func/func-binarystring.sgml @@ -571,11 +571,11 @@ <indexterm> <primary>substr</primary> </indexterm> - <function>substr</function> ( <parameter>bytes</parameter> <type>bytea</type>, <parameter>start</parameter> <type>integer</type> <optional>, <parameter>count</parameter> <type>integer</type> </optional> ) + <function>substr</function> ( <parameter>source</parameter> <type>bytea</type>, <parameter>start</parameter> <type>integer</type> <optional>, <parameter>count</parameter> <type>integer</type> </optional> ) <returnvalue>bytea</returnvalue> </para> <para> - Extracts the substring of <parameter>bytes</parameter> starting at + Extracts the substring of <parameter>source</parameter> starting at the <parameter>start</parameter>'th byte, and extending for <parameter>count</parameter> bytes if that is specified. (Same diff --git a/doc/src/sgml/func/func-string.sgml b/doc/src/sgml/func/func-string.sgml index 7ad1436e5f8..3325ade065a 100644 --- a/doc/src/sgml/func/func-string.sgml +++ b/doc/src/sgml/func/func-string.sgml @@ -1390,11 +1390,11 @@ <indexterm> <primary>substr</primary> </indexterm> - <function>substr</function> ( <parameter>string</parameter> <type>text</type>, <parameter>start</parameter> <type>integer</type> <optional>, <parameter>count</parameter> <type>integer</type> </optional> ) + <function>substr</function> ( <parameter>source</parameter> <type>text</type>, <parameter>start</parameter> <type>integer</type> <optional>, <parameter>count</parameter> <type>integer</type> </optional> ) <returnvalue>text</returnvalue> </para> <para> - Extracts the substring of <parameter>string</parameter> starting at + Extracts the substring of <parameter>source</parameter> starting at the <parameter>start</parameter>'th character, and extending for <parameter>count</parameter> characters if that is specified. (Same diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 60f7ce502f6..6c6f31f7043 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -3718,6 +3718,7 @@ prosrc => 'rtrim' }, { oid => '877', descr => 'extract portion of string', proname => 'substr', prorettype => 'text', proargtypes => 'text int4 int4', + proargnames => '{source, start, count}', prosrc => 'text_substr' }, { oid => '878', descr => 'map a set of characters appearing in string', proname => 'translate', prorettype => 'text', proargtypes => 'text text text', @@ -3736,6 +3737,7 @@ prosrc => 'rtrim1' }, { oid => '883', descr => 'extract portion of string', proname => 'substr', prorettype => 'text', proargtypes => 'text int4', + proargnames => '{source, start}', prosrc => 'text_substr_no_len' }, { oid => '884', descr => 'trim selected characters from both ends of string', proname => 'btrim', prorettype => 'text', proargtypes => 'text text', @@ -6306,9 +6308,11 @@ prosrc => 'bytea_substr_no_len' }, { oid => '2085', descr => 'extract portion of string', proname => 'substr', prorettype => 'bytea', proargtypes => 'bytea int4 int4', + proargnames => '{source, start, count}', prosrc => 'bytea_substr' }, { oid => '2086', descr => 'extract portion of string', proname => 'substr', prorettype => 'bytea', proargtypes => 'bytea int4', + proargnames => '{source, start}', prosrc => 'bytea_substr_no_len' }, { oid => '2014', descr => 'position of substring', proname => 'position', prorettype => 'int4', proargtypes => 'bytea bytea', -- 2.34.1
From 8fe7182867f833af9bdb2704a01ec358633fb5fd Mon Sep 17 00:00:00 2001 From: jian he <[email protected]> Date: Wed, 31 Dec 2025 15:24:54 +0800 Subject: [PATCH v10 2/2] Add argument names to the substring functions This change allows substring function to be called using named-argument notation, which can be helpful for readability, particularly for the ones with many arguments. No changes to the citext extension are required, since the citext data type does not define a specialized substring function. commitfest: https://commitfest.postgresql.org/patch/5524 Discussion: https://postgr.es/m/CACJufxHTBkymh06D4mGKNe1YfRNFN+gFBybmygWk=ptmqu0...@mail.gmail.com --- doc/src/sgml/func/func-binarystring.sgml | 29 ++++++++-- doc/src/sgml/func/func-bitstring.sgml | 27 +++++++++- doc/src/sgml/func/func-matching.sgml | 28 ++++++---- doc/src/sgml/func/func-string.sgml | 69 +++++++++++++++++++++--- src/backend/catalog/system_functions.sql | 2 +- src/include/catalog/pg_proc.dat | 8 +++ 6 files changed, 140 insertions(+), 23 deletions(-) diff --git a/doc/src/sgml/func/func-binarystring.sgml b/doc/src/sgml/func/func-binarystring.sgml index 58051595126..6f3b6c40a62 100644 --- a/doc/src/sgml/func/func-binarystring.sgml +++ b/doc/src/sgml/func/func-binarystring.sgml @@ -200,11 +200,11 @@ <indexterm> <primary>substring</primary> </indexterm> - <function>substring</function> ( <parameter>bytes</parameter> <type>bytea</type> <optional> <literal>FROM</literal> <parameter>start</parameter> <type>integer</type> </optional> <optional> <literal>FOR</literal> <parameter>count</parameter> <type>integer</type> </optional> ) + <function>substring</function> ( <parameter>source</parameter> <type>bytea</type> <optional> <literal>FROM</literal> <parameter>start</parameter> <type>integer</type> </optional> <optional> <literal>FOR</literal> <parameter>count</parameter> <type>integer</type> </optional> ) <returnvalue>bytea</returnvalue> </para> <para> - Extracts the substring of <parameter>bytes</parameter> starting at + Extracts the substring of <parameter>source</parameter> starting at the <parameter>start</parameter>'th byte if that is specified, and stopping after <parameter>count</parameter> bytes if that is specified. Provide at least one of <parameter>start</parameter> @@ -216,6 +216,29 @@ </para></entry> </row> + <row> + <entry role="func_table_entry"><para role="func_signature"> + <indexterm> + <primary>substring</primary> + </indexterm> + <function>substring</function> ( + <parameter>source</parameter> <type>bytea</type>, + <parameter>start</parameter> <type>integer</type> + <optional>, <parameter>count</parameter> <type>integer</type> </optional> ) + <returnvalue>bytea</returnvalue> + </para> + <para> + Extracts the substring of <parameter>source</parameter> starting at + the <parameter>start</parameter>'th byte, + and stopping after <parameter>count</parameter> bytes if that is + specified. + </para> + <para> + <literal>substring(source=>'\x1234567890'::bytea, start=>3, count=>2)</literal> + <returnvalue>\x5678</returnvalue> + </para></entry> + </row> + <row> <entry role="func_table_entry"><para role="func_signature"> <indexterm> @@ -579,7 +602,7 @@ the <parameter>start</parameter>'th byte, and extending for <parameter>count</parameter> bytes if that is specified. (Same - as <literal>substring(<parameter>bytes</parameter> + as <literal>substring(<parameter>source</parameter> from <parameter>start</parameter> for <parameter>count</parameter>)</literal>.) </para> diff --git a/doc/src/sgml/func/func-bitstring.sgml b/doc/src/sgml/func/func-bitstring.sgml index 3f59de464a4..2c85989228c 100644 --- a/doc/src/sgml/func/func-bitstring.sgml +++ b/doc/src/sgml/func/func-bitstring.sgml @@ -279,11 +279,11 @@ <indexterm> <primary>substring</primary> </indexterm> - <function>substring</function> ( <parameter>bits</parameter> <type>bit</type> <optional> <literal>FROM</literal> <parameter>start</parameter> <type>integer</type> </optional> <optional> <literal>FOR</literal> <parameter>count</parameter> <type>integer</type> </optional> ) + <function>substring</function> ( <parameter>source</parameter> <type>bit</type> <optional> <literal>FROM</literal> <parameter>start</parameter> <type>integer</type> </optional> <optional> <literal>FOR</literal> <parameter>count</parameter> <type>integer</type> </optional> ) <returnvalue>bit</returnvalue> </para> <para> - Extracts the substring of <parameter>bits</parameter> starting at + Extracts the substring of <parameter>source</parameter> starting at the <parameter>start</parameter>'th bit if that is specified, and stopping after <parameter>count</parameter> bits if that is specified. Provide at least one of <parameter>start</parameter> @@ -295,6 +295,29 @@ </para></entry> </row> + <row> + <entry role="func_table_entry"><para role="func_signature"> + <indexterm> + <primary>substring</primary> + </indexterm> + <function>substring</function> ( + <parameter>source</parameter> <type>bit</type>, + <parameter>start</parameter> <type>integer</type> + <optional>, <parameter>count</parameter> <type>integer</type> </optional> ) + <returnvalue>bit</returnvalue> + </para> + <para> + Extracts the substring of <parameter>source</parameter> starting at + the <parameter>start</parameter>'th bit, + and stopping after <parameter>count</parameter> bits if that is + specified. + </para> + <para> + <literal>substring(source=>B'110010111111', start=>3, count=>2)</literal> + <returnvalue>00</returnvalue> + </para></entry> + </row> + <row> <entry role="func_table_entry"><para role="func_signature"> <indexterm> diff --git a/doc/src/sgml/func/func-matching.sgml b/doc/src/sgml/func/func-matching.sgml index f466860ddb0..fb574bb83cb 100644 --- a/doc/src/sgml/func/func-matching.sgml +++ b/doc/src/sgml/func/func-matching.sgml @@ -234,13 +234,13 @@ </indexterm> <synopsis> -<replaceable>string</replaceable> SIMILAR TO <replaceable>pattern</replaceable> <optional>ESCAPE <replaceable>escape-character</replaceable></optional> -<replaceable>string</replaceable> NOT SIMILAR TO <replaceable>pattern</replaceable> <optional>ESCAPE <replaceable>escape-character</replaceable></optional> +<replaceable>source</replaceable> SIMILAR TO <replaceable>pattern</replaceable> <optional>ESCAPE <replaceable>escape-character</replaceable></optional> +<replaceable>source</replaceable> NOT SIMILAR TO <replaceable>pattern</replaceable> <optional>ESCAPE <replaceable>escape-character</replaceable></optional> </synopsis> <para> The <function>SIMILAR TO</function> operator returns true or - false depending on whether its pattern matches the given string. + false depending on whether its pattern matches the given string (the <replaceable>source</replaceable>). It is similar to <function>LIKE</function>, except that it interprets the pattern using the SQL standard's definition of a regular expression. SQL regular expressions are a curious cross @@ -369,15 +369,15 @@ regular expression pattern. The function can be written according to standard SQL syntax: <synopsis> -substring(<replaceable>string</replaceable> SIMILAR <replaceable>pattern</replaceable> ESCAPE <replaceable>escape-character</replaceable>) +substring(<replaceable>source</replaceable> SIMILAR <replaceable>pattern</replaceable> ESCAPE <replaceable>escape</replaceable>) </synopsis> or using the now obsolete SQL:1999 syntax: <synopsis> -substring(<replaceable>string</replaceable> FROM <replaceable>pattern</replaceable> FOR <replaceable>escape-character</replaceable>) +substring(<replaceable>source</replaceable> FROM <replaceable>pattern</replaceable> FOR <replaceable>escape</replaceable>) </synopsis> or as a plain three-argument function: <synopsis> -substring(<replaceable>string</replaceable>, <replaceable>pattern</replaceable>, <replaceable>escape-character</replaceable>) +substring(<replaceable>source</replaceable>, <replaceable>pattern</replaceable>, <replaceable>escape</replaceable>) </synopsis> As with <literal>SIMILAR TO</literal>, the specified pattern must match the entire data string, or else the @@ -581,11 +581,17 @@ substring('foobar' SIMILAR '#"o_b#"%' ESCAPE '#') <lineannotation>NULL</linea </para> <para> - The <function>substring</function> function with two parameters, - <function>substring(<replaceable>string</replaceable> from - <replaceable>pattern</replaceable>)</function>, provides extraction of a - substring - that matches a POSIX regular expression pattern. It returns null if + The <function>substring</function> function with two parameters provides extraction of a + substring that matches a <acronym>POSIX</acronym> regular expression pattern. + The function can be written according to standard <acronym>SQL</acronym> syntax: +<synopsis> +substring(<replaceable>source</replaceable> FROM <replaceable>pattern</replaceable>) +</synopsis> + It can also written as a plain two-argument function: +<synopsis> +substring(<replaceable>source</replaceable>, <replaceable>pattern</replaceable>) +</synopsis> + It returns null if there is no match, otherwise the first portion of the text that matched the pattern. But if the pattern contains any parentheses, the portion of the text that matched the first parenthesized subexpression (the diff --git a/doc/src/sgml/func/func-string.sgml b/doc/src/sgml/func/func-string.sgml index 3325ade065a..41035c1e8af 100644 --- a/doc/src/sgml/func/func-string.sgml +++ b/doc/src/sgml/func/func-string.sgml @@ -400,11 +400,11 @@ <indexterm> <primary>substring</primary> </indexterm> - <function>substring</function> ( <parameter>string</parameter> <type>text</type> <optional> <literal>FROM</literal> <parameter>start</parameter> <type>integer</type> </optional> <optional> <literal>FOR</literal> <parameter>count</parameter> <type>integer</type> </optional> ) + <function>substring</function> ( <parameter>source</parameter> <type>text</type> <optional> <literal>FROM</literal> <parameter>start</parameter> <type>integer</type> </optional> <optional> <literal>FOR</literal> <parameter>count</parameter> <type>integer</type> </optional> ) <returnvalue>text</returnvalue> </para> <para> - Extracts the substring of <parameter>string</parameter> starting at + Extracts the substring of <parameter>source</parameter> starting at the <parameter>start</parameter>'th character if that is specified, and stopping after <parameter>count</parameter> characters if that is specified. Provide at least one of <parameter>start</parameter> @@ -426,7 +426,7 @@ <row> <entry role="func_table_entry"><para role="func_signature"> - <function>substring</function> ( <parameter>string</parameter> <type>text</type> <literal>FROM</literal> <parameter>pattern</parameter> <type>text</type> ) + <function>substring</function> ( <parameter>source</parameter> <type>text</type> <literal>FROM</literal> <parameter>pattern</parameter> <type>text</type> ) <returnvalue>text</returnvalue> </para> <para> @@ -441,11 +441,11 @@ <row> <entry role="func_table_entry"><para role="func_signature"> - <function>substring</function> ( <parameter>string</parameter> <type>text</type> <literal>SIMILAR</literal> <parameter>pattern</parameter> <type>text</type> <literal>ESCAPE</literal> <parameter>escape</parameter> <type>text</type> ) + <function>substring</function> ( <parameter>source</parameter> <type>text</type> <literal>SIMILAR</literal> <parameter>pattern</parameter> <type>text</type> <literal>ESCAPE</literal> <parameter>escape</parameter> <type>text</type> ) <returnvalue>text</returnvalue> </para> <para role="func_signature"> - <function>substring</function> ( <parameter>string</parameter> <type>text</type> <literal>FROM</literal> <parameter>pattern</parameter> <type>text</type> <literal>FOR</literal> <parameter>escape</parameter> <type>text</type> ) + <function>substring</function> ( <parameter>source</parameter> <type>text</type> <literal>FROM</literal> <parameter>pattern</parameter> <type>text</type> <literal>FOR</literal> <parameter>escape</parameter> <type>text</type> ) <returnvalue>text</returnvalue> </para> <para> @@ -1398,7 +1398,7 @@ the <parameter>start</parameter>'th character, and extending for <parameter>count</parameter> characters if that is specified. (Same - as <literal>substring(<parameter>string</parameter> + as <literal>substring(<parameter>source</parameter> from <parameter>start</parameter> for <parameter>count</parameter>)</literal>.) </para> @@ -1412,6 +1412,63 @@ </para></entry> </row> + <row> + <entry role="func_table_entry"><para role="func_signature"> + <indexterm> + <primary>substring</primary> + </indexterm> + <function>substring</function> ( + <parameter>source</parameter> <type>text</type>, + <parameter>start</parameter> <type>integer</type> + <optional>, <parameter>count</parameter> <type>integer</type> </optional> ) + <returnvalue>text</returnvalue> + </para> + <para> + Extracts the substring of <parameter>source</parameter> starting at the + <parameter>start</parameter>'th character, and stopping after + <parameter>count</parameter> characters if that is specified. + </para> + <para> + <literal>substring(source=>'Thomas', start=>2, count=>3)</literal> + <returnvalue>hom</returnvalue> + </para></entry> + </row> + + <row> + <entry role="func_table_entry"><para role="func_signature"> + <function>substring</function> ( + <parameter>source</parameter> <type>text</type>, + <parameter>pattern</parameter> <type>text</type>, + <parameter>escape</parameter> <type>text</type>) + <returnvalue>text</returnvalue> + </para> + <para> + Extracts the first substring matching <acronym>SQL</acronym> regular expression; + see <xref linkend="functions-similarto-regexp"/>. + </para> + <para> + <literal>substring('Thomas', '%#"o_a#"_', '#')</literal> + <returnvalue>oma</returnvalue> + </para></entry> + </row> + + <row> + <entry role="func_table_entry"><para role="func_signature"> + <function>substring</function> ( + <parameter>source</parameter> <type>text</type>, + <parameter>pattern</parameter> <type>text</type> ) + <returnvalue>text</returnvalue> + </para> + <para> + Extracts the first substring matching <acronym>POSIX</acronym> regular expression; + see <xref linkend="functions-posix-regexp"/>. + </para> + <para> + <literal>substring('Thomas', '...$')</literal> + <returnvalue>mas</returnvalue> + </para></entry> + </row> + <row> <entry role="func_table_entry"><para role="func_signature"> <indexterm> diff --git a/src/backend/catalog/system_functions.sql b/src/backend/catalog/system_functions.sql index 2d946d6d9e9..f0850f9ef64 100644 --- a/src/backend/catalog/system_functions.sql +++ b/src/backend/catalog/system_functions.sql @@ -42,7 +42,7 @@ CREATE OR REPLACE FUNCTION rpad(text, integer) IMMUTABLE PARALLEL SAFE STRICT COST 1 RETURN rpad($1, $2, ' '); -CREATE OR REPLACE FUNCTION "substring"(text, text, text) +CREATE OR REPLACE FUNCTION "substring"(source text, pattern text, escape text) RETURNS text LANGUAGE sql IMMUTABLE PARALLEL SAFE STRICT COST 1 diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 6c6f31f7043..aed03195012 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -3748,9 +3748,11 @@ { oid => '936', descr => 'extract portion of string', proname => 'substring', prorettype => 'text', proargtypes => 'text int4 int4', + proargnames => '{source, start, count}', prosrc => 'text_substr' }, { oid => '937', descr => 'extract portion of string', proname => 'substring', prorettype => 'text', proargtypes => 'text int4', + proargnames => '{source, start}', prosrc => 'text_substr_no_len' }, { oid => '2087', descr => 'replace all occurrences in string of old_substr with new_substr', @@ -4168,6 +4170,7 @@ prosrc => 'bitcat' }, { oid => '1680', descr => 'extract portion of bitstring', proname => 'substring', prorettype => 'bit', proargtypes => 'bit int4 int4', + proargnames => '{source, start, count}', prosrc => 'bitsubstr' }, { oid => '1681', descr => 'bitstring length', proname => 'length', prorettype => 'int4', proargtypes => 'bit', @@ -4197,6 +4200,7 @@ prosrc => 'bitposition' }, { oid => '1699', descr => 'extract portion of bitstring', proname => 'substring', prorettype => 'bit', proargtypes => 'bit int4', + proargnames => '{source, start}', prosrc => 'bitsubstr_no_len' }, { oid => '3030', descr => 'substitute portion of bitstring', @@ -6302,9 +6306,11 @@ prosrc => 'byteacat' }, { oid => '2012', descr => 'extract portion of string', proname => 'substring', prorettype => 'bytea', + proargnames => '{source, start, count}', proargtypes => 'bytea int4 int4', prosrc => 'bytea_substr' }, { oid => '2013', descr => 'extract portion of string', proname => 'substring', prorettype => 'bytea', proargtypes => 'bytea int4', + proargnames => '{source, start}', prosrc => 'bytea_substr_no_len' }, { oid => '2085', descr => 'extract portion of string', proname => 'substr', prorettype => 'bytea', proargtypes => 'bytea int4 int4', @@ -6504,9 +6510,11 @@ { oid => '2073', descr => 'extract text matching regular expression', proname => 'substring', prorettype => 'text', proargtypes => 'text text', + proargnames => '{source, pattern}', prosrc => 'textregexsubstr' }, { oid => '2074', descr => 'extract text matching SQL regular expression', proname => 'substring', prolang => 'sql', prorettype => 'text', + proargnames => '{source, pattern, escape}', proargtypes => 'text text text', prosrc => 'see system_functions.sql' }, { oid => '2075', descr => 'convert int8 to bitstring', -- 2.34.1
