On Fri, 2 Feb 2007, David Fetter wrote:
> On Fri, Feb 02, 2007 at 08:56:31PM -0500, Tom Lane wrote:
> >
> > All of SQL's pattern match operators have the pattern on the right,
> > so my advice is to stick with that and try not to think about Perl
> > ;-)
>
> Perl provides inspiration, but here, consistency would help more than
> orderly imitation of how it does what it does. And besides, when
> people really need Perl, they can pull it in as a PL :)

Alright, here is my code to date. I have put the pattern after the string
in the split function, as discussed above. The .tar.gz file expects to be
untarred in contrib/. I have made some regression tests that can be run
using 'make installcheck' as normal for contrib. I think they exercise
the corner cases in the code, but I may very well have missed some. It
requires the (previously submitted) attached patch to core to compile, as
it takes advantage of new exported functions from
src/backend/utils/adt/regexp.c.
Let me know if you see any bugs or issues with this code, and I am open to
suggestions for further regression tests ;)
Things that I still want to look into:
* regexp flags (a la regexp_replace).
* maybe make regexp_matches return setof whatever, so that when given a 'g'
flag it returns all matches in the string.
* maybe a join function that works as an aggregate, e.g.
SELECT join(',', col) FROM tbl
which currently can be written as
SELECT array_to_string(ARRAY(SELECT col FROM tbl), ',')
--
It was a virgin forest, a place where the Hand of Man had never set
foot.

Index: src/backend/utils/adt/regexp.c
===================================================================
RCS file:
/home/jeremyd/local/postgres/cvsuproot/pgsql/src/backend/utils/adt/regexp.c,v
retrieving revision 1.68
diff -c -r1.68 regexp.c
*** src/backend/utils/adt/regexp.c 5 Jan 2007 22:19:41 -0000 1.68
--- src/backend/utils/adt/regexp.c 2 Feb 2007 02:45:32 -0000
***************
*** 29,41 ****
*/
#include "postgres.h"
- #include "regex/regex.h"
#include "utils/builtins.h"
#include "utils/guc.h"
/* GUC-settable flavor parameter */
! static int regex_flavor = REG_ADVANCED;
/*
--- 29,41 ----
*/
#include "postgres.h"
#include "utils/builtins.h"
#include "utils/guc.h"
+ #include "utils/regexp.h"
/* GUC-settable flavor parameter */
! int regex_flavor = REG_ADVANCED;
/*
***************
*** 90,96 ****
* Pattern is given in the database encoding. We internally convert to
* array of pg_wchar which is what Spencer's regex package wants.
*/
! static regex_t *
RE_compile_and_cache(text *text_re, int cflags)
{
int text_re_len = VARSIZE(text_re);
--- 90,96 ----
* Pattern is given in the database encoding. We internally convert to
* array of pg_wchar which is what Spencer's regex package wants.
*/
! regex_t *
RE_compile_and_cache(text *text_re, int cflags)
{
int text_re_len = VARSIZE(text_re);
***************
*** 191,238 ****
}
/*
! * RE_compile_and_execute - compile and execute a RE
*
* Returns TRUE on match, FALSE on no match
*
! * text_re --- the pattern, expressed as an *untoasted* TEXT object
! * dat --- the data to match against (need not be null-terminated)
! * dat_len --- the length of the data string
! * cflags --- compile options for the pattern
* nmatch, pmatch --- optional return area for match details
*
! * Both pattern and data are given in the database encoding. We internally
! * convert to array of pg_wchar which is what Spencer's regex package wants.
*/
! static bool
! RE_compile_and_execute(text *text_re, char *dat, int dat_len,
! int cflags, int nmatch, regmatch_t
*pmatch)
{
- pg_wchar *data;
- size_t data_len;
int regexec_result;
- regex_t *re;
char errMsg[100];
- /* Convert data string to wide characters */
- data = (pg_wchar *) palloc((dat_len + 1) * sizeof(pg_wchar));
- data_len = pg_mb2wchar_with_len(dat, data, dat_len);
-
- /* Compile RE */
- re = RE_compile_and_cache(text_re, cflags);
-
/* Perform RE match and return result */
regexec_result = pg_regexec(re,
data,
data_len,
! 0,
NULL, /* no
details */
nmatch,
pmatch,
0);
- pfree(data);
-
if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
{
/* re failed??? */
--- 191,226 ----
}
/*
! * RE_wchar_execute - execute a RE
*
* Returns TRUE on match, FALSE on no match
*
! * re --- the compiled pattern as returned by RE_compile_and_cache
! * data --- the data to match against (need not be null-terminated)
! * data_len --- the length of the data string
! * start_search -- the offset in the data to start searching
* nmatch, pmatch --- optional return area for match details
*
! * Data is given as array of pg_wchar which is what Spencer's regex package
! * wants.
*/
! bool
! RE_wchar_execute(regex_t *re, pg_wchar *data, int data_len, size_t
start_search,
! int nmatch, regmatch_t *pmatch)
{
int regexec_result;
char errMsg[100];
/* Perform RE match and return result */
regexec_result = pg_regexec(re,
data,
data_len,
! start_search,
NULL, /* no
details */
nmatch,
pmatch,
0);
if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
{
/* re failed??? */
***************
*** 245,250 ****
--- 233,299 ----
return (regexec_result == REG_OKAY);
}
+ /*
+ * RE_execute - execute a RE
+ *
+ * Returns TRUE on match, FALSE on no match
+ *
+ * re --- the compiled pattern as returned by RE_compile_and_cache
+ * dat --- the data to match against (need not be null-terminated)
+ * dat_len --- the length of the data string
+ * nmatch, pmatch --- optional return area for match details
+ *
+ * Data is given in the database encoding. We internally
+ * convert to array of pg_wchar which is what Spencer's regex package wants.
+ */
+ bool
+ RE_execute(regex_t *re, char *dat, int dat_len,
+ int nmatch, regmatch_t *pmatch)
+ {
+ pg_wchar *data;
+ size_t data_len;
+ bool match;
+
+ /* Convert data string to wide characters */
+ data = (pg_wchar *) palloc((dat_len + 1) * sizeof(pg_wchar));
+ data_len = pg_mb2wchar_with_len(dat, data, dat_len);
+
+ /* Perform RE match and return result */
+ match = RE_wchar_execute(re, data, data_len, 0, nmatch, pmatch);
+ pfree(data);
+ return match;
+ }
+
+ /*
+ * RE_compile_and_execute - compile and execute a RE
+ *
+ * Returns TRUE on match, FALSE on no match
+ *
+ * text_re --- the pattern, expressed as an *untoasted* TEXT object
+ * dat --- the data to match against (need not be null-terminated)
+ * dat_len --- the length of the data string
+ * cflags --- compile options for the pattern
+ * nmatch, pmatch --- optional return area for match details
+ *
+ * Both pattern and data are given in the database encoding. We internally
+ * convert to array of pg_wchar which is what Spencer's regex package wants.
+ */
+ bool
+ RE_compile_and_execute(text *text_re, char *dat, int dat_len,
+ int cflags, int nmatch, regmatch_t
*pmatch)
+ {
+ pg_wchar *data;
+ size_t data_len;
+ int regexec_result;
+ regex_t *re;
+ char errMsg[100];
+
+ /* Compile RE */
+ re = RE_compile_and_cache(text_re, cflags);
+
+ return RE_execute(re, dat, dat_len, nmatch, pmatch);
+ }
+
/*
* assign_regex_flavor - GUC hook to validate and set REGEX_FLAVOR
Index: src/backend/utils/adt/varlena.c
===================================================================
RCS file:
/home/jeremyd/local/postgres/cvsuproot/pgsql/src/backend/utils/adt/varlena.c,v
retrieving revision 1.154
diff -c -r1.154 varlena.c
*** src/backend/utils/adt/varlena.c 5 Jan 2007 22:19:42 -0000 1.154
--- src/backend/utils/adt/varlena.c 2 Feb 2007 02:50:31 -0000
***************
*** 23,32 ****
#include "libpq/pqformat.h"
#include "miscadmin.h"
#include "parser/scansup.h"
- #include "regex/regex.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/pg_locale.h"
typedef struct varlena unknown;
--- 23,32 ----
#include "libpq/pqformat.h"
#include "miscadmin.h"
#include "parser/scansup.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/pg_locale.h"
+ #include "utils/regexp.h"
typedef struct varlena unknown;
***************
*** 2355,2386 ****
search_start = 0;
while (search_start <= data_len)
{
- int regexec_result;
-
CHECK_FOR_INTERRUPTS();
! regexec_result = pg_regexec(re,
! data,
!
data_len,
!
search_start,
! NULL,
/* no details */
!
REGEXP_REPLACE_BACKREF_CNT,
! pmatch,
! 0);
!
! if (regexec_result == REG_NOMATCH)
break;
- if (regexec_result != REG_OKAY)
- {
- char errMsg[100];
-
- pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
- ereport(ERROR,
-
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
- errmsg("regular expression failed:
%s", errMsg)));
- }
-
/*
* Copy the text to the left of the match position. Note we are
* given character not byte indexes.
--- 2355,2366 ----
search_start = 0;
while (search_start <= data_len)
{
CHECK_FOR_INTERRUPTS();
! if (!RE_wchar_execute (re, data, data_len, search_start,
!
REGEXP_REPLACE_BACKREF_CNT, pmatch))
break;
/*
* Copy the text to the left of the match position. Note we are
* given character not byte indexes.
*** ../pgsql-orig/src/include/utils/regexp.h Wed Dec 31 16:00:00 1969
--- src/include/utils/regexp.h Thu Feb 1 18:46:49 2007
***************
*** 0 ****
--- 1,29 ----
+ /*-------------------------------------------------------------------------
+ *
+ * regexp.h
+ * Header file for regexp connector code.
+ *
+ * Copyright (c) 2007, PostgreSQL Global Development Group
+ *
+ * $PostgreSQL$
+ *
+ *-------------------------------------------------------------------------
+ */
+ #ifndef REGEXP_H
+ #define REGEXP_H
+
+ #include "regex/regex.h"
+
+ /* regexp support routines for PostgreSQL-izing regexp code */
+ extern regex_t * RE_compile_and_cache(text *text_re, int cflags);
+ extern bool RE_compile_and_execute(text *text_re, char *dat, int dat_len,
+ int cflags, int nmatch, regmatch_t
*pmatch);
+ extern bool RE_wchar_execute(regex_t *re, pg_wchar *data, int data_len,
+ size_t start_search, int nmatch,
regmatch_t *pmatch);
+ extern bool RE_execute(regex_t *re, char *dat, int dat_len,
+ int nmatch, regmatch_t *pmatch);
+
+ /* regexp flavor GUC variable */
+ extern int regex_flavor;
+
+ #endif /* REGEXP_H */
regexp_ext.tar.gz
Description: Binary data
---------------------------(end of broadcast)---------------------------
TIP 1: if posting/reading through Usenet, please send an appropriate
subscribe-nomail command to [EMAIL PROTECTED] so that your
message can get through to the mailing list cleanly
