helly Sun Jul 16 21:09:30 2006 UTC
Modified files: (Branch: PHP_5_2)
/php-src/ext/pcre php_pcre.c php_pcre.h
Log:
- MFH: A little bit of refactoring: two new PHPAPI functions:
. php_pcre_match
. php_pcre_split
http://cvs.php.net/viewvc.cgi/php-src/ext/pcre/php_pcre.c?r1=1.168.2.9.2.7&r2=1.168.2.9.2.8&diff_format=u
Index: php-src/ext/pcre/php_pcre.c
diff -u php-src/ext/pcre/php_pcre.c:1.168.2.9.2.7
php-src/ext/pcre/php_pcre.c:1.168.2.9.2.8
--- php-src/ext/pcre/php_pcre.c:1.168.2.9.2.7 Thu Jun 15 18:33:08 2006
+++ php-src/ext/pcre/php_pcre.c Sun Jul 16 21:09:30 2006
@@ -16,7 +16,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: php_pcre.c,v 1.168.2.9.2.7 2006/06/15 18:33:08 dmitry Exp $ */
+/* $Id: php_pcre.c,v 1.168.2.9.2.8 2006/07/16 21:09:30 helly Exp $ */
#ifdef HAVE_CONFIG_H
#include "config.h"
@@ -426,41 +426,22 @@
}
/* }}} */
-/* {{{ php_pcre_match
- */
-static void php_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global)
+static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{
*/
{
/* parameters */
char *regex; /* Regular
expression */
char *subject; /* String to match
against */
int regex_len;
int subject_len;
- zval *subpats = NULL; /* Array for
subpatterns */
- long flags; /* Match
control flags */
-
- zval *result_set, /* Holds a set of
subpatterns after
-
a global match */
- **match_sets = NULL; /* An array of sets of
matches for each
-
subpattern after a global match */
pcre *re = NULL; /* Compiled
regular expression */
pcre_extra *extra = NULL; /* Holds results of
studying pattern */
- pcre_extra extra_data; /* Used locally for
exec options */
- int exoptions = 0; /* Execution
options */
+ zval *subpats = NULL; /* Array for
subpatterns */
+ long flags; /* Match
control flags */
int preg_options = 0; /* Custom preg
options */
- int count = 0; /*
Count of matched subpatterns */
- int *offsets; /*
Array of subpattern offsets */
- int num_subpats; /* Number of
captured subpatterns */
- int size_offsets; /* Size of the
offsets array */
long start_offset = 0; /* Where the new search
starts */
- int matched; /* Has
anything matched */
int subpats_order = 0; /* Order of
subpattern matches */
int offset_capture = 0;/* Capture match
offsets: yes/no */
- int g_notempty = 0; /* If the match
should not be empty */
- const char **stringlist; /* Holds list of
subpatterns */
- char *match; /* The current
match */
- char **subpat_names = NULL;/* Array for named subpatterns
*/
- int i, rc;
-
+
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, ((global) ?
"ssz|ll" : "ss|zll"), &regex, &regex_len,
&subject,
&subject_len, &subpats, &flags, &start_offset) == FAILURE) {
RETURN_FALSE;
@@ -485,6 +466,36 @@
}
}
+ /* Compile regex or get it from cache. */
+ if ((re = pcre_get_compiled_regex(regex, &extra, &preg_options
TSRMLS_CC)) == NULL) {
+ RETURN_FALSE;
+ }
+
+ php_pcre_match(re, extra, subject, subject_len, return_value, subpats,
+ global, preg_options, start_offset, subpats_order,
offset_capture TSRMLS_CC);
+}
+
+PHPAPI void php_pcre_match(pcre *re, pcre_extra *extra, char *subject, int
subject_len, zval *return_value,
+ zval *subpats, int global, int preg_options, long start_offset, int
subpats_order, int offset_capture TSRMLS_DC)
+{
+
+ zval *result_set, /* Holds a set of
subpatterns after
+
a global match */
+ **match_sets = NULL; /* An array of sets of
matches for each
+
subpattern after a global match */
+ pcre_extra extra_data; /* Used locally for
exec options */
+ int exoptions = 0; /* Execution
options */
+ int count = 0; /*
Count of matched subpatterns */
+ int *offsets; /*
Array of subpattern offsets */
+ int num_subpats; /* Number of
captured subpatterns */
+ int size_offsets; /* Size of the
offsets array */
+ int matched; /* Has
anything matched */
+ int g_notempty = 0; /* If the match
should not be empty */
+ const char **stringlist; /* Holds list of
subpatterns */
+ char *match; /* The current
match */
+ char **subpat_names = NULL;/* Array for named subpatterns
*/
+ int i, rc;
+
/* Overwrite the passed-in value for subpatterns with an empty array. */
if (subpats != NULL) {
zval_dtor(subpats);
@@ -499,11 +510,6 @@
}
}
- /* Compile regex or get it from cache. */
- if ((re = pcre_get_compiled_regex(regex, &extra, &preg_options
TSRMLS_CC)) == NULL) {
- RETURN_FALSE;
- }
-
if (extra == NULL) {
extra_data.flags = PCRE_EXTRA_MATCH_LIMIT |
PCRE_EXTRA_MATCH_LIMIT_RECURSION;
extra = &extra_data;
@@ -726,7 +732,7 @@
Perform a Perl-style regular expression match */
PHP_FUNCTION(preg_match)
{
- php_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
+ php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
}
/* }}} */
@@ -734,7 +740,7 @@
Perform a Perl-style global regular expression match */
PHP_FUNCTION(preg_match_all)
{
- php_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
+ php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
/* }}} */
@@ -1335,14 +1341,6 @@
**subject, /* Subject
string to split */
**limit, /*
Number of pieces to return */
**flags;
- pcre *re = NULL; /* Compiled
regular expression */
- pcre *re_bump = NULL; /* Regex instance for
empty matches */
- pcre_extra *extra = NULL; /* Holds results of
studying */
- pcre_extra *extra_bump = NULL; /* Almost dummy */
- pcre_extra extra_data; /* Used locally for
exec options */
- int *offsets; /*
Array of subpattern offsets */
- int size_offsets; /* Size of the
offsets array */
- int exoptions = 0; /* Execution
options */
int preg_options = 0; /* Custom preg
options */
int coptions = 0; /* Custom preg options
*/
int argc; /*
Argument count */
@@ -1350,13 +1348,8 @@
int no_empty = 0; /* If NO_EMPTY
flag is set */
int delim_capture = 0; /* If delimiters
should be captured */
int offset_capture = 0;/* If offsets
should be captured */
- int count = 0; /*
Count of matched subpatterns */
- int start_offset; /* Where the
new search starts */
- int next_offset; /* End of the
last delimiter match + 1 */
- int g_notempty = 0; /* If the match
should not be empty */
- char *match, /* The current
match */
- *last_match; /* Location of
last match */
- int rc;
+ pcre *re; /* Compiled
regular expression */
+ pcre_extra *extra = NULL; /* Holds results of
studying */
/* Get function parameters and do error checking */
argc = ZEND_NUM_ARGS();
@@ -1387,6 +1380,29 @@
RETURN_FALSE;
}
+ php_pcre_split(re, extra, Z_STRVAL_PP(subject), Z_STRLEN_PP(subject),
return_value,
+ coptions, limit_val, no_empty, delim_capture, offset_capture
TSRMLS_CC);
+}
+
+/* {{{ php_pcre_split
+ */
+PHPAPI void php_pcre_split(pcre *re, pcre_extra *extra, char *subject, int
subject_len, zval *return_value,
+ int coptions, int limit_val, int no_empty, int delim_capture, int
offset_capture TSRMLS_DC)
+{
+ pcre *re_bump = NULL; /* Regex instance for
empty matches */
+ pcre_extra *extra_bump = NULL; /* Almost dummy */
+ pcre_extra extra_data; /* Used locally for
exec options */
+ int *offsets; /*
Array of subpattern offsets */
+ int size_offsets; /* Size of the
offsets array */
+ int exoptions = 0; /* Execution
options */
+ int count = 0; /*
Count of matched subpatterns */
+ int start_offset; /* Where the
new search starts */
+ int next_offset; /* End of the
last delimiter match + 1 */
+ int g_notempty = 0; /* If the match
should not be empty */
+ char *match, /* The current
match */
+ *last_match; /* Location of
last match */
+ int rc;
+
if (extra == NULL) {
extra_data.flags = PCRE_EXTRA_MATCH_LIMIT |
PCRE_EXTRA_MATCH_LIMIT_RECURSION;
extra = &extra_data;
@@ -1410,14 +1426,14 @@
/* Start at the beginning of the string */
start_offset = 0;
next_offset = 0;
- last_match = Z_STRVAL_PP(subject);
+ last_match = subject;
match = NULL;
PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
/* Get next piece if no limit or limit not yet reached and something
matched*/
while ((limit_val == -1 || limit_val > 1)) {
- count = pcre_exec(re, extra, Z_STRVAL_PP(subject),
- Z_STRLEN_PP(subject),
start_offset,
+ count = pcre_exec(re, extra, subject,
+ subject_len, start_offset,
exoptions|g_notempty,
offsets, size_offsets);
/* Check for too many substrings condition. */
@@ -1428,17 +1444,17 @@
/* If something matched */
if (count > 0) {
- match = Z_STRVAL_PP(subject) + offsets[0];
+ match = subject + offsets[0];
- if (!no_empty || &Z_STRVAL_PP(subject)[offsets[0]] !=
last_match) {
+ if (!no_empty || &subject[offsets[0]] != last_match) {
if (offset_capture) {
/* Add (match, offset) pair to the
return value */
- add_offset_pair(return_value,
last_match, &Z_STRVAL_PP(subject)[offsets[0]]-last_match, next_offset, NULL);
+ add_offset_pair(return_value,
last_match, &subject[offsets[0]]-last_match, next_offset, NULL);
} else {
/* Add the piece to the return value */
add_next_index_stringl(return_value,
last_match,
-
&Z_STRVAL_PP(subject)[offsets[0]]-last_match, 1);
+
&subject[offsets[0]]-last_match, 1);
}
/* One less left to do */
@@ -1446,7 +1462,7 @@
limit_val--;
}
- last_match = &Z_STRVAL_PP(subject)[offsets[1]];
+ last_match = &subject[offsets[1]];
next_offset = offsets[1];
if (delim_capture) {
@@ -1456,10 +1472,10 @@
/* If we have matched a delimiter */
if (!no_empty || match_len > 0) {
if (offset_capture) {
-
add_offset_pair(return_value, &Z_STRVAL_PP(subject)[offsets[i<<1]], match_len,
offsets[i<<1], NULL);
+
add_offset_pair(return_value, &subject[offsets[i<<1]], match_len,
offsets[i<<1], NULL);
} else {
add_next_index_stringl(return_value,
-
&Z_STRVAL_PP(subject)[offsets[i<<1]],
+
&subject[offsets[i<<1]],
match_len, 1);
}
}
@@ -1470,7 +1486,7 @@
this is not necessarily the end. We need to advance
the start offset, and continue. Fudge the offset
values
to achieve this, unless we're already at the end of
the string. */
- if (g_notempty != 0 && start_offset <
Z_STRLEN_PP(subject)) {
+ if (g_notempty != 0 && start_offset < subject_len) {
if (coptions & PCRE_UTF8) {
if (re_bump == NULL) {
int dummy;
@@ -1479,8 +1495,8 @@
RETURN_FALSE;
}
}
- count = pcre_exec(re_bump, extra_bump,
Z_STRVAL_PP(subject),
- Z_STRLEN_PP(subject),
start_offset,
+ count = pcre_exec(re_bump, extra_bump,
subject,
+ subject_len,
start_offset,
exoptions, offsets,
size_offsets);
if (count < 1) {
php_error_docref(NULL
TSRMLS_CC,E_NOTICE, "Unknown error");
@@ -1509,14 +1525,14 @@
}
- if (!no_empty || start_offset != Z_STRLEN_PP(subject))
+ if (!no_empty || start_offset != subject_len)
{
if (offset_capture) {
/* Add the last (match, offset) pair to the return
value */
- add_offset_pair(return_value,
&Z_STRVAL_PP(subject)[start_offset], Z_STRLEN_PP(subject) - start_offset,
start_offset, NULL);
+ add_offset_pair(return_value, &subject[start_offset],
subject_len - start_offset, start_offset, NULL);
} else {
/* Add the last piece to the return value */
- add_next_index_stringl(return_value, last_match,
Z_STRVAL_PP(subject) + Z_STRLEN_PP(subject) - last_match, 1);
+ add_next_index_stringl(return_value, last_match,
subject + subject_len - last_match, 1);
}
}
http://cvs.php.net/viewvc.cgi/php-src/ext/pcre/php_pcre.h?r1=1.41.2.1.2.2&r2=1.41.2.1.2.3&diff_format=u
Index: php-src/ext/pcre/php_pcre.h
diff -u php-src/ext/pcre/php_pcre.h:1.41.2.1.2.2
php-src/ext/pcre/php_pcre.h:1.41.2.1.2.3
--- php-src/ext/pcre/php_pcre.h:1.41.2.1.2.2 Tue May 9 23:53:40 2006
+++ php-src/ext/pcre/php_pcre.h Sun Jul 16 21:09:30 2006
@@ -16,7 +16,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: php_pcre.h,v 1.41.2.1.2.2 2006/05/09 23:53:40 helly Exp $ */
+/* $Id: php_pcre.h,v 1.41.2.1.2.3 2006/07/16 21:09:30 helly Exp $ */
#ifndef PHP_PCRE_H
#define PHP_PCRE_H
@@ -44,6 +44,10 @@
PHPAPI char *php_pcre_replace(char *regex, int regex_len, char *subject, int
subject_len, zval *replace_val, int is_callable_replace, int *result_len, int
limit, int *replace_count TSRMLS_DC);
PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int
*options TSRMLS_DC);
PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int
*preg_options, int *coptions TSRMLS_DC);
+PHPAPI void php_pcre_split(pcre *re, pcre_extra *extra, char *subject, int
subject_len, zval *return_value,
+ int coptions, int limit_val, int no_empty, int delim_capture, int
offset_capture TSRMLS_DC);
+PHPAPI void php_pcre_match(pcre *re, pcre_extra *extra, char *subject, int
subject_len, zval *return_value,
+ zval *subpats, int global, int preg_options, long start_offset, int
subpats_order, int offset_capture TSRMLS_DC);
extern zend_module_entry pcre_module_entry;
#define pcre_module_ptr &pcre_module_entry
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php