scottmac Fri Aug 29 09:50:57 2008 UTC
Modified files:
/php-src/ext/fileinfo/libmagic apprentice.c file.h print.c
softmagic.c
Log:
MFB: Replace ereg code with pcre, fix duplicate macro names and segfault.
Patch by Mikko
http://cvs.php.net/viewvc.cgi/php-src/ext/fileinfo/libmagic/apprentice.c?r1=1.8&r2=1.9&diff_format=u
Index: php-src/ext/fileinfo/libmagic/apprentice.c
diff -u php-src/ext/fileinfo/libmagic/apprentice.c:1.8
php-src/ext/fileinfo/libmagic/apprentice.c:1.9
--- php-src/ext/fileinfo/libmagic/apprentice.c:1.8 Tue Aug 26 12:24:12 2008
+++ php-src/ext/fileinfo/libmagic/apprentice.c Fri Aug 29 09:50:57 2008
@@ -1118,7 +1118,7 @@
m->mask_op = 0;
if (*l == '~') {
- if (!IS_STRING(m->type))
+ if (!IS_LIBMAGIC_STRING(m->type))
m->mask_op |= FILE_OPINVERSE;
else if (ms->flags & MAGIC_CHECK)
file_magwarn(ms, "'~' invalid for string types");
@@ -1128,7 +1128,7 @@
m->str_flags = 0;
m->num_mask = 0;
if ((op = get_op(*l)) != -1) {
- if (!IS_STRING(m->type)) {
+ if (!IS_LIBMAGIC_STRING(m->type)) {
uint64_t val;
++l;
m->mask_op |= op;
@@ -2066,7 +2066,7 @@
m->offset = swap4((uint32_t)m->offset);
m->in_offset = swap4((uint32_t)m->in_offset);
m->lineno = swap4((uint32_t)m->lineno);
- if (IS_STRING(m->type)) {
+ if (IS_LIBMAGIC_STRING(m->type)) {
m->str_range = swap4(m->str_range);
m->str_flags = swap4(m->str_flags);
}
http://cvs.php.net/viewvc.cgi/php-src/ext/fileinfo/libmagic/file.h?r1=1.2&r2=1.3&diff_format=u
Index: php-src/ext/fileinfo/libmagic/file.h
diff -u php-src/ext/fileinfo/libmagic/file.h:1.2
php-src/ext/fileinfo/libmagic/file.h:1.3
--- php-src/ext/fileinfo/libmagic/file.h:1.2 Tue Aug 26 12:24:12 2008
+++ php-src/ext/fileinfo/libmagic/file.h Fri Aug 29 09:50:57 2008
@@ -46,7 +46,11 @@
#ifdef HAVE_INTTYPES_H
#include <inttypes.h>
#endif
-#include <regex.h>
+
+#include "php.h"
+#include "ext/standard/php_string.h"
+#include "ext/pcre/php_pcre.h"
+
#include <sys/types.h>
/* Do this here and now, because struct stat gets re-defined on solaris */
#include <sys/stat.h>
@@ -165,7 +169,7 @@
#define FILE_LEDOUBLE 38
#define FILE_NAMES_SIZE 39/* size of array to
contain all names */
-#define IS_STRING(t) \
+#define IS_LIBMAGIC_STRING(t) \
((t) == FILE_STRING || \
(t) == FILE_PSTRING || \
(t) == FILE_BESTRING16 || \
http://cvs.php.net/viewvc.cgi/php-src/ext/fileinfo/libmagic/print.c?r1=1.1&r2=1.2&diff_format=u
Index: php-src/ext/fileinfo/libmagic/print.c
diff -u php-src/ext/fileinfo/libmagic/print.c:1.1
php-src/ext/fileinfo/libmagic/print.c:1.2
--- php-src/ext/fileinfo/libmagic/print.c:1.1 Fri Jul 11 14:13:50 2008
+++ php-src/ext/fileinfo/libmagic/print.c Fri Aug 29 09:50:57 2008
@@ -74,7 +74,7 @@
if (m->mask_op & FILE_OPINVERSE)
(void) fputc('~', stderr);
- if (IS_STRING(m->type)) {
+ if (IS_LIBMAGIC_STRING(m->type)) {
if (m->str_flags) {
(void) fputc('/', stderr);
if (m->str_flags & STRING_COMPACT_BLANK)
http://cvs.php.net/viewvc.cgi/php-src/ext/fileinfo/libmagic/softmagic.c?r1=1.2&r2=1.3&diff_format=u
Index: php-src/ext/fileinfo/libmagic/softmagic.c
diff -u php-src/ext/fileinfo/libmagic/softmagic.c:1.2
php-src/ext/fileinfo/libmagic/softmagic.c:1.3
--- php-src/ext/fileinfo/libmagic/softmagic.c:1.2 Tue Aug 26 12:24:12 2008
+++ php-src/ext/fileinfo/libmagic/softmagic.c Fri Aug 29 09:50:57 2008
@@ -36,6 +36,10 @@
#include <stdlib.h>
#include <time.h>
+#ifndef PREG_OFFSET_CAPTURE
+# define PREG_OFFSET_CAPTURE (1<<8)
+#endif
+
#ifndef lint
FILE_RCSID("@(#)$File: softmagic.c,v 1.117 2008/03/01 22:21:49 rrt Exp $")
@@ -281,25 +285,18 @@
private int
check_fmt(struct magic_set *ms, struct magic *m)
{
- regex_t rx = {0};
- int rc;
-
+ pcre *pce;
+ int re_options;
+ pcre_extra *re_extra;
+
if (strchr(MAGIC_DESC, '%') == NULL) {
return 0;
}
-
- rc = regcomp(&rx, "%[-0-9\\.]*s", REG_EXTENDED|REG_NOSUB);
- if (rc) {
- char errmsg[512];
- (void)regerror(rc, &rx, errmsg, sizeof(errmsg));
- file_magerror(ms, "regex error %d, (%s)", rc, errmsg);
+
+ if ((pce = pcre_get_compiled_regex("~%[-0-9.]*s~", &re_extra,
&re_options TSRMLS_CC)) == NULL) {
return -1;
} else {
- regmatch_t *pmatch = (regmatch_t *)ecalloc(sizeof(regmatch_t),
rx.re_nsub + 1);
- rc = regexec(&rx, MAGIC_DESC, rx.re_nsub + 1, pmatch, 0);
- efree(pmatch);
- regfree(&rx);
- return !rc;
+ return !pcre_exec(pce, re_extra, MAGIC_DESC,
strlen(MAGIC_DESC), 0, re_options, NULL, 0);
}
}
@@ -1488,6 +1485,66 @@
return file_strncmp(a, b, len, flags);
}
+/*
+ * Rewrite the libmagic pattern stored in `pattern` as a delimited PCRE
+ * pattern string: the text is wrapped in '~' delimiters, selected
+ * characters are escaped or translated, and modifier letters derived from
+ * `options` (PCRE_CASELESS, PCRE_MULTILINE) are appended.
+ *
+ * On return the zval's string pointer and length refer to a newly
+ * emalloc'ed, NUL-terminated buffer.  The previous string buffer is not
+ * freed here; the caller is responsible for releasing the new one.
+ */
+private void
+convert_libmagic_pattern(zval *pattern, int options)
+{
+    int i, j = 0;
+    char *t;
+
+    /*
+     * Worst case: two output bytes per input byte, plus two delimiters,
+     * two modifier letters and the terminating NUL.  Every byte that is
+     * later read is written explicitly below, so no pre-clearing is needed
+     * (the former memset(t, 0, sizeof(t)) cleared sizeof(char *) bytes and
+     * could overrun the buffer for patterns shorter than two bytes).
+     */
+    t = (char *) safe_emalloc(Z_STRLEN_P(pattern), 2, 5);
+
+    t[j++] = '~';
+
+    /*
+     * NOTE(review): '?' and '*' are translated as if they were glob
+     * wildcards.  If magic patterns are already regular expressions this
+     * changes their meaning -- confirm against the magic file format.
+     */
+    for (i = 0; i < Z_STRLEN_P(pattern); i++, j++) {
+        switch (Z_STRVAL_P(pattern)[i]) {
+            case '?':
+                t[j] = '.';
+                break;
+            case '*':
+                t[j++] = '.';
+                t[j] = '*';
+                break;
+            case '.':
+                t[j++] = '\\';
+                t[j] = '.';
+                break;
+            case '\\':
+                t[j++] = '\\';
+                t[j] = '\\';
+                break;
+            case '(':
+                t[j++] = '\\';
+                t[j] = '(';
+                break;
+            case ')':
+                t[j++] = '\\';
+                t[j] = ')';
+                break;
+            case '~':
+                /* The delimiter itself must be escaped inside the pattern. */
+                t[j++] = '\\';
+                t[j] = '~';
+                break;
+            default:
+                t[j] = Z_STRVAL_P(pattern)[i];
+                break;
+        }
+    }
+    t[j++] = '~';
+
+    /* PHP pattern modifiers: 'i' is PCRE_CASELESS, 'm' is PCRE_MULTILINE. */
+    if (options & PCRE_CASELESS) {
+        t[j++] = 'i';
+    }
+
+    if (options & PCRE_MULTILINE) {
+        t[j++] = 'm';
+    }
+
+    t[j] = '\0';
+
+    Z_STRVAL_P(pattern) = t;
+    Z_STRLEN_P(pattern) = j;
+}
+
private int
magiccheck(struct magic_set *ms, struct magic *m)
{
@@ -1642,61 +1699,156 @@
}
break;
}
+
case FILE_REGEX: {
- int rc;
- regex_t rx = {0};
- char errmsg[512];
-
- if (ms->search.s == NULL)
- return 0;
+ zval *pattern;
+ int options = 0;
+ pcre_cache_entry *pce;
+
+ MAKE_STD_ZVAL(pattern);
+ Z_STRVAL_P(pattern) = (char *)m->value.s;
+ Z_STRLEN_P(pattern) = m->vallen;
+ Z_TYPE_P(pattern) = IS_STRING;
+
+ options |= PCRE_MULTILINE;
+
+ if (m->str_flags & STRING_IGNORE_CASE) {
+ options |= PCRE_CASELESS;
+ }
+
+ convert_libmagic_pattern(pattern, options);
- l = 0;
- rc = regcomp(&rx, m->value.s,
REG_EXTENDED|REG_NEWLINE|((m->str_flags & STRING_IGNORE_CASE) ? REG_ICASE : 0));
- if (rc) {
- (void)regerror(rc, &rx, errmsg, sizeof(errmsg));
- file_magerror(ms, "regex error %d, (%s)", rc, errmsg);
- v = (uint64_t)-1;
+ if ((pce = pcre_get_compiled_regex_cache(Z_STRVAL_P(pattern),
Z_STRLEN_P(pattern) TSRMLS_CC)) == NULL) {
+ return -1;
} else {
- regmatch_t *pmatch = (regmatch_t
*)ecalloc(sizeof(regmatch_t), rx.re_nsub + 1);
-#ifndef REG_STARTEND
-#define REG_STARTEND 0
- size_t l = ms->search.s_len - 1;
- char c = ms->search.s[l];
- ((char *)(intptr_t)ms->search.s)[l] = '\0';
-#else
- pmatch[0].rm_so = 0;
- pmatch[0].rm_eo = ms->search.s_len;
-#endif
- rc = regexec(&rx, (const char *)ms->search.s, 1,
pmatch, REG_STARTEND);
-#if REG_STARTEND == 0
- ((char *)(intptr_t)ms->search.s)[l] = c;
-#endif
- switch (rc) {
- case 0:
- ms->search.s += (int)pmatch[0].rm_so;
- ms->search.offset += (size_t)pmatch[0].rm_so;
- ms->search.rm_len = (size_t)(pmatch[0].rm_eo -
pmatch[0].rm_so);
- v = 0;
- break;
+ /* pce now contains the compiled regex */
+ zval *retval;
+ zval *subpats;
+ char *haystack;
+
+ MAKE_STD_ZVAL(retval);
+ ALLOC_INIT_ZVAL(subpats);
+
+ /* Cut the search len from haystack, equals to
REG_STARTEND */
+ haystack = estrndup(ms->search.s, ms->search.s_len);
- case REG_NOMATCH:
- v = 1;
- break;
+ /* match v = 0, no match v = 1 */
+ php_pcre_match_impl(pce, haystack, ms->search.s_len,
retval, subpats, 1, 1, PREG_OFFSET_CAPTURE, 0 TSRMLS_CC);
+
+ /* Free haystack */
+ efree(haystack);
+
+ if (Z_LVAL_P(retval) < 0) {
+ zval_ptr_dtor(&subpats);
+ FREE_ZVAL(retval);
+ efree(Z_STRVAL_P(pattern));
+ efree(pattern);
+ return -1;
+ } else if ((Z_LVAL_P(retval) > 0) && (Z_TYPE_P(subpats)
== IS_ARRAY)) {
+
+ /* Need to fetch global match which equals
pmatch[0] */
+ HashTable *ht = Z_ARRVAL_P(subpats);
+ HashPosition outer_pos;
+ zval *pattern_match = NULL, *pattern_offset =
NULL;
+
+ zend_hash_internal_pointer_reset_ex(ht,
&outer_pos);
+
+ if (zend_hash_has_more_elements_ex(ht,
&outer_pos) == SUCCESS &&
+ zend_hash_move_forward_ex(ht,
&outer_pos)) {
+
+ zval **ppzval;
+
+ /* The first element (should be) is the
global match
+ Need to move to the inner array to
get the global match */
+
+ if (zend_hash_get_current_data_ex(ht,
(void**)&ppzval, &outer_pos) != FAILURE) {
+
+ HashTable *inner_ht;
+ HashPosition inner_pos;
+ zval **match, **offset;
+ zval tmpcopy = **ppzval,
matchcopy, offsetcopy;
+
+ zval_copy_ctor(&tmpcopy);
+ INIT_PZVAL(&tmpcopy);
+
+ inner_ht = Z_ARRVAL(tmpcopy);
+
+ /* If everything goes according
to the master plan
+ tmpcopy now contains two
elements:
+ 0 = the match
+ 1 = starting position of the
match */
+
zend_hash_internal_pointer_reset_ex(inner_ht, &inner_pos);
+
+ if
(zend_hash_has_more_elements_ex(inner_ht, &inner_pos) == SUCCESS &&
+
zend_hash_move_forward_ex(inner_ht, &inner_pos)) {
+
+ if
(zend_hash_get_current_data_ex(inner_ht, (void**)&match, &inner_pos) !=
FAILURE) {
+
+ matchcopy =
**match;
+
zval_copy_ctor(&matchcopy);
+
INIT_PZVAL(&matchcopy);
+
convert_to_string(&matchcopy);
+
+
MAKE_STD_ZVAL(pattern_match);
+
Z_STRVAL_P(pattern_match) = (char *)Z_STRVAL(matchcopy);
+
Z_STRLEN_P(pattern_match) = Z_STRLEN(matchcopy);
+
Z_TYPE_P(pattern_match) = IS_STRING;
+
+
zval_dtor(&matchcopy);
+ }
+ }
+
+ if
(zend_hash_has_more_elements_ex(inner_ht, &inner_pos) == SUCCESS &&
+
zend_hash_move_forward_ex(inner_ht, &inner_pos)) {
+
+ if
(zend_hash_get_current_data_ex(inner_ht, (void**)&offset, &inner_pos) !=
FAILURE) {
+
+ offsetcopy =
**offset;
+
zval_copy_ctor(&offsetcopy);
+
INIT_PZVAL(&offsetcopy);
+
convert_to_long(&offsetcopy);
+
+
MAKE_STD_ZVAL(pattern_offset);
+
Z_LVAL_P(pattern_offset) = Z_LVAL(offsetcopy);
+
Z_TYPE_P(pattern_offset) = IS_LONG;
+
+
zval_dtor(&offsetcopy);
+ }
+ }
+ zval_dtor(&tmpcopy);
+ }
+
+ if ((pattern_match != NULL) &&
(pattern_offset != NULL)) {
+ ms->search.s +=
(int)Z_LVAL_P(pattern_offset); /* this is where the match starts */
+ ms->search.offset +=
(size_t)Z_LVAL_P(pattern_offset); /* this is where the match starts as size_t */
+ ms->search.rm_len =
Z_STRLEN_P(pattern_match) /* This is the length of the matched pattern */;
+ v = 0;
+
+ efree(pattern_match);
+ efree(pattern_offset);
+
+ } else {
+ zval_ptr_dtor(&subpats);
+ FREE_ZVAL(retval);
+ efree(Z_STRVAL_P(pattern));
+ efree(pattern);
+ return -1;
+ }
+ }
- default:
- (void)regerror(rc, &rx, errmsg, sizeof(errmsg));
- file_magerror(ms, "regexec error %d, (%s)", rc,
errmsg);
- v = (uint64_t)-1;
- break;
+
+ } else {
+ v = 1;
}
- regfree(&rx);
- efree(pmatch);
+ zval_ptr_dtor(&subpats);
+ FREE_ZVAL(retval);
}
- if (v == (uint64_t)-1) {
- return -1;
- }
- break;
+ efree(Z_STRVAL_P(pattern));
+ efree(pattern);
+ break;
}
+
+
default:
file_magerror(ms, "invalid type %d in magiccheck()", m->type);
return -1;
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php