scottmac Fri Aug 29 09:50:57 2008 UTC Modified files: /php-src/ext/fileinfo/libmagic apprentice.c file.h print.c softmagic.c Log: MFB: Replace ereg code with pcre, fix duplicate macro names and segfault. Patch by Mikko
http://cvs.php.net/viewvc.cgi/php-src/ext/fileinfo/libmagic/apprentice.c?r1=1.8&r2=1.9&diff_format=u Index: php-src/ext/fileinfo/libmagic/apprentice.c diff -u php-src/ext/fileinfo/libmagic/apprentice.c:1.8 php-src/ext/fileinfo/libmagic/apprentice.c:1.9 --- php-src/ext/fileinfo/libmagic/apprentice.c:1.8 Tue Aug 26 12:24:12 2008 +++ php-src/ext/fileinfo/libmagic/apprentice.c Fri Aug 29 09:50:57 2008 @@ -1118,7 +1118,7 @@ m->mask_op = 0; if (*l == '~') { - if (!IS_STRING(m->type)) + if (!IS_LIBMAGIC_STRING(m->type)) m->mask_op |= FILE_OPINVERSE; else if (ms->flags & MAGIC_CHECK) file_magwarn(ms, "'~' invalid for string types"); @@ -1128,7 +1128,7 @@ m->str_flags = 0; m->num_mask = 0; if ((op = get_op(*l)) != -1) { - if (!IS_STRING(m->type)) { + if (!IS_LIBMAGIC_STRING(m->type)) { uint64_t val; ++l; m->mask_op |= op; @@ -2066,7 +2066,7 @@ m->offset = swap4((uint32_t)m->offset); m->in_offset = swap4((uint32_t)m->in_offset); m->lineno = swap4((uint32_t)m->lineno); - if (IS_STRING(m->type)) { + if (IS_LIBMAGIC_STRING(m->type)) { m->str_range = swap4(m->str_range); m->str_flags = swap4(m->str_flags); } http://cvs.php.net/viewvc.cgi/php-src/ext/fileinfo/libmagic/file.h?r1=1.2&r2=1.3&diff_format=u Index: php-src/ext/fileinfo/libmagic/file.h diff -u php-src/ext/fileinfo/libmagic/file.h:1.2 php-src/ext/fileinfo/libmagic/file.h:1.3 --- php-src/ext/fileinfo/libmagic/file.h:1.2 Tue Aug 26 12:24:12 2008 +++ php-src/ext/fileinfo/libmagic/file.h Fri Aug 29 09:50:57 2008 @@ -46,7 +46,11 @@ #ifdef HAVE_INTTYPES_H #include <inttypes.h> #endif -#include <regex.h> + +#include "php.h" +#include "ext/standard/php_string.h" +#include "ext/pcre/php_pcre.h" + #include <sys/types.h> /* Do this here and now, because struct stat gets re-defined on solaris */ #include <sys/stat.h> @@ -165,7 +169,7 @@ #define FILE_LEDOUBLE 38 #define FILE_NAMES_SIZE 39/* size of array to contain all names */ -#define IS_STRING(t) \ +#define IS_LIBMAGIC_STRING(t) \ ((t) == FILE_STRING || \ (t) == FILE_PSTRING || \ (t) == FILE_BESTRING16 || \ http://cvs.php.net/viewvc.cgi/php-src/ext/fileinfo/libmagic/print.c?r1=1.1&r2=1.2&diff_format=u Index: php-src/ext/fileinfo/libmagic/print.c diff -u php-src/ext/fileinfo/libmagic/print.c:1.1 php-src/ext/fileinfo/libmagic/print.c:1.2 --- php-src/ext/fileinfo/libmagic/print.c:1.1 Fri Jul 11 14:13:50 2008 +++ php-src/ext/fileinfo/libmagic/print.c Fri Aug 29 09:50:57 2008 @@ -74,7 +74,7 @@ if (m->mask_op & FILE_OPINVERSE) (void) fputc('~', stderr); - if (IS_STRING(m->type)) { + if (IS_LIBMAGIC_STRING(m->type)) { if (m->str_flags) { (void) fputc('/', stderr); if (m->str_flags & STRING_COMPACT_BLANK) http://cvs.php.net/viewvc.cgi/php-src/ext/fileinfo/libmagic/softmagic.c?r1=1.2&r2=1.3&diff_format=u Index: php-src/ext/fileinfo/libmagic/softmagic.c diff -u php-src/ext/fileinfo/libmagic/softmagic.c:1.2 php-src/ext/fileinfo/libmagic/softmagic.c:1.3 --- php-src/ext/fileinfo/libmagic/softmagic.c:1.2 Tue Aug 26 12:24:12 2008 +++ php-src/ext/fileinfo/libmagic/softmagic.c Fri Aug 29 09:50:57 2008 @@ -36,6 +36,10 @@ #include <stdlib.h> #include <time.h> +#ifndef PREG_OFFSET_CAPTURE +# define PREG_OFFSET_CAPTURE (1<<8) +#endif + #ifndef lint FILE_RCSID("@(#)$File: softmagic.c,v 1.117 2008/03/01 22:21:49 rrt Exp $") @@ -281,25 +285,18 @@ private int check_fmt(struct magic_set *ms, struct magic *m) { - regex_t rx = {0}; - int rc; - + pcre *pce; + int re_options; + pcre_extra *re_extra; + if (strchr(MAGIC_DESC, '%') == NULL) { return 0; } - - rc = regcomp(&rx, "%[-0-9\\.]*s", REG_EXTENDED|REG_NOSUB); - if (rc) { - char errmsg[512]; - (void)regerror(rc, &rx, errmsg, sizeof(errmsg)); - file_magerror(ms, "regex error %d, (%s)", rc, errmsg); + + if ((pce = pcre_get_compiled_regex("~%[-0-9.]*s~", &re_extra, &re_options TSRMLS_CC)) == NULL) { return -1; } else { - regmatch_t *pmatch = (regmatch_t *)ecalloc(sizeof(regmatch_t), rx.re_nsub + 1); - rc = regexec(&rx, MAGIC_DESC, rx.re_nsub + 1, pmatch, 0); - efree(pmatch); - regfree(&rx); - return !rc; + return !pcre_exec(pce, re_extra, MAGIC_DESC, strlen(MAGIC_DESC), 0, re_options, NULL, 0); } } @@ -1488,6 +1485,66 @@ return file_strncmp(a, b, len, flags); } +private void +convert_libmagic_pattern(zval *pattern, int options) +{ + int i, j=0; + char *t; + + t = (char *) safe_emalloc(Z_STRLEN_P(pattern), 2, 5); + memset(t, '\0', sizeof(t)); + + t[j++] = '~'; + + for (i=0; i<Z_STRLEN_P(pattern); i++, j++) { + switch (Z_STRVAL_P(pattern)[i]) { + case '?': + t[j] = '.'; + break; + case '*': + t[j++] = '.'; + t[j] = '*'; + break; + case '.': + t[j++] = '\\'; + t[j] = '.'; + break; + case '\\': + t[j++] = '\\'; + t[j] = '\\'; + break; + case '(': + t[j++] = '\\'; + t[j] = '('; + break; + case ')': + t[j++] = '\\'; + t[j] = ')'; + break; + case '~': + t[j++] = '\\'; + t[j] = '~'; + break; + default: + t[j] = Z_STRVAL_P(pattern)[i]; + break; + } + } + t[j++] = '~'; + + if (options & PCRE_CASELESS) + t[j++] = 'm'; + + if (options & PCRE_MULTILINE) + t[j++] = 'i'; + + t[j]=0; + + Z_STRVAL_P(pattern) = t; + Z_STRLEN_P(pattern) = j; + +} + private int magiccheck(struct magic_set *ms, struct magic *m) { @@ -1642,61 +1699,156 @@ } break; } + case FILE_REGEX: { - int rc; - regex_t rx = {0}; - char errmsg[512]; - - if (ms->search.s == NULL) - return 0; + zval *pattern; + int options = 0; + pcre_cache_entry *pce; + + MAKE_STD_ZVAL(pattern); + Z_STRVAL_P(pattern) = (char *)m->value.s; + Z_STRLEN_P(pattern) = m->vallen; + Z_TYPE_P(pattern) = IS_STRING; + + options |= PCRE_MULTILINE; + + if (m->str_flags & STRING_IGNORE_CASE) { + options |= PCRE_CASELESS; + } + + convert_libmagic_pattern(pattern, options); - l = 0; - rc = regcomp(&rx, m->value.s, REG_EXTENDED|REG_NEWLINE|((m->str_flags & STRING_IGNORE_CASE) ? REG_ICASE : 0)); - if (rc) { - (void)regerror(rc, &rx, errmsg, sizeof(errmsg)); - file_magerror(ms, "regex error %d, (%s)", rc, errmsg); - v = (uint64_t)-1; + if ((pce = pcre_get_compiled_regex_cache(Z_STRVAL_P(pattern), Z_STRLEN_P(pattern) TSRMLS_CC)) == NULL) { + return -1; } else { - regmatch_t *pmatch = (regmatch_t *)ecalloc(sizeof(regmatch_t), rx.re_nsub + 1); -#ifndef REG_STARTEND -#define REG_STARTEND 0 - size_t l = ms->search.s_len - 1; - char c = ms->search.s[l]; - ((char *)(intptr_t)ms->search.s)[l] = '\0'; -#else - pmatch[0].rm_so = 0; - pmatch[0].rm_eo = ms->search.s_len; -#endif - rc = regexec(&rx, (const char *)ms->search.s, 1, pmatch, REG_STARTEND); -#if REG_STARTEND == 0 - ((char *)(intptr_t)ms->search.s)[l] = c; -#endif - switch (rc) { - case 0: - ms->search.s += (int)pmatch[0].rm_so; - ms->search.offset += (size_t)pmatch[0].rm_so; - ms->search.rm_len = (size_t)(pmatch[0].rm_eo - pmatch[0].rm_so); - v = 0; - break; + /* pce now contains the compiled regex */ + zval *retval; + zval *subpats; + char *haystack; + + MAKE_STD_ZVAL(retval); + ALLOC_INIT_ZVAL(subpats); + + /* Cut the search len from haystack, equals to REG_STARTEND */ + haystack = estrndup(ms->search.s, ms->search.s_len); - case REG_NOMATCH: - v = 1; - break; + /* match v = 0, no match v = 1 */ + php_pcre_match_impl(pce, haystack, ms->search.s_len, retval, subpats, 1, 1, PREG_OFFSET_CAPTURE, 0 TSRMLS_CC); + + /* Free haystack */ + efree(haystack); + + if (Z_LVAL_P(retval) < 0) { + zval_ptr_dtor(&subpats); + FREE_ZVAL(retval); + efree(Z_STRVAL_P(pattern)); + efree(pattern); + return -1; + } else if ((Z_LVAL_P(retval) > 0) && (Z_TYPE_P(subpats) == IS_ARRAY)) { + + /* Need to fetch global match which equals pmatch[0] */ + HashTable *ht = Z_ARRVAL_P(subpats); + HashPosition outer_pos; + zval *pattern_match = NULL, *pattern_offset = NULL; + + zend_hash_internal_pointer_reset_ex(ht, &outer_pos); + + if (zend_hash_has_more_elements_ex(ht, &outer_pos) == SUCCESS && + zend_hash_move_forward_ex(ht, &outer_pos)) { + + zval **ppzval; + + /* The first element (should be) is the global match + Need to move to the inner array to get the global match */ + + if (zend_hash_get_current_data_ex(ht, (void**)&ppzval, &outer_pos) != FAILURE) { + + HashTable *inner_ht; + HashPosition inner_pos; + zval **match, **offset; + zval tmpcopy = **ppzval, matchcopy, offsetcopy; + + zval_copy_ctor(&tmpcopy); + INIT_PZVAL(&tmpcopy); + + inner_ht = Z_ARRVAL(tmpcopy); + + /* If everything goes according to the master plan + tmpcopy now contains two elements: + 0 = the match + 1 = starting position of the match */ + zend_hash_internal_pointer_reset_ex(inner_ht, &inner_pos); + + if (zend_hash_has_more_elements_ex(inner_ht, &inner_pos) == SUCCESS && + zend_hash_move_forward_ex(inner_ht, &inner_pos)) { + + if (zend_hash_get_current_data_ex(inner_ht, (void**)&match, &inner_pos) != FAILURE) { + + matchcopy = **match; + zval_copy_ctor(&matchcopy); + INIT_PZVAL(&matchcopy); + convert_to_string(&matchcopy); + + MAKE_STD_ZVAL(pattern_match); + Z_STRVAL_P(pattern_match) = (char *)Z_STRVAL(matchcopy); + Z_STRLEN_P(pattern_match) = Z_STRLEN(matchcopy); + Z_TYPE_P(pattern_match) = IS_STRING; + + zval_dtor(&matchcopy); + } + } + + if (zend_hash_has_more_elements_ex(inner_ht, &inner_pos) == SUCCESS && + zend_hash_move_forward_ex(inner_ht, &inner_pos)) { + + if (zend_hash_get_current_data_ex(inner_ht, (void**)&offset, &inner_pos) != FAILURE) { + + offsetcopy = **offset; + zval_copy_ctor(&offsetcopy); + INIT_PZVAL(&offsetcopy); + convert_to_long(&offsetcopy); + + MAKE_STD_ZVAL(pattern_offset); + Z_LVAL_P(pattern_offset) = Z_LVAL(offsetcopy); + Z_TYPE_P(pattern_offset) = IS_LONG; + + zval_dtor(&offsetcopy); + } + } + zval_dtor(&tmpcopy); + } + + if ((pattern_match != NULL) && (pattern_offset != NULL)) { + ms->search.s += (int)Z_LVAL_P(pattern_offset); /* this is where the match starts */ + ms->search.offset += (size_t)Z_LVAL_P(pattern_offset); /* this is where the match starts as size_t */ + ms->search.rm_len = Z_STRLEN_P(pattern_match) /* This is the length of the matched pattern */; + v = 0; + + efree(pattern_match); + efree(pattern_offset); + + } else { + zval_ptr_dtor(&subpats); + FREE_ZVAL(retval); + efree(Z_STRVAL_P(pattern)); + efree(pattern); + return -1; + } + } - default: - (void)regerror(rc, &rx, errmsg, sizeof(errmsg)); - file_magerror(ms, "regexec error %d, (%s)", rc, errmsg); - v = (uint64_t)-1; - break; + + } else { + v = 1; } - regfree(&rx); - efree(pmatch); + zval_ptr_dtor(&subpats); + FREE_ZVAL(retval); } - if (v == (uint64_t)-1) { - return -1; - } - break; + efree(Z_STRVAL_P(pattern)); + efree(pattern); + break; } + + default: file_magerror(ms, "invalid type %d in magiccheck()", m->type); return -1;
-- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php