scottmac                Fri Aug 29 09:50:57 2008 UTC

  Modified files:              
    /php-src/ext/fileinfo/libmagic      apprentice.c file.h print.c 
                                        softmagic.c 
  Log:
  MFB: Replace ereg code with pcre, fix duplicate macro names and segfault. 
Patch by Mikko
  
  
  
http://cvs.php.net/viewvc.cgi/php-src/ext/fileinfo/libmagic/apprentice.c?r1=1.8&r2=1.9&diff_format=u
Index: php-src/ext/fileinfo/libmagic/apprentice.c
diff -u php-src/ext/fileinfo/libmagic/apprentice.c:1.8 
php-src/ext/fileinfo/libmagic/apprentice.c:1.9
--- php-src/ext/fileinfo/libmagic/apprentice.c:1.8      Tue Aug 26 12:24:12 2008
+++ php-src/ext/fileinfo/libmagic/apprentice.c  Fri Aug 29 09:50:57 2008
@@ -1118,7 +1118,7 @@
 
        m->mask_op = 0;
        if (*l == '~') {
-               if (!IS_STRING(m->type))
+               if (!IS_LIBMAGIC_STRING(m->type))
                        m->mask_op |= FILE_OPINVERSE;
                else if (ms->flags & MAGIC_CHECK)
                        file_magwarn(ms, "'~' invalid for string types");
@@ -1128,7 +1128,7 @@
        m->str_flags = 0;
        m->num_mask = 0;
        if ((op = get_op(*l)) != -1) {
-               if (!IS_STRING(m->type)) {
+               if (!IS_LIBMAGIC_STRING(m->type)) {
                        uint64_t val;
                        ++l;
                        m->mask_op |= op;
@@ -2066,7 +2066,7 @@
        m->offset = swap4((uint32_t)m->offset);
        m->in_offset = swap4((uint32_t)m->in_offset);
        m->lineno = swap4((uint32_t)m->lineno);
-       if (IS_STRING(m->type)) {
+       if (IS_LIBMAGIC_STRING(m->type)) {
                m->str_range = swap4(m->str_range);
                m->str_flags = swap4(m->str_flags);
        }
http://cvs.php.net/viewvc.cgi/php-src/ext/fileinfo/libmagic/file.h?r1=1.2&r2=1.3&diff_format=u
Index: php-src/ext/fileinfo/libmagic/file.h
diff -u php-src/ext/fileinfo/libmagic/file.h:1.2 
php-src/ext/fileinfo/libmagic/file.h:1.3
--- php-src/ext/fileinfo/libmagic/file.h:1.2    Tue Aug 26 12:24:12 2008
+++ php-src/ext/fileinfo/libmagic/file.h        Fri Aug 29 09:50:57 2008
@@ -46,7 +46,11 @@
 #ifdef HAVE_INTTYPES_H
 #include <inttypes.h>
 #endif
-#include <regex.h>
+
+#include "php.h"
+#include "ext/standard/php_string.h"
+#include "ext/pcre/php_pcre.h"
+
 #include <sys/types.h>
 /* Do this here and now, because struct stat gets re-defined on solaris */
 #include <sys/stat.h>
@@ -165,7 +169,7 @@
 #define                                FILE_LEDOUBLE   38
 #define                                FILE_NAMES_SIZE 39/* size of array to 
contain all names */
 
-#define IS_STRING(t) \
+#define IS_LIBMAGIC_STRING(t) \
        ((t) == FILE_STRING || \
         (t) == FILE_PSTRING || \
         (t) == FILE_BESTRING16 || \
http://cvs.php.net/viewvc.cgi/php-src/ext/fileinfo/libmagic/print.c?r1=1.1&r2=1.2&diff_format=u
Index: php-src/ext/fileinfo/libmagic/print.c
diff -u php-src/ext/fileinfo/libmagic/print.c:1.1 
php-src/ext/fileinfo/libmagic/print.c:1.2
--- php-src/ext/fileinfo/libmagic/print.c:1.1   Fri Jul 11 14:13:50 2008
+++ php-src/ext/fileinfo/libmagic/print.c       Fri Aug 29 09:50:57 2008
@@ -74,7 +74,7 @@
        if (m->mask_op & FILE_OPINVERSE)
                (void) fputc('~', stderr);
 
-       if (IS_STRING(m->type)) {
+       if (IS_LIBMAGIC_STRING(m->type)) {
                if (m->str_flags) {
                        (void) fputc('/', stderr);
                        if (m->str_flags & STRING_COMPACT_BLANK) 
http://cvs.php.net/viewvc.cgi/php-src/ext/fileinfo/libmagic/softmagic.c?r1=1.2&r2=1.3&diff_format=u
Index: php-src/ext/fileinfo/libmagic/softmagic.c
diff -u php-src/ext/fileinfo/libmagic/softmagic.c:1.2 
php-src/ext/fileinfo/libmagic/softmagic.c:1.3
--- php-src/ext/fileinfo/libmagic/softmagic.c:1.2       Tue Aug 26 12:24:12 2008
+++ php-src/ext/fileinfo/libmagic/softmagic.c   Fri Aug 29 09:50:57 2008
@@ -36,6 +36,10 @@
 #include <stdlib.h>
 #include <time.h>
 
+#ifndef PREG_OFFSET_CAPTURE
+# define PREG_OFFSET_CAPTURE                 (1<<8)
+#endif
+
 
 #ifndef        lint
 FILE_RCSID("@(#)$File: softmagic.c,v 1.117 2008/03/01 22:21:49 rrt Exp $")
@@ -281,25 +285,18 @@
 private int
 check_fmt(struct magic_set *ms, struct magic *m)
 {
-       regex_t rx = {0};
-       int rc;
-
+       pcre *pce;
+       int re_options;
+       pcre_extra *re_extra;
+       
        if (strchr(MAGIC_DESC, '%') == NULL) {
                return 0;
        }
-
-       rc = regcomp(&rx, "%[-0-9\\.]*s", REG_EXTENDED|REG_NOSUB);
-       if (rc) {
-               char errmsg[512];
-               (void)regerror(rc, &rx, errmsg, sizeof(errmsg));
-               file_magerror(ms, "regex error %d, (%s)", rc, errmsg);
+       
+       if ((pce = pcre_get_compiled_regex("~%[-0-9.]*s~", &re_extra, 
&re_options TSRMLS_CC)) == NULL) {
                return -1;
        } else {
-               regmatch_t *pmatch = (regmatch_t *)ecalloc(sizeof(regmatch_t), 
rx.re_nsub + 1);
-               rc = regexec(&rx, MAGIC_DESC, rx.re_nsub + 1, pmatch, 0);
-               efree(pmatch);
-               regfree(&rx);
-               return !rc;
+               return !pcre_exec(pce, re_extra, MAGIC_DESC, 
strlen(MAGIC_DESC), 0, re_options, NULL, 0);
        }
 }
 
@@ -1488,6 +1485,66 @@
        return file_strncmp(a, b, len, flags);
 }
 
+/* Rewrite pattern in place as a '~'-delimited PCRE string with modifiers.
+ * '?' becomes '.', '*' becomes ".*"; '.', '\', '(', ')' and '~' get a
+ * leading backslash; all other bytes are copied unchanged.
+ * options: PCRE_CASELESS appends 'i', PCRE_MULTILINE appends 'm'.
+ * NOTE(review): the regcomp() call this replaces took the pattern as a
+ * POSIX extended regex -- confirm the glob-style mapping is intended. */
+private void
+convert_libmagic_pattern(zval *pattern, int options)
+{
+               int i, j=0;
+               char *t;
+
+               /* Worst case: every byte doubles, plus "~", "~", "i", "m", NUL. */
+               t = (char *) safe_emalloc(Z_STRLEN_P(pattern), 2, 5);
+
+               t[j++] = '~';
+
+               for (i=0; i<Z_STRLEN_P(pattern); i++, j++) {
+                       switch (Z_STRVAL_P(pattern)[i]) {
+                               case '?':
+                                       t[j] = '.';
+                                       break;
+                               case '*':
+                                       t[j++] = '.';
+                                       t[j] = '*';
+                                       break;
+                               case '.':
+                               case '\\':
+                               case '(':
+                               case ')':
+                               case '~':
+                                       /* Emit the byte behind a backslash so the
+                                          compiled pattern matches it literally. */
+                                       t[j++] = '\\';
+                                       t[j] = Z_STRVAL_P(pattern)[i];
+                                       break;
+                               default:
+                                       t[j] = Z_STRVAL_P(pattern)[i];
+                                       break;
+                       }
+               }
+               t[j++] = '~';
+
+               /* PHP PCRE modifier letters: 'i' selects caseless matching,
+                  'm' selects multiline matching. */
+               if (options & PCRE_CASELESS)
+                       t[j++] = 'i';
+
+               if (options & PCRE_MULTILINE)
+                       t[j++] = 'm';
+
+               t[j]=0;
+
+               /* The zval's previous string pointer is overwritten here, not
+                  freed; the new buffer comes from safe_emalloc(). */
+               Z_STRVAL_P(pattern) = t;
+               Z_STRLEN_P(pattern) = j;
+
+}
+
 private int
 magiccheck(struct magic_set *ms, struct magic *m)
 {
@@ -1642,61 +1699,156 @@
                }
                break;
        }
+                       
        case FILE_REGEX: {
-               int rc;
-               regex_t rx = {0};
-               char errmsg[512];
-
-               if (ms->search.s == NULL)
-                       return 0;
+               zval *pattern;
+               int options = 0;
+               pcre_cache_entry *pce;
+               
+               MAKE_STD_ZVAL(pattern);
+               Z_STRVAL_P(pattern) = (char *)m->value.s;
+               Z_STRLEN_P(pattern) = m->vallen;
+               Z_TYPE_P(pattern) = IS_STRING; 
+       
+               options |= PCRE_MULTILINE;
+               
+               if (m->str_flags & STRING_IGNORE_CASE) {
+                       options |= PCRE_CASELESS;
+               }
+               
+               convert_libmagic_pattern(pattern, options);
 
-               l = 0;
-               rc = regcomp(&rx, m->value.s, 
REG_EXTENDED|REG_NEWLINE|((m->str_flags & STRING_IGNORE_CASE) ? REG_ICASE : 0));
-               if (rc) {
-                       (void)regerror(rc, &rx, errmsg, sizeof(errmsg));
-                       file_magerror(ms, "regex error %d, (%s)", rc, errmsg);
-                       v = (uint64_t)-1;
+               if ((pce = pcre_get_compiled_regex_cache(Z_STRVAL_P(pattern), 
Z_STRLEN_P(pattern) TSRMLS_CC)) == NULL) {
+                       return -1;
                } else {
-                       regmatch_t *pmatch = (regmatch_t 
*)ecalloc(sizeof(regmatch_t), rx.re_nsub + 1);
-#ifndef REG_STARTEND
-#define        REG_STARTEND    0
-                       size_t l = ms->search.s_len - 1;
-                       char c = ms->search.s[l];
-                       ((char *)(intptr_t)ms->search.s)[l] = '\0';
-#else
-                       pmatch[0].rm_so = 0;
-                       pmatch[0].rm_eo = ms->search.s_len;
-#endif
-                       rc = regexec(&rx, (const char *)ms->search.s, 1, 
pmatch, REG_STARTEND);
-#if REG_STARTEND == 0
-                       ((char *)(intptr_t)ms->search.s)[l] = c;
-#endif
-                       switch (rc) {
-                       case 0:
-                               ms->search.s += (int)pmatch[0].rm_so;
-                               ms->search.offset += (size_t)pmatch[0].rm_so;
-                               ms->search.rm_len = (size_t)(pmatch[0].rm_eo - 
pmatch[0].rm_so);
-                               v = 0;
-                               break;
+                       /* pce now contains the compiled regex */
+                       zval *retval;
+                       zval *subpats;
+                       char *haystack;
+                       
+                       MAKE_STD_ZVAL(retval);
+                       ALLOC_INIT_ZVAL(subpats);
+                       
+                       /* Cut the search len from haystack, equals to 
REG_STARTEND */
+                       haystack = estrndup(ms->search.s, ms->search.s_len);
 
-                       case REG_NOMATCH:
-                               v = 1;
-                               break;
+                       /* match v = 0, no match v = 1 */
+                       php_pcre_match_impl(pce, haystack, ms->search.s_len, 
retval, subpats, 1, 1, PREG_OFFSET_CAPTURE, 0 TSRMLS_CC);
+                       
+                       /* Free haystack */
+                       efree(haystack);
+                       
+                       if (Z_LVAL_P(retval) < 0) {
+                               zval_ptr_dtor(&subpats);
+                               FREE_ZVAL(retval);
+                               efree(Z_STRVAL_P(pattern));
+                               efree(pattern);
+                               return -1;
+                       } else if ((Z_LVAL_P(retval) > 0) && (Z_TYPE_P(subpats) 
== IS_ARRAY)) {
+                               
+                               /* Need to fetch global match which equals 
pmatch[0] */
+                               HashTable *ht = Z_ARRVAL_P(subpats);
+                               HashPosition outer_pos;
+                               zval *pattern_match = NULL, *pattern_offset = 
NULL;
+                               
+                               zend_hash_internal_pointer_reset_ex(ht, 
&outer_pos); 
+                               
+                               if (zend_hash_has_more_elements_ex(ht, 
&outer_pos) == SUCCESS &&
+                                       zend_hash_move_forward_ex(ht, 
&outer_pos)) {
+                                       
+                                       zval **ppzval;
+                                       
+                                       /* The first element (should be) is the 
global match 
+                                          Need to move to the inner array to 
get the global match */
+                                       
+                                       if (zend_hash_get_current_data_ex(ht, 
(void**)&ppzval, &outer_pos) != FAILURE) { 
+                                               
+                                               HashTable *inner_ht;
+                                               HashPosition inner_pos;
+                                               zval **match, **offset;
+                                               zval tmpcopy = **ppzval, 
matchcopy, offsetcopy;
+                                               
+                                               zval_copy_ctor(&tmpcopy); 
+                                               INIT_PZVAL(&tmpcopy);
+                                               
+                                               inner_ht = Z_ARRVAL(tmpcopy);
+                                               
+                                               /* If everything goes according 
to the master plan
+                                                  tmpcopy now contains two 
elements:
+                                                  0 = the match
+                                                  1 = starting position of the 
match */
+                                               
zend_hash_internal_pointer_reset_ex(inner_ht, &inner_pos); 
+                                               
+                                               if 
(zend_hash_has_more_elements_ex(inner_ht, &inner_pos) == SUCCESS &&
+                                                       
zend_hash_move_forward_ex(inner_ht, &inner_pos)) {
+                                               
+                                                       if 
(zend_hash_get_current_data_ex(inner_ht, (void**)&match, &inner_pos) != 
FAILURE) { 
+                                                                       
+                                                               matchcopy = 
**match;
+                                                               
zval_copy_ctor(&matchcopy);
+                                                               
INIT_PZVAL(&matchcopy);
+                                                               
convert_to_string(&matchcopy); 
+                                                               
+                                                               
MAKE_STD_ZVAL(pattern_match);
+                                                               
Z_STRVAL_P(pattern_match) = (char *)Z_STRVAL(matchcopy);
+                                                               
Z_STRLEN_P(pattern_match) = Z_STRLEN(matchcopy);
+                                                               
Z_TYPE_P(pattern_match) = IS_STRING; 
+
+                                                               
zval_dtor(&matchcopy);
+                                                       }
+                                               }
+                                               
+                                               if 
(zend_hash_has_more_elements_ex(inner_ht, &inner_pos) == SUCCESS &&
+                                                       
zend_hash_move_forward_ex(inner_ht, &inner_pos)) {
+                                                       
+                                                       if 
(zend_hash_get_current_data_ex(inner_ht, (void**)&offset, &inner_pos) != 
FAILURE) { 
+                                                               
+                                                               offsetcopy = 
**offset;
+                                                               
zval_copy_ctor(&offsetcopy);
+                                                               
INIT_PZVAL(&offsetcopy);
+                                                               
convert_to_long(&offsetcopy); 
+                                                               
+                                                               
MAKE_STD_ZVAL(pattern_offset);
+                                                               
Z_LVAL_P(pattern_offset) = Z_LVAL(offsetcopy);
+                                                               
Z_TYPE_P(pattern_offset) = IS_LONG;
+                                                               
+                                                               
zval_dtor(&offsetcopy);
+                                                       }
+                                               }
+                                               zval_dtor(&tmpcopy);    
+                                       }
+                                       
+                                       if ((pattern_match != NULL) && 
(pattern_offset != NULL)) {
+                                               ms->search.s += 
(int)Z_LVAL_P(pattern_offset); /* this is where the match starts */
+                                               ms->search.offset += 
(size_t)Z_LVAL_P(pattern_offset); /* this is where the match starts as size_t */
+                                               ms->search.rm_len = 
Z_STRLEN_P(pattern_match) /* This is the length of the matched pattern */;
+                                               v = 0;
+                                               
+                                               efree(pattern_match);
+                                               efree(pattern_offset);
+                                               
+                                       } else {
+                                               zval_ptr_dtor(&subpats);
+                                               FREE_ZVAL(retval);
+                                               efree(Z_STRVAL_P(pattern));
+                                               efree(pattern);
+                                               return -1;
+                                       }                                       
+                               }
 
-                       default:
-                               (void)regerror(rc, &rx, errmsg, sizeof(errmsg));
-                               file_magerror(ms, "regexec error %d, (%s)", rc, 
errmsg);
-                               v = (uint64_t)-1;
-                               break;
+                               
+                       } else {
+                               v = 1;
                        }
-                       regfree(&rx);
-                       efree(pmatch);
+                       zval_ptr_dtor(&subpats);
+                       FREE_ZVAL(retval);
                }
-               if (v == (uint64_t)-1) {
-                       return -1;
-               }
-               break;
+               efree(Z_STRVAL_P(pattern));
+               efree(pattern);
+               break;  
        }
+        
+                        
        default:
                file_magerror(ms, "invalid type %d in magiccheck()", m->type);
                return -1;

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to