>     Well, actually, I would prefer to see a proper BNDM
>     implementation in the tree.
>
>     - character classes are handled for free
>       (i.e. a case insensitive search does not take longer)
>
>     - combining shift-and and automata is much faster than our
>       current naive algorithm (I may say so, I wrote it years ago)
>
>     - allows us to combine multiple patterns into one search (I
>       have not studied that part of the paper yet). This would
>       enable us to optimize the case where you pass arrays to
>       str_replace.  Instead of scanning the "haystack" one time
>       per replacement text, we would scan it only once.

Even if the compiling step of the BNDM were to be cached (like the compiled 
pcre regex) in many cases it would still be slower then php_memnstr(). I've 
run a series of tests using a simple BNDM implementation (roughly based on 
your original) and while it is faster in some cases in many cases it is not. 
When the search string is short (<=5 characters) or when it is found near the 
start of the input string regular memnstr search often proves to be faster 
often by fairly significant margin. For refenrece sake I am attaching my BNDM 
& php_memnstr() test sources that I've used.

Ilia
#include <string.h>
#include <stdio.h>

#define NED "Dynamic_DBnested.sql"
#define STR "cvs: pear /Tree/docs Dynamic_DBnested.php Dynamic_DBnested.sql 
Memory_DBnested.php Memory_DBsimple.php Memory_XML.php config.xml 
/Tree/docs/TreeEditor config.xml index.php index.tpl mysql_db.sql treeClass.php 
/Tree/docs/TreeEditor/tmp .htaccess /Tree/docs/TreeView index.php index.tpl 
/Tree/docs/TreeView/tmp .htaccess"

/*
 * BNDM algorithm implementation
 * based on sample code by Sascha Schumann 
(http://www.mail-archive.com/dev@httpd.apache.org/msg00939.html)
 */

typedef struct {
        unsigned int T[256];
        unsigned int x;
} bndm_t;
                
inline static void bndm_compile(bndm_t *t, char *str, int str_len)
{
        size_t p;
        register unsigned int s = 1;
        
        t->x = 1 << (str_len-1);
        memset(t->T, 0, sizeof(unsigned int) * 256);
                  
        for (p = str_len - 1; p > 0; p--) { 
                t->T[(int) str[p]] |= s;
                s <<= 1;
        }
        t->T[(int) str[p]] |= s;

        return;
}

static inline char *bndm (char *haystack, int haystack_len, char *needle, int 
needle_len, bndm_t *t)
{
        size_t p, j, e;
        register unsigned int b;

        p = 0;
        while (p + needle_len <= haystack_len) {
                b = -1;
                e = needle_len;
                j = needle_len - 1;
                do {
                        if ((b &= t->T[ haystack[p + j] ]) == 0) {
                                break;
                        }
                        if ((b & t->x) != 0) {
                                if (j == 0) {
                                        return (haystack + p);
                                }
                                e = j;                
                        }
                        j--;
                        b <<= 1;
                } while (b != 0);
                p += e;
        }

        return NULL;
}

int main()
{
        int n = 5000000;
        char *d;
        bndm_t foo;

        bndm_compile(&foo, NED, sizeof(NED)-1);
                                 
        while (n--) {
                if (!(d = bndm(STR, sizeof(STR)-1, NED, sizeof(NED)-1, &foo))) {
                        break;
                }
        }
        printf("matched on: %s\n", d);
                                                                 
        return 0;
}
#include <string.h>
#include <stdio.h>

#define PAT "Dynamic_DBnested.sql"
#define STR "cvs: pear /Tree/docs Dynamic_DBnested.php Dynamic_DBnested.sql 
Memory_DBnested.php Memory_DBsimple.php Memory_XML.php config.xml 
/Tree/docs/TreeEditor config.xml index.php index.tpl mysql_db.sql treeClass.php 
/Tree/docs/TreeEditor/tmp .htaccess /Tree/docs/TreeView index.php index.tpl 
/Tree/docs/TreeView/tmp .htaccess"

static inline char *php_memnstr(char *haystack, char *needle, int needle_len, char 
*end)
{
        char *p = haystack;
        char ne = needle[needle_len-1];

        end -= needle_len;

        while (p <= end) {
                if ((p = memchr(p, *needle, (end-p+1))) && ne == p[needle_len-1]) {
                        if (!memcmp(needle, p, needle_len-1)) {
                                return p;
                        }
                }
                
                if (p == NULL) {
                        return NULL;
                }
                
                p++;
        }
        
        return NULL;
}

int main()
{
        int n = 5000000;
        char *d;
                                 
        while (n--) {
                if (!(d = php_memnstr(STR, PAT, sizeof(PAT)-1, STR + sizeof(STR)-1))) {
                        break;
                }
        }
        printf("matched on: %s\n", d);
                                                                 
        return 0;
}

-- 
PHP Development Mailing List <http://www.php.net/>
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to