Hello,

I've just written sqlite3 loadable module which implements REGEXP
operator with Perl-Compatible Regular Expressions library.  It has
LRU cache for compiled regular expressions, so it is probably fast.

$ cflags=`pkg-config --cflags sqlite3 libpcre`
$ libs=`pkg-config --libs sqlite3 libpcre`
$ gcc -shared -o pcre.so -Wall -g -fPIC $cflags pcre.c $libs

Note that regular expression flags can be embedded within regular
expressions themselves, e.g.

sqlite> .load ./pcre.so
sqlite> SELECT "asdf" REGEXP "(?i)^A";
1
sqlite> 

This is native perl syntax, and this is what POSIX regex(7) does
not allow (which makes them pretty much useless for REGEXP syntax).

I still consider whether to release the code under the GPL or 
make it public domain.  Are there any other sqlite3 extensions
out there yet?
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <pcre.h>
#include <sqlite3ext.h>
SQLITE_EXTENSION_INIT1

typedef struct {
    char *s;
    pcre *p;
    pcre_extra *e;
} cache_entry;

#define CACHE_SIZE 16

static
void regexp(sqlite3_context *ctx, int argc, sqlite3_value **argv)
{
    const char *re, *str;
    pcre *p;
    pcre_extra *e;

    assert(argc == 2);

    re = (const char *) sqlite3_value_text(argv[0]);
    if (!re) {
        sqlite3_result_error(ctx, "no regexp", -1);
        return;
    }

    str = (const char *) sqlite3_value_text(argv[1]);
    if (!str) {
        sqlite3_result_error(ctx, "no string", -1);
        return;
    }

    /* simple LRU cache */
    {
        int i;
        int found = 0;
        cache_entry *cache = sqlite3_user_data(ctx);

        assert(cache);

        for (i = 0; i < CACHE_SIZE && cache[i].s; i++)
            if (strcmp(re, cache[i].s) == 0) {
                found = 1;
                break;
            }
        if (found) {
            if (i > 0) {
                cache_entry c = cache[i];
                memmove(cache + 1, cache, i * sizeof(cache_entry));
                cache[0] = c;
            }
        }
        else {
            cache_entry c;
            const char *err;
            int pos;
            c.p = pcre_compile(re, 0, &err, &pos, NULL);
            if (!c.p) {
                char *e2 = sqlite3_mprintf("%s: %s (offset %d)", re, err, pos);
                sqlite3_result_error(ctx, e2, -1);
                sqlite3_free(e2);
                return;
            }
            c.e = pcre_study(c.p, 0, &err);
            c.s = strdup(re);
            if (!c.s) {
                sqlite3_result_error(ctx, "strdup: ENOMEM", -1);
                pcre_free(c.p);
                pcre_free(c.e);
                return;
            }
            i = CACHE_SIZE - 1;
            if (cache[i].s) {
                free(cache[i].s);
                assert(cache[i].p);
                pcre_free(cache[i].p);
                pcre_free(cache[i].e);
            }
            memmove(cache + 1, cache, i * sizeof(cache_entry));
            cache[0] = c;
        }
        p = cache[0].p;
        e = cache[0].e;
    }

    {
        int rc;
        assert(p);
        rc = pcre_exec(p, e, str, strlen(str), 0, 0, NULL, 0);
        sqlite3_result_int(ctx, rc >= 0);
        return;
    }
}

int sqlite3_extension_init(sqlite3 *db, char **err, const sqlite3_api_routines 
*api)
{
        SQLITE_EXTENSION_INIT2(api)
        cache_entry *cache = calloc(CACHE_SIZE, sizeof(cache_entry));
        if (!cache) {
            *err = "calloc: ENOMEM";
            return 1;
        }
        sqlite3_create_function(db, "REGEXP", 2, SQLITE_UTF8, cache, regexp, 
NULL, NULL);
        return 0;
}

Attachment: pgpJFDCDbJUAg.pgp
Description: PGP signature

Reply via email to