Hello, I've just written sqlite3 loadable module which implements REGEXP operator with Perl-Compatible Regular Expressions library. It has LRU cache for compiled regular expressions, so it is probably fast.
$ cflags=`pkg-config --cflags sqlite3 libpcre` $ libs=`pkg-config --libs sqlite3 libpcre` $ gcc -shared -o pcre.so -Wall -g -fPIC $cflags pcre.c $libs Note that regular expression flags can be embedded within regular expressions themselves, e.g. sqlite> .load ./pcre.so sqlite> SELECT "asdf" REGEXP "(?i)^A"; 1 sqlite> This is native perl syntax, and this is what POSIX regex(7) does not allow (which makes them pretty much useless for REGEXP syntax). I still consider whether to release the code under the GPL or make it public domain. Are there any other sqlite3 extensions out there yet?
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <pcre.h>
#include <sqlite3ext.h>
SQLITE_EXTENSION_INIT1
typedef struct {
char *s;
pcre *p;
pcre_extra *e;
} cache_entry;
#define CACHE_SIZE 16
static
void regexp(sqlite3_context *ctx, int argc, sqlite3_value **argv)
{
const char *re, *str;
pcre *p;
pcre_extra *e;
assert(argc == 2);
re = (const char *) sqlite3_value_text(argv[0]);
if (!re) {
sqlite3_result_error(ctx, "no regexp", -1);
return;
}
str = (const char *) sqlite3_value_text(argv[1]);
if (!str) {
sqlite3_result_error(ctx, "no string", -1);
return;
}
/* simple LRU cache */
{
int i;
int found = 0;
cache_entry *cache = sqlite3_user_data(ctx);
assert(cache);
for (i = 0; i < CACHE_SIZE && cache[i].s; i++)
if (strcmp(re, cache[i].s) == 0) {
found = 1;
break;
}
if (found) {
if (i > 0) {
cache_entry c = cache[i];
memmove(cache + 1, cache, i * sizeof(cache_entry));
cache[0] = c;
}
}
else {
cache_entry c;
const char *err;
int pos;
c.p = pcre_compile(re, 0, &err, &pos, NULL);
if (!c.p) {
char *e2 = sqlite3_mprintf("%s: %s (offset %d)", re, err, pos);
sqlite3_result_error(ctx, e2, -1);
sqlite3_free(e2);
return;
}
c.e = pcre_study(c.p, 0, &err);
c.s = strdup(re);
if (!c.s) {
sqlite3_result_error(ctx, "strdup: ENOMEM", -1);
pcre_free(c.p);
pcre_free(c.e);
return;
}
i = CACHE_SIZE - 1;
if (cache[i].s) {
free(cache[i].s);
assert(cache[i].p);
pcre_free(cache[i].p);
pcre_free(cache[i].e);
}
memmove(cache + 1, cache, i * sizeof(cache_entry));
cache[0] = c;
}
p = cache[0].p;
e = cache[0].e;
}
{
int rc;
assert(p);
rc = pcre_exec(p, e, str, strlen(str), 0, 0, NULL, 0);
sqlite3_result_int(ctx, rc >= 0);
return;
}
}
int sqlite3_extension_init(sqlite3 *db, char **err, const sqlite3_api_routines
*api)
{
SQLITE_EXTENSION_INIT2(api)
cache_entry *cache = calloc(CACHE_SIZE, sizeof(cache_entry));
if (!cache) {
*err = "calloc: ENOMEM";
return 1;
}
sqlite3_create_function(db, "REGEXP", 2, SQLITE_UTF8, cache, regexp,
NULL, NULL);
return 0;
}
pgpJFDCDbJUAg.pgp
Description: PGP signature

