Hi,
when using bind variables I get a huge performace drop compared to
using a plain string. The query is demonstrated in the attached file
"problematic_query.py".
The database used can be downloaded from
http://xile.org/le/prepared_statement.zip (1.75 MB)
or generated by using the attached file "create_test_db.py".
Kind regards,
Markus
# -*- coding: utf-8 -*-
import time
from pysqlite2 import dbapi2 as sqlite
print 'pysqlite %s, sqlite %s' % (sqlite.version, sqlite.sqlite_version)
x = sqlite.connect('test.db3')
c = x.cursor()
# get all entries which contain a specific word
start = time.time()
c.execute("""SELECT * FROM entry, word, word_entry WHERE
entry.id = word_entry.entry_id AND
word.id = word_entry.word_id AND
word.word GLOB 'hui*'
""")
print '%.3f seconds' % (time.time() - start) # 0.000 seconds
start = time.time()
c.execute("""SELECT * FROM entry, word, word_entry WHERE
entry.id = word_entry.entry_id AND
word.id = word_entry.word_id AND
word.word GLOB ?
""", ('hui*',))
print '%.3f seconds' % (time.time() - start) # 0.297 seconds
c.close()
x.close()
# -*- coding: utf-8 -*-
from random import choice
import re
from pysqlite2 import dbapi2 as sqlite
##x = sqlite.connect(':memory:')
x = sqlite.connect('test.db3')
c = x.cursor()
def generate_words():
words = []
for count in range(20):
words.append(''.join([choice('aeiou' if i%2 else 'bcdfghklmnprstw') for i in range(4)]))
return ' '.join(words)
# schema
c.execute("""CREATE TABLE entry (
id INTEGER PRIMARY KEY,
note LONGTEXT NOT NULL
)""")
c.execute("""CREATE TABLE word (
id INTEGER PRIMARY KEY,
word VARCHAR(40) NOT NULL UNIQUE
)""")
c.execute("""CREATE TABLE word_entry (
id INTEGER PRIMARY KEY,
word_id INT NOT NULL CONSTRAINT word_id_exists REFERENCES word(id) ,
entry_id INT NOT NULL CONSTRAINT entry_id_exists REFERENCES entry(id)
)""")
c.execute("""CREATE INDEX word_entry_entryIndex ON word_entry (entry_id)""")
c.execute("""CREATE UNIQUE INDEX word_entry_wordEntryIndex ON word_entry (word_id, entry_id)""")
# fill 'entry' table
for count in range(1, 5001):
c.execute('INSERT INTO entry (id, note) VALUES (?, ?)', (count, generate_words()))
# build index
regexp = re.compile('\w+', re.UNICODE)
wordDict = {}
id = 1
c.execute('SELECT id, note FROM entry')
for entry_id, note in c.fetchall():
for word in set(regexp.findall(note.lower())):
if len(word) <= 40:
if word in wordDict:
word_id = wordDict[word]
else:
word_id = id
c.execute('INSERT INTO word (id, word) VALUES (?, ?)', (id, word))
wordDict[word] = id
id += 1
c.execute('INSERT INTO word_entry (word_id, entry_id) VALUES (?, ?)', (word_id, entry_id))
c.close()
x.commit()
x.close()
-----------------------------------------------------------------------------
To unsubscribe, send email to [EMAIL PROTECTED]
-----------------------------------------------------------------------------