Update of /cvsroot/freevo/kaa/vfs/src
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv3312/src

Added Files:
        .cvsignore __init__.py db.py 
Log Message:
Create vfs module, added cleaned up database code for proposed design.


--- NEW FILE: db.py ---
import string
import os
import time
import cPickle
from pysqlite2 import dbapi2 as sqlite

# Schema executed when a fresh database file is created (see
# Database._create_db).  BUGFIX: the words index previously carried an
# "ON CONFLICT REPLACE" clause, but SQLite only permits conflict clauses
# inside CREATE TABLE constraints, not on CREATE INDEX -- the statement
# was a syntax error and the index was never created.
CREATE_SCHEMA = """
    CREATE TABLE meta (
        attr        TEXT UNIQUE, 
        value       TEXT
    );
    INSERT INTO meta VALUES('filecount', 0);
    INSERT INTO meta VALUES('version', 0.1);

    CREATE TABLE types (
        id              INTEGER PRIMARY KEY AUTOINCREMENT, 
        name            TEXT UNIQUE,
        attrs_pickle    TEXT
    );

    CREATE TABLE words (
        id              INTEGER PRIMARY KEY AUTOINCREMENT,
        word            TEXT,
        count           INTEGER
    );
    CREATE UNIQUE INDEX words_idx ON words (word);

    CREATE TABLE words_map (
        rank            INTEGER,
        word_id         INTEGER,
        file_type       INTEGER,
        file_id         INTEGER,
        frequency       FLOAT
    );
    CREATE INDEX words_map_idx ON words_map (word_id, rank, file_type);
"""


# Flags for attributes passed to Database.register_object_type_attrs().
# ATTR_SIMPLE attributes live only in the pickled dict; any non-zero flag
# makes the attribute a real SQL column.
ATTR_SIMPLE       = 0x00
ATTR_SEARCHABLE   = 0x01      # Is a SQL column, not a pickled field
ATTR_INDEXED      = 0x02      # Will have an SQL index
ATTR_KEYWORDS     = 0x04      # Also indexed for keyword queries


class Database:
    """
    SQLite-backed object store for the vfs.

    Objects are grouped by a registered type name; each type gets its own
    objects_<type> table whose columns are the type's non-simple
    attributes, plus a "pickle" column holding a pickled dict of all
    ATTR_SIMPLE attributes.
    """
    def __init__(self, dbfile = None):
        """
        Open (creating if necessary) the database in file ``dbfile``.
        Defaults to "kaavfs.sqlite" in the current directory.
        """
        if not dbfile:
            dbfile = "kaavfs.sqlite"

        self._object_types = {}
        self._dbfile = dbfile
        self._open_db()

    def __del__(self):
        # Best effort: flush outstanding changes when the object is
        # collected.  NOTE(review): __del__ is not guaranteed to run at
        # interpreter exit; callers wanting durability should commit
        # explicitly.
        self._db.commit()

    def _open_db(self):
        """
        Connect to the database file and create the schema if the file
        is new (detected by the absence of the "meta" table).
        """
        self._db = sqlite.connect(self._dbfile)
        self._cursor = self._db.cursor()
        # Favor speed over durability; acceptable for a rebuildable cache.
        self._cursor.execute("PRAGMA synchronous=OFF")
        self._cursor.execute("PRAGMA count_changes=OFF")
        self._cursor.execute("PRAGMA cache_size=50000")

        if not self.check_table_exists("meta"):
            self._db.close()
            self._create_db()

        self._load_object_types()

    def _db_query(self, statement, args = ()):
        """
        Execute a parameterized statement and return all result rows.
        """
        self._cursor.execute(statement, args)
        rows = self._cursor.fetchall()
        return rows

    def _db_query_row(self, statement, args = ()):
        """
        Execute a parameterized statement and return the first result
        row, or None if there were no results.
        """
        rows = self._db_query(statement, args)
        if len(rows) == 0:
            return None
        return rows[0]

    def check_table_exists(self, table):
        """
        Return True if a table with the given name exists.
        """
        res = self._db_query_row("SELECT name FROM sqlite_master where " \
                         "name=? and type='table'", (table,))
        return res is not None


    def _create_db(self):
        """
        Delete any existing database file, recreate it from
        CREATE_SCHEMA, reopen it, and register the built-in "dir" type.
        """
        try:
            os.unlink(self._dbfile)
        except OSError:
            # File didn't exist (or couldn't be removed); sqlite will
            # (re)create it below.
            pass
        # NOTE(review): relies on the external sqlite3 command line tool
        # being on PATH; errors from the shell are not checked.
        f = os.popen("sqlite3 %s" % self._dbfile, "w")
        f.write(CREATE_SCHEMA)
        f.close()
        self._open_db()

        self.register_object_type_attrs("dir")


    def register_object_type_attrs(self, type_name, attr_list = ()):
        """
        Register (or extend) an object type.  attr_list is a sequence of
        (name, type, flags) tuples where type is one of str/int/float and
        flags is a combination of the ATTR_* constants.  If the type is
        already registered, only genuinely new attributes are added; the
        objects table is rebuilt (with row migration) only when a new
        non-simple attribute requires a new column.
        """
        new_attrs = {}
        if type_name in self._object_types:
            # This type already exists.  Compare given attributes with
            # existing attributes for this type.
            cur_type_id, cur_type_attrs = self._object_types[type_name]
            db_needs_update = False
            for name, attr_type, flags in attr_list:
                if name not in cur_type_attrs:
                    new_attrs[name] = attr_type, flags
                    if flags:
                        # New attribute isn't simple, needs to alter table.
                        db_needs_update = True

            if len(new_attrs) == 0:
                # All these attributes are already registered; nothing to do.
                return

            if not db_needs_update:
                # Only simple (i.e. pickled only) attributes are added, so we
                # don't need to alter the table, just update the types table.
                cur_type_attrs.update(new_attrs)
                self._db_query("UPDATE types SET attrs_pickle=? WHERE id=?",
                           (cPickle.dumps(cur_type_attrs), cur_type_id))
                return

            # Merge existing and new attributes; the table gets rebuilt
            # below.  (This line was previously broken across a statement
            # boundary with no continuation -- a syntax error.)
            merged = dict(cur_type_attrs)
            merged.update(new_attrs)
            attr_list = [(name, t, f) for name, (t, f) in merged.items()]

        else:
            # Merge standard attributes with user attributes for this type.
            attr_list = (
                ("id", int, ATTR_SEARCHABLE),
                ("name", str, ATTR_KEYWORDS),
                ("parent_id", int, ATTR_SEARCHABLE),
                ("parent_type", int, ATTR_SEARCHABLE),
                ("size", int, ATTR_SIMPLE),
                ("mtime", int, ATTR_SEARCHABLE),
                ("pickle", str, ATTR_SEARCHABLE),
            ) + tuple(attr_list)

        table_name = "objects_%s" % type_name

        # Iterate through type attributes and build the column list for
        # the SQL create statement.
        sql_types = {str: "TEXT", int: "INTEGER", float: "FLOAT"}
        column_defs = []
        attrs = {}
        for name, attr_type, flags in attr_list:
            # If flags is non-zero it means this attribute needs to be a
            # column in the table, not a pickled value.
            if flags:
                assert attr_type in sql_types
                column = "%s %s" % (name, sql_types[attr_type])
                if name == "id":
                    # Special case, these are auto-incrementing primary keys
                    column += " PRIMARY KEY AUTOINCREMENT"
                column_defs.append(column)

            attrs[name] = (attr_type, flags)

        create_stmt = "CREATE TABLE %s_tmp (%s)" % \
                      (table_name, ",".join(column_defs))
        self._db_query(create_stmt)

        # Add this type to the types table, including the attributes
        # dictionary.  NOTE(review): REPLACE assigns a fresh autoincrement
        # id, so existing words_map.file_type references may go stale --
        # verify before relying on keyword search across re-registration.
        self._db_query("INSERT OR REPLACE INTO types VALUES(NULL, ?, ?)", 
                       (type_name, cPickle.dumps(attrs)))

        if new_attrs:
            # Migrate rows from old table to new one (only the columns
            # that already existed).
            columns = [n for n in cur_type_attrs.keys() if cur_type_attrs[n][1]]
            columns = ",".join(columns)
            self._db_query("INSERT INTO %s_tmp (%s) SELECT %s FROM %s" % \
                           (table_name, columns, columns, table_name))

            # Delete old table.
            self._db_query("DROP TABLE %s" % table_name)

        # Rename temporary table.
        self._db_query("ALTER TABLE %s_tmp RENAME TO %s" % \
                       (table_name, table_name))


        # Create index for locating object by full path (i.e. parent + name)
        self._db_query("CREATE UNIQUE INDEX %s_parent_name_idx on %s "\
                       "(parent_id, parent_type, name)" % \
                       (table_name, table_name))
        # Create index for locating all objects under a given parent.
        self._db_query("CREATE INDEX %s_parent_idx on %s (parent_id, "\
                       "parent_type)" % (table_name, table_name))

        # If any of these attributes need to be indexed, create the index
        # for that column.  TODO: need to support indexes on multiple
        # columns.
        for name, attr_type, flags in attr_list:
            if flags & ATTR_INDEXED:
                self._db_query("CREATE INDEX %s_%s_idx ON %s (%s)" % \
                               (table_name, name, table_name, name))

        self._load_object_types()


    def _load_object_types(self):
        """
        Refresh the in-memory type registry from the types table.
        Maps type name -> (type id, attributes dict).
        """
        for id, name, attrs in self._db_query("SELECT * from types"):
            self._object_types[name] = id, cPickle.loads(attrs.encode("utf-8"))


    def _make_query_from_attrs(self, query_type, attrs, type_name):
        """
        Build an INSERT ("add") or UPDATE ("update") statement plus its
        value list for the given attribute dict and object type.
        ATTR_SIMPLE attributes are folded into the pickle column.
        """
        type_attrs = self._object_types[type_name][1]

        # Strip attributes explicitly set to None.  This intentionally
        # mutates the caller's dict: add_object() returns it afterwards.
        for key in [k for k in attrs.keys() if attrs[k] is None]:
            del attrs[key]

        columns = []
        values = []
        placeholders = []
        attrs_copy = attrs.copy()
        for name, (attr_type, flags) in type_attrs.items():
            if flags != ATTR_SIMPLE:
                columns.append(name)
                placeholders.append("?")
                if name in attrs:
                    values.append(attrs[name])
                    del attrs_copy[name]
                else:
                    values.append(None)

        # Everything left over is ATTR_SIMPLE data; store it pickled.
        if len(attrs_copy) > 0:
            values[columns.index("pickle")] = cPickle.dumps(attrs_copy)
        else:
            values[columns.index("pickle")] = None

        table_name = "objects_" + type_name

        if query_type == "add":
            q = "INSERT INTO %s (%s) VALUES(%s)" % \
                (table_name, ",".join(columns), ",".join(placeholders))
        else:
            assignments = ["%s=%s" % (col, ph)
                           for col, ph in zip(columns, placeholders)]
            q = "UPDATE %s SET %s WHERE id=?" % \
                (table_name, ",".join(assignments))
            values.append(attrs["id"])

        # TODO: keyword indexing for ATTR_KEYWORDS attributes.

        return q, values


    def add_object(self, type_name_tuple, parent = None, **attrs):
        """
        Adds an object to the database.  When adding, an object is identified
        by a (type, name) tuple.  Parent is a (type, id) tuple which refers to
        the object's parent.  In both cases, "type" is a type name as
        given to register_object_type_attrs().  attrs kwargs will vary based on
        object type.  ATTR_SIMPLE attributes which are None are not added.

        Returns the attrs dict updated with the new row's "id".
        """
        object_type, object_name = type_name_tuple
        if parent:
            attrs["parent_type"] = self._object_types[parent[0]][0]
            attrs["parent_id"] = parent[1]
        attrs["name"] = object_name
        query, values = self._make_query_from_attrs("add", attrs, object_type)
        self._db_query(query, values)
        attrs["id"] = self._cursor.lastrowid
        return attrs


    def update_object(self, type_id_tuple, parent = None, **attrs):
        """
        Update an object in the database.  For updating, object is identified
        by a (type, id) tuple.  Parent is a (type, id) tuple which refers to
        the object's parent.  If specified, the object is reparented,
        otherwise the parent remains the same as when it was added with
        add_object().  attrs kwargs will vary based on object type.  If a
        ATTR_SIMPLE attribute is set to None, it will be removed from the
        pickled dictionary.
        """
        object_type, object_id = type_id_tuple
        row = self._db_query_row("SELECT pickle FROM objects_%s WHERE id=?" %
                                 object_type, (object_id,))
        assert row
        if row[0]:
            # Merge the stored simple attributes with the new values.
            row_attrs = cPickle.loads(row[0].encode("utf-8"))
            row_attrs.update(attrs)
            attrs = row_attrs
        if parent:
            attrs["parent_type"] = self._object_types[parent[0]][0]
            attrs["parent_id"] = parent[1]
        attrs["id"] = object_id
        query, values = self._make_query_from_attrs("update", attrs,
                                                    object_type)
        self._db_query(query, values)


    def query(self, **attrs):
        """
        Query the database for objects matching all of the given attributes
        (specified in kwargs).  "parent" is a special kwarg which is a
        (type, id) tuple referring to the object's parent.  "object" is
        another special kwarg which is a (type, id) tuple referring to a
        specific object.  "type" can refer to a object type name, and if
        specified, only objects of that type will be queried.

        Returns a list of (columns, type_name, rows) tuples, one per
        queried type; see normalize_query_results().
        """
        if "object" in attrs:
            # Restrict to a single specific object.
            attrs["type"], attrs["id"] = attrs["object"]
            del attrs["object"]

        if "type" in attrs:
            type_list = [(attrs["type"], self._object_types[attrs["type"]])]
            del attrs["type"]
        else:
            type_list = self._object_types.items()

        if "parent" in attrs:
            parent_type, parent_id = attrs["parent"]
            attrs["parent_type"] = self._object_types[parent_type][0]
            attrs["parent_id"] = parent_id
            del attrs["parent"]

        results = []
        for type_name, (type_id, type_attrs) in type_list:
            # If any of the attribute names aren't valid for this type then
            # there can be no matches (this is an AND query), so skip it
            # rather than issuing SQL with an unknown column.
            if [a for a in attrs if a not in type_attrs]:
                continue

            # List of searchable column names for this type.
            columns = [n for n in type_attrs.keys() if type_attrs[n][1]]

            conditions = []
            query_values = []
            for attr, value in attrs.items():
                conditions.append("%s=?" % attr)
                query_values.append(value)

            q = "SELECT %s FROM objects_%s" % (",".join(columns), type_name)
            if conditions:
                # Only emit WHERE when there is something to filter on;
                # previously an empty filter produced malformed SQL.
                q += " WHERE " + " AND ".join(conditions)

            rows = self._db_query(q, query_values)
            results.append((columns, type_name, rows))

        return results

    def query_normalized(self, **attrs):
        """
        Performs a query as in query() and returns normalized results.
        """
        return self.normalize_query_results(self.query(**attrs))

    def normalize_query_results(self, results):
        """
        Takes a results list as returned from query() and converts to a list
        of dicts.  Each result dict is given a "type" entry which corresponds
        to the type name of that object, and its pickled simple attributes
        are unpacked into the dict.
        """
        new_results = []
        for columns, type_name, rows in results:
            for row in rows:
                result = dict(zip(columns, row))
                result["type"] = type_name
                if result["pickle"]:
                    pickle = cPickle.loads(result["pickle"].encode("utf-8"))
                    del result["pickle"]
                    result.update(pickle)
                new_results.append(result)
        return new_results


    def list_query_results_names(self, results):
        """
        Do a quick-and-dirty list of filenames given a query results list,
        sorted by filename.
        """
        # XXX: should be part of VFS, not database.
        files = []
        for columns, type_name, rows in results:
            filecol = columns.index("name")
            for row in rows:
                files.append(row[filecol])
        files.sort()
        return files


--- NEW FILE: .cvsignore ---
*.pyc *.pyo

--- NEW FILE: __init__.py ---
# Public API of the vfs package.  (Previously this import was split
# across two lines with no continuation -- a syntax error.)
from db import Database, ATTR_SIMPLE, ATTR_SEARCHABLE, ATTR_INDEXED, \
     ATTR_KEYWORDS



-------------------------------------------------------
SF.Net email is Sponsored by the Better Software Conference & EXPO
September 19-22, 2005 * San Francisco, CA * Development Lifecycle Practices
Agile & Plan-Driven Development * Managing Projects & Teams * Testing & QA
Security * Process Improvement & Measurement * http://www.sqe.com/bsce5sf
_______________________________________________
Freevo-cvslog mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/freevo-cvslog

Reply via email to