Ira,
Thanks for getting a sqlite datatype into galaxy. I was wanting to subclass a
sqlite datatype for another application: cistrome CEAS
I had made a sqlite datatype that captured a little metadata that I thought
could be useful for the display peek, and perhaps for filtering.
Is this worth considering?
The peek for a CEAS DB would include table names, column names, and row count:
SQLite Database
GeneTable
(chrom,name,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds,name2)
[24892]
GenomeBGP
(chrom,promoter,bipromoter,downstream,gene,rel_loc,rel_loc_cds,roi,chroms) [7]
GenomeBGS
(chrom,promoter,bipromoter,downstream,gene,rel_loc,rel_loc_cds,roi,Ns) [7]
GenomePieP (chrom,promoter,downstream,gene,enhancer,chroms) [7]
GenomePieS (chrom,promoter,downstream,gene,enhancer,total) [7]
$ hg diff /Users/jj/gxt/gxt/lib/galaxy/datatypes/binary.py
diff -r f002131cb905 lib/galaxy/datatypes/binary.py
--- a/lib/galaxy/datatypes/binary.py Fri Jul 25 12:01:34 2014 -0400
+++ b/lib/galaxy/datatypes/binary.py Fri Jul 25 12:13:05 2014 -0500
@@ -20,7 +20,7 @@
from bx.seq.twobit import TWOBIT_MAGIC_NUMBER, TWOBIT_MAGIC_NUMBER_SWAP,
TWOBIT_MAGIC_SIZE
-from galaxy.datatypes.metadata import MetadataElement
+from galaxy.datatypes.metadata import
MetadataElement,ListParameter,DictParameter
from galaxy.datatypes import metadata
from galaxy.datatypes.sniff import *
import dataproviders
@@ -550,8 +550,36 @@
@dataproviders.decorators.has_dataproviders
class SQlite ( Binary ):
+ """Class describing a Sqlite database """
+ MetadataElement( name="tables", default=[], param=ListParameter, desc="Database
Tables", readonly=True, visible=True, no_value=[] )
+ MetadataElement( name="table_columns", default={}, param=DictParameter,
desc="Database Table Columns", readonly=True, visible=True, no_value={} )
+ MetadataElement( name="table_row_count", default={}, param=DictParameter,
desc="Database Table Row Count", readonly=True, visible=True, no_value={} )
file_ext = "sqlite"
+ def init_meta( self, dataset, copy_from=None ):
+ Binary.init_meta( self, dataset, copy_from=copy_from )
+
+ def set_meta( self, dataset, overwrite = True, **kwd ):
+ try:
+ tables = []
+ columns = dict()
+ rowcounts = dict()
+ conn = sqlite3.connect(dataset.file_name)
+ c = conn.cursor()
+ tables_query = "SELECT name,sql FROM sqlite_master WHERE type='table'
ORDER BY name"
+ rslt = c.execute(tables_query).fetchall();
+ for table,sql in rslt:
+ tables.append(table)
+ columns[table] = re.sub('^.*\((.*)\)$','\\1',sql).split(',')
+ for table in tables:
+ row_query = "SELECT count(*) FROM %s" % table
+ rowcounts[table] = c.execute(row_query).fetchone()[0];
+ dataset.metadata.tables = tables
+ dataset.metadata.table_columns = columns
+ dataset.metadata.table_row_count = rowcounts
+ except Exception, exc:
+ pass
+
# Connects and runs a query that should work on any real database
# If the file is not sqlite, an exception will be thrown and the sniffer
will return false
def sniff( self, filename ):
@@ -567,7 +595,14 @@
def set_peek( self, dataset, is_multi_byte=False ):
if not dataset.dataset.purged:
- dataset.peek = "SQLite Database"
+ lines = ['SQLite Database']
+ if dataset.metadata.tables:
+ for table in dataset.metadata.tables:
+ try:
+ lines.append('%s (%s) [%s]' %
(table,','.join(dataset.metadata.table_columns.get(table,[])),dataset.metadata.table_row_count[table]))
+ except:
+ continue
+ dataset.peek = '\n'.join(lines)
dataset.blurb = data.nice_size( dataset.get_size() )
else:
dataset.peek = 'file does not exist'
Thanks,
JJ
--
James E. Johnson, Minnesota Supercomputing Institute, University of Minnesota
___________________________________________________________
Please keep all replies on the list by using "reply all"
in your mail client. To manage your subscriptions to this
and other Galaxy lists, please use the interface at:
http://lists.bx.psu.edu/
To search Galaxy mailing lists use the unified search at:
http://galaxyproject.org/search/mailinglists/