Hi JJ, 

I think this is a great idea.   

Do you want to make the pull request for this?  Or I can merge it into my fork 
if you like.

Cheers
Ira



On 26 Jul 2014, at 3:22 am, Jim Johnson <johns...@umn.edu> wrote:

> Ira,
> 
> Thanks for getting a sqlite datatype into Galaxy.  I'd like to subclass the 
> sqlite datatype for another application: cistrome CEAS.
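> 
> Roughly, the subclass itself would then be minimal, something along these 
> lines (the class name and extension here are just placeholders, and the peek 
> it reuses is the one added in the patch further down):
> 
>     from galaxy.datatypes.binary import SQlite
> 
>     class CeasSQlite( SQlite ):
>         """SQLite database produced by cistrome CEAS (sketch only)."""
>         file_ext = "ceas.sqlite"
> 
>         def set_peek( self, dataset, is_multi_byte=False ):
>             # Reuse the generic SQLite peek (table names, columns, row counts)
>             # and just relabel the blurb so CEAS databases stand out in the history.
>             SQlite.set_peek( self, dataset, is_multi_byte=is_multi_byte )
>             if not dataset.dataset.purged:
>                 dataset.blurb = "CEAS %s" % dataset.blurb
> 
> (It would also need an entry in datatypes_conf.xml, of course.)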
> 
> I had made a sqlite datatype that captured a little metadata that I thought 
> could be useful for the display peek, and perhaps for filtering.  
> Is this worth considering?  
> 
> 
> The peek for a CEAS DB would include table names, column names, and row count:
> SQLite Database
> GeneTable (chrom,name,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds,name2) [24892]
> GenomeBGP (chrom,promoter,bipromoter,downstream,gene,rel_loc,rel_loc_cds,roi,chroms) [7]
> GenomeBGS (chrom,promoter,bipromoter,downstream,gene,rel_loc,rel_loc_cds,roi,Ns) [7]
> GenomePieP (chrom,promoter,downstream,gene,enhancer,chroms) [7]
> GenomePieS (chrom,promoter,downstream,gene,enhancer,total) [7]
> 
> 
> 
> $ hg diff /Users/jj/gxt/gxt/lib/galaxy/datatypes/binary.py
> diff -r f002131cb905 lib/galaxy/datatypes/binary.py
> --- a/lib/galaxy/datatypes/binary.py    Fri Jul 25 12:01:34 2014 -0400
> +++ b/lib/galaxy/datatypes/binary.py    Fri Jul 25 12:13:05 2014 -0500
> @@ -20,7 +20,7 @@
>  
>  from bx.seq.twobit import TWOBIT_MAGIC_NUMBER, TWOBIT_MAGIC_NUMBER_SWAP, TWOBIT_MAGIC_SIZE
>  
> -from galaxy.datatypes.metadata import MetadataElement
> +from galaxy.datatypes.metadata import MetadataElement,ListParameter,DictParameter
>  from galaxy.datatypes import metadata
>  from galaxy.datatypes.sniff import *
>  import dataproviders
> @@ -550,8 +550,36 @@
>  
>  @dataproviders.decorators.has_dataproviders
>  class SQlite ( Binary ):
> +    """Class describing a Sqlite database """
> +    MetadataElement( name="tables", default=[], param=ListParameter, 
> desc="Database Tables", readonly=True, visible=True, no_value=[] )
> +    MetadataElement( name="table_columns", default={}, param=DictParameter, 
> desc="Database Table Columns", readonly=True, visible=True, no_value={} )
> +    MetadataElement( name="table_row_count", default={}, 
> param=DictParameter, desc="Database Table Row Count", readonly=True, 
> visible=True, no_value={} )
>      file_ext = "sqlite"
>  
> +    def init_meta( self, dataset, copy_from=None ):
> +        Binary.init_meta( self, dataset, copy_from=copy_from )
> +
> +    def set_meta( self, dataset, overwrite = True, **kwd ):
> +        try:
> +            tables = []
> +            columns = dict()
> +            rowcounts = dict()
> +            conn = sqlite3.connect(dataset.file_name)
> +            c = conn.cursor()
> +            tables_query = "SELECT name,sql FROM sqlite_master WHERE type='table' ORDER BY name"
> +            rslt = c.execute(tables_query).fetchall()
> +            for table,sql in rslt:
> +                tables.append(table)
> +                columns[table] = re.sub('^.*\((.*)\)$','\\1',sql).split(',')
> +            for table in tables:
> +                row_query = "SELECT count(*) FROM %s" % table
> +                rowcounts[table] = c.execute(row_query).fetchone()[0]
> +            dataset.metadata.tables = tables
> +            dataset.metadata.table_columns = columns
> +            dataset.metadata.table_row_count = rowcounts
> +        except Exception, exc:
> +            pass
> +
>      # Connects and runs a query that should work on any real database
>      # If the file is not sqlite, an exception will be thrown and the sniffer will return false
>      def sniff( self, filename ):
> @@ -567,7 +595,14 @@
>  
>      def set_peek( self, dataset, is_multi_byte=False ):
>          if not dataset.dataset.purged:
> -            dataset.peek  = "SQLite Database"
> +            lines = ['SQLite Database']
> +            if dataset.metadata.tables:
> +                for table in dataset.metadata.tables:
> +                    try:
> +                        lines.append('%s (%s) [%s]' % (table,','.join(dataset.metadata.table_columns.get(table,[])),dataset.metadata.table_row_count[table]))
> +                    except:
> +                        continue
> +            dataset.peek = '\n'.join(lines)
>              dataset.blurb = data.nice_size( dataset.get_size() )
>          else:
>              dataset.peek = 'file does not exist'
> 
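> For a quick check outside Galaxy, the same inspection can be run standalone 
> against any sqlite file to preview what the peek would show (the path at the 
> end is just an example):
> 
>     import re
>     import sqlite3
> 
>     def sqlite_peek( path ):
>         """Return the 'table (columns) [row count]' summary that set_peek would display."""
>         conn = sqlite3.connect( path )
>         c = conn.cursor()
>         lines = [ 'SQLite Database' ]
>         for table, sql in c.execute( "SELECT name,sql FROM sqlite_master WHERE type='table' ORDER BY name" ).fetchall():
>             # Same column parsing as in the patch: pull the column list out of the CREATE statement.
>             columns = re.sub( r'^.*\((.*)\)$', r'\1', sql ).split( ',' )
>             count = c.execute( "SELECT count(*) FROM %s" % table ).fetchone()[0]
>             lines.append( '%s (%s) [%s]' % ( table, ','.join( columns ), count ) )
>         conn.close()
>         return '\n'.join( lines )
> 
>     print( sqlite_peek( '/path/to/ceas.db' ) )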
> 
> Thanks,
> 
> JJ
> -- 
> James E. Johnson, Minnesota Supercomputing Institute, University of Minnesota
