Hi everyone

I've been using the twobit datatype, as generated by faToTwoBit (part of Jim Kent's BLAT package) and read by bx-python (bx.seq.twobit), so here's a patch that adds the twobit datatype to Galaxy.


Peter

# HG changeset patch
# User Peter van Heusden <p...@sanbi.ac.za>
# Date 1307966741 -7200
# Node ID 3b68bc0d67b43af2ce69fb1eeb9160ca053c4c72
# Parent  8bcc0877b39bf10c2330f0651d2409a2b2e9c469
Added TwoBit datatype for twobit binary nucleotide datatype. Sniffer code
based on bx-python's bx.seq.twobit.

diff -r 8bcc0877b39b -r 3b68bc0d67b4 datatypes_conf.xml.sample
--- a/datatypes_conf.xml.sample	Fri Jun 10 20:10:09 2011 -0400
+++ b/datatypes_conf.xml.sample	Mon Jun 13 14:05:41 2011 +0200
@@ -116,6 +116,7 @@
         <datatype extension="svg" type="galaxy.datatypes.images:Image" mimetype="image/svg+xml"/>
         <datatype extension="taxonomy" type="galaxy.datatypes.tabular:Taxonomy" display_in_upload="true"/>
         <datatype extension="tabular" type="galaxy.datatypes.tabular:Tabular" display_in_upload="true"/>
+        <datatype extension="twobit" type="galaxy.datatypes.binary:TwoBit" mimetype="application/octet-stream" display_in_upload="true"/>
         <datatype extension="txt" type="galaxy.datatypes.data:Text" display_in_upload="true"/>
         <datatype extension="memexml" type="galaxy.datatypes.xml:MEMEXml" mimetype="application/xml" display_in_upload="true"/>
         <datatype extension="blastxml" type="galaxy.datatypes.xml:BlastXml" mimetype="application/xml" display_in_upload="true"/>
@@ -279,6 +280,7 @@
           defined format first, followed by next-most rigidly defined, 
           and so on.
         -->
+        <sniffer type="galaxy.datatypes.binary:TwoBit"/>
         <sniffer type="galaxy.datatypes.binary:Bam"/>
         <sniffer type="galaxy.datatypes.binary:Sff"/>
         <sniffer type="galaxy.datatypes.xml:BlastXml"/>
diff -r 8bcc0877b39b -r 3b68bc0d67b4 lib/galaxy/datatypes/binary.py
--- a/lib/galaxy/datatypes/binary.py	Fri Jun 10 20:10:09 2011 -0400
+++ b/lib/galaxy/datatypes/binary.py	Mon Jun 13 14:05:41 2011 +0200
@@ -6,6 +6,10 @@
 from galaxy.datatypes.metadata import MetadataElement
 from galaxy.datatypes import metadata
 from galaxy.datatypes.sniff import *
+from galaxy import eggs
+import pkg_resources
+pkg_resources.require( "bx-python" )
+from bx.seq.twobit import TWOBIT_MAGIC_NUMBER, TWOBIT_MAGIC_NUMBER_SWAP, TWOBIT_MAGIC_SIZE
 from urllib import urlencode, quote_plus
 import zipfile, gzip
 import os, subprocess, tempfile
@@ -292,3 +296,54 @@
     def get_track_type( self ):
         return "LineTrack", {"data_standalone": "bigbed"}
 
+class TwoBit (Binary):
+    """
+    Class describing a TwoBit format nucleotide file (as written by
+    faToTwoBit and read by bx-python's bx.seq.twobit).
+    """
+
+    file_ext = "twobit"
+
+    def sniff(self, filename):
+        """
+        Return True if the file starts with the twobit magic number, in
+        either byte order, and False otherwise (including when the file
+        cannot be read or is too short to hold the magic number).
+        """
+        # Imported here because this hunk does not show whether binary.py
+        # already imports struct at module level.
+        import struct
+        try:
+            # Binary mode so the header bytes are read untranslated.
+            handle = open(filename, "rb")
+            try:
+                header = handle.read(TWOBIT_MAGIC_SIZE)
+            finally:
+                # Always release the file handle, even if read() fails.
+                handle.close()
+            magic = struct.unpack(">L", header)[0]
+        except (IOError, struct.error):
+            # struct.error: the file is too short to hold the magic number.
+            return False
+        return magic in (TWOBIT_MAGIC_NUMBER, TWOBIT_MAGIC_NUMBER_SWAP)
+
+    def set_peek(self, dataset, is_multi_byte=False):
+        """
+        Set the peek and blurb text; purged datasets are delegated to the
+        parent class.
+        """
+        if not dataset.dataset.purged:
+            dataset.peek = "Binary TwoBit format nucleotide file"
+            dataset.blurb = data.nice_size(dataset.get_size())
+        else:
+            return super(TwoBit, self).set_peek(dataset, is_multi_byte)
+
+    def display_peek(self, dataset):
+        """
+        Return the stored peek text, falling back to a generic description
+        with the dataset size if the peek cannot be read.
+        """
+        try:
+            return dataset.peek
+        except Exception:
+            return "Binary TwoBit format nucleotide file (%s)" % (data.nice_size(dataset.get_size()))
___________________________________________________________
Please keep all replies on the list by using "reply all"
in your mail client.  To manage your subscriptions to this
and other Galaxy lists, please use the interface at:

  http://lists.bx.psu.edu/

Reply via email to