Re: [galaxy-dev] some problem with the fastq interlacer

2012-12-12 Thread David Roquis

Hi,

I recently had a problem with the fastq interlacer and had to patch it on my 
galaxy instance to make it work again. Here is the patch (from the developer).

Sincerely

David



Date: Tue, 11 Dec 2012 17:33:05 +0800
From: caizexi...@yahoo.com.cn
To: galaxy-dev@lists.bx.psu.edu
CC: galaxy-dev@lists.bx.psu.edu
Subject: [galaxy-dev] some problem with the fastq interlacer

Hi all, I ran the fastq interlacer on data in the fastq 1.8+ encoding; the 
interlacer does not seem to recognize the names of the paired-end reads, and 
treats the paired-end reads as non-paired-end reads. Best regards
___
Please keep all replies on the list by using reply all
in your mail client.  To manage your subscriptions to this
and other Galaxy lists, please use the interface at:

  http://lists.bx.psu.edu/

#Dan Blankenberg
import math
import string
import transform
from sequence import SequencingRead
from fasta import fastaSequence

class fastqSequencingRead( SequencingRead ):
format = 'sanger' #sanger is default
ascii_min = 33
ascii_max = 126
quality_min = 0
quality_max = 93
score_system = 'phred' #phred or solexa
sequence_space = 'base' #base or color
@classmethod
def get_class_by_format( cls, format ):
    """Return the entry stored in FASTQ_FORMATS under the given format name.

    format -- key to look up in FASTQ_FORMATS.

    Fails an assertion (AssertionError) when the name is not a key of
    FASTQ_FORMATS.
    """
    assert format in FASTQ_FORMATS, 'Unknown format type specified: %s' % format
    read_class = FASTQ_FORMATS[ format ]
    return read_class
@classmethod
def convert_score_phred_to_solexa( cls, decimal_score_list ):
def phred_to_solexa( score ):
if score = 0: #can't take log10( 1 - 1 ); make = 0 into -5
return -5
return int( round( 10.0 * math.log10( math.pow( 10.0, ( float( score ) / 10.0 ) ) - 1.0 ) ) )
return map( phred_to_solexa, decimal_score_list )
@classmethod
def convert_score_solexa_to_phred( cls, decimal_score_list ):
def solexa_to_phred( score ):
return int( round( 10.0 * math.log10( math.pow( 10.0, ( float( score ) / 10.0 ) ) + 1.0 ) ) )
return map( solexa_to_phred, decimal_score_list )
@classmethod
def restrict_scores_to_valid_range( cls, decimal_score_list ):
def restrict_score( score ):
return max( min( score, cls.quality_max ), cls.quality_min )
return map( restrict_score, decimal_score_list )
@classmethod
def convert_base_to_color_space( cls, sequence ):
    """Pass sequence to cls.color_space_converter.to_color_space and return its result."""
    converter = cls.color_space_converter
    return converter.to_color_space( sequence )
@classmethod
def convert_color_to_base_space( cls, sequence ):
    """Pass sequence to cls.color_space_converter.to_base_space and return its result."""
    converter = cls.color_space_converter
    return converter.to_base_space( sequence )
def is_ascii_encoded( self ):
#as per fastq definition only decimal quality strings can have spaces (and TABs for our purposes) in them (and must have a trailing space)
if ' ' in self.quality:
return False
if '\t' in self.quality:
return False
return True
def get_ascii_quality_scores( self ):
if self.is_ascii_encoded():
return list( self.quality )
else:
quality = self.quality.rstrip() #decimal scores should have a trailing space
if quality:
try:
return [ chr( int( val ) + self.ascii_min - self.quality_min ) for val in quality.split() ]
except ValueError, e:
raise ValueError( 'Error Parsing quality String. ASCII quality strings cannot contain spaces (%s): %s' % ( self.quality, e ) )
else:
return []
def get_decimal_quality_scores( self ):
if self.is_ascii_encoded():
return [ ord( val ) - self.ascii_min + self.quality_min for val in self.quality ]
else:
quality = self.quality.rstrip() #decimal scores should have a trailing space
if quality:
return [ int( val ) for val in quality.split() if val.strip() ]
else:
return []
def convert_read_to_format( self, format, force_quality_encoding = None ):
assert format in FASTQ_FORMATS, 'Unknown format type specified: %s' % format
assert force_quality_encoding in [ None, 'ascii', 'decimal' ], 'Invalid force_quality_encoding: %s' % force_quality_encoding
new_class = FASTQ_FORMATS[ format ]
new_read = new_class()
new_read.identifier = self.identifier
if self.sequence_space == new_class.sequence_space:
new_read.sequence = self.sequence
else:
if self.sequence_space == 'base':
new_read.sequence = self.convert_base_to_color_space( self.sequence )
else:
new_read.sequence = self.convert_color_to_base_space( self.sequence )
new_read.description = self.description
if self.score_system != new_read.score_system:
if self.score_system == 'phred':
score_list = self.convert_score_phred_to_solexa

[galaxy-dev] some problem with the fastq interlacer

2012-12-11 Thread 泽 蔡
Hi all,
 
I ran the fastq interlacer on data in the fastq 1.8+ encoding; the interlacer 
does not seem to recognize the names of the paired-end reads, and treats the 
paired-end reads as non-paired-end reads.
 
Best regards___
Please keep all replies on the list by using reply all
in your mail client.  To manage your subscriptions to this
and other Galaxy lists, please use the interface at:

  http://lists.bx.psu.edu/