[gentoo-commits] proj/sci:master commit in: sci-biology/biopython/, sci-biology/biopython/files/

Martin Mokrejs Fri, 18 Apr 2014 11:23:41 -0700

commit:     74035c984818c6829c1f82cbcc1f419ff45f93d0
Author:     Martin Mokrejš <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz>
AuthorDate: Fri Apr 18 18:21:20 2014 +0000
Commit:     Martin Mokrejs <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz>
CommitDate: Fri Apr 18 18:21:20 2014 +0000
URL:        http://git.overlays.gentoo.org/gitweb/?p=proj/sci.git;a=commit;h=74035c98


sci-biology/biopython: ops, taking back my own testing patches

Package-Manager: portage-2.2.7

---
 sci-biology/biopython/ChangeLog                    |   4 +
 sci-biology/biopython/files/SeqRecord.py.patch     | 148 ---------------------
 .../biopython/files/adjust-trimpoints.patch        |  76 -----------
 3 files changed, 4 insertions(+), 224 deletions(-)

diff --git a/sci-biology/biopython/ChangeLog b/sci-biology/biopython/ChangeLog
index c326c4a..796f6a9 100644
--- a/sci-biology/biopython/ChangeLog
+++ b/sci-biology/biopython/ChangeLog
@@ -2,6 +2,10 @@
 # Copyright 1999-2014 Gentoo Foundation; Distributed under the GPL v2
 # $Header: $
 
+  18 Apr 2014; Martin Mokrejs <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz>
+  -files/SeqRecord.py.patch, -files/adjust-trimpoints.patch:
+  sci-biology/biopython: ops, taking back my own testing patches
+
 *biopython-1.62-r3 (18 Apr 2014)
 *biopython-1.62-r4 (18 Apr 2014)
 

diff --git a/sci-biology/biopython/files/SeqRecord.py.patch b/sci-biology/biopython/files/SeqRecord.py.patch
deleted file mode 100644
index ac3785f..0000000
--- a/sci-biology/biopython/files/SeqRecord.py.patch
+++ /dev/null
@@ -1,148 +0,0 @@
-diff --git a/Bio/SeqIO/SffIO.py b/Bio/SeqIO/SffIO.py
-index 1971dba..43b38fd 100644
---- a/Bio/SeqIO/SffIO.py
-+++ b/Bio/SeqIO/SffIO.py
-@@ -539,8 +539,15 @@ _valid_UAN_read_name = re.compile(r'^[a-zA-Z0-9]{14}$')
- 
- 
- def _sff_read_seq_record(handle, number_of_flows_per_read, flow_chars,
--                         key_sequence, alphabet, trim=False):
--    """Parse the next read in the file, return data as a SeqRecord 
(PRIVATE)."""
-+                         key_sequence, alphabet, trim=False, 
interpret_qual_trims=True, interpret_adapter_trims=False):
-+    """Parse the next read in the file, return data as a SeqRecord (PRIVATE).
-+    Allow user to specify which type of clipping values should be applied
-+    while reading the SFF stream. To be backwards compatible, we interpret
-+    only the quality-based trim points by default. That results in lower-cased
-+    sequences in the low-qual region, regardless what adapter-based clip 
points
-+    say. This should be the desired behavior. More discussion at
-+    https://redmine.open-bio.org/issues/3437
-+    """
-     #Now on to the reads...
-     #the read header format (fixed part):
-     #read_header_length     H
-@@ -589,20 +596,41 @@ def _sff_read_seq_record(handle, 
number_of_flows_per_read, flow_chars,
-             warnings.warn("Post quality %i byte padding region contained 
data, SFF data is not broken"
-                              % padding)
-     #Follow Roche and apply most aggressive of qual and adapter clipping.
--    #Note Roche seems to ignore adapter clip fields when writing SFF,
--    #and uses just the quality clipping values for any clipping.
--    clip_left = max(clip_qual_left, clip_adapter_left)
--    #Right clipping of zero means no clipping
--    if clip_qual_right:
--        if clip_adapter_right:
--            clip_right = min(clip_qual_right, clip_adapter_right)
-+    #Note Roche does not use adapter clip fields when writing SFF files
-+    #but instead combines the adapter clipping information with quality-based
-+    #values and writes the most aggressive combination into clip fields (as
-+    #allowed by SFF specs).
-+
-+    if interpret_qual_trims:
-+        if interpret_adapter_trims:
-+            clip_left = max(clip_qual_left, clip_adapter_left)
-+            #Right clipping of zero means no clipping
-+            if clip_qual_right:
-+                if clip_adapter_right:
-+                    clip_right = min(clip_qual_right, clip_adapter_right)
-+                else:
-+                    #Typical case with Roche SFF files
-+                    clip_right = clip_qual_right
-+            elif clip_adapter_right:
-+                clip_right = clip_adapter_right
-+            else:
-+                clip_right = seq_len
-         else:
--            #Typical case with Roche SFF files
--            clip_right = clip_qual_right
--    elif clip_adapter_right:
--        clip_right = clip_adapter_right
-+          clip_left = clip_qual_left
-+          if clip_qual_right:
-+              clip_right = clip_qual_right
-+            else:
-+              clip_right = seq_len
-+    elif interpret_adapter_trims:
-+        clip_left = clip_adapter_left
-+      if clip_adapter_right:
-+          clip_right = clip_adapter_right
-+      else:
-+          clip_right = seq_len
-     else:
--        clip_right = seq_len
-+        clip_left = 0
-+      clip_right = seq_len
-+
-     #Now build a SeqRecord
-     if trim:
-         seq = seq[clip_left:clip_right].upper()
-diff --git a/Bio/SeqRecord.py b/Bio/SeqRecord.py
-index c90e13b..66bdea0 100644
---- a/Bio/SeqRecord.py
-+++ b/Bio/SeqRecord.py
-@@ -14,6 +14,8 @@ __docformat__ = "epytext en"  # Simple markup to show 
doctests nicely
- # also BioSQL.BioSeq.DBSeq which is the "Database Seq" class)
- 
- 
-+from Bio.Seq import Seq
-+
- class _RestrictedDict(dict):
-     """Dict which only allows sequences of given length as values (PRIVATE).
- 
-@@ -76,7 +78,7 @@ class _RestrictedDict(dict):
-         if not hasattr(value, "__len__") or not hasattr(value, "__getitem__") 
\
-         or (hasattr(self, "_length") and len(value) != self._length):
-             raise TypeError("We only allow python sequences (lists, tuples or 
"
--                            "strings) of length %i." % self._length)
-+                            "strings) of length %i whereas you passed an 
object of length %s." % (self._length, str(len(value))))
-         dict.__setitem__(self, key, value)
- 
-     def update(self, new_dict):
-@@ -290,10 +292,11 @@ class SeqRecord(object):
-         """)
- 
-     def _set_seq(self, value):
--        #TODO - Add a deprecation warning that the seq should be write only?
--        if self._per_letter_annotations:
--            #TODO - Make this a warning? Silently empty the dictionary?
--            raise ValueError("You must empty the letter annotations first!")
-+        # we should be much more user friendly and accept even a plain 
sequence string
-+      # and make the Seq or MutableSeq object ourselves
-+        if not isinstance(value, Seq):
-+            raise ValueError("You must pass a Seq object containing the new 
sequence instead of just plain string.")
-+        else:
-         self._seq = value
-         try:
-             self._per_letter_annotations = 
_RestrictedDict(length=len(self.seq))
-@@ -696,7 +699,7 @@ class SeqRecord(object):
-         SeqIO.write(self, handle, format_spec)
-         return handle.getvalue()
- 
--    def __len__(self):
-+    def __len__(self, trim=False, interpret_qual_trims=True, 
interpret_adapter_trims=False):
-         """Returns the length of the sequence.
- 
-         For example, using Bio.SeqIO to read in a FASTA nucleotide file:
-@@ -707,6 +710,10 @@ class SeqRecord(object):
-         309
-         >>> len(record.seq)
-         309
-+
-+      It should be possible to get length of a raw object, of trimmed
-+      object by quality or adapter criteria or both, whenever user wants
-+      to, not only when data is parsed from input.
-         """
-         return len(self.seq)
- 
-@@ -725,6 +732,13 @@ class SeqRecord(object):
-         """
-         return True
- 
-+    def apply_trimpoints(self, trim=False, interpret_qual_trims=False, 
interpret_adapter_trims=False):
-+        """We should apply either of the quality-based or adapter-based 
annotated
-+      trim points and return a new, sliced object.
-+      """
-+      pass
-+
-+
-     def __add__(self, other):
-         """Add another sequence or string to this sequence.
- 

diff --git a/sci-biology/biopython/files/adjust-trimpoints.patch b/sci-biology/biopython/files/adjust-trimpoints.patch
deleted file mode 100644
index dd6d548..0000000
--- a/sci-biology/biopython/files/adjust-trimpoints.patch
+++ /dev/null
@@ -1,76 +0,0 @@
-diff --git a/Bio/SeqIO/SffIO.py b/Bio/SeqIO/SffIO.py
-index 1971dba..43b38fd 100644
---- a/Bio/SeqIO/SffIO.py
-+++ b/Bio/SeqIO/SffIO.py
-@@ -539,8 +539,15 @@ _valid_UAN_read_name = re.compile(r'^[a-zA-Z0-9]{14}$')
- 
- 
- def _sff_read_seq_record(handle, number_of_flows_per_read, flow_chars,
--                         key_sequence, alphabet, trim=False):
--    """Parse the next read in the file, return data as a SeqRecord 
(PRIVATE)."""
-+                         key_sequence, alphabet, trim=False, 
interpret_qual_trims=True, interpret_adapter_trims=False):
-+    """Parse the next read in the file, return data as a SeqRecord (PRIVATE).
-+    Allow user to specify which type of clipping values should be applied
-+    while reading the SFF stream. To be backwards compatible, we interpret
-+    only the quality-based trim points by default. That results in lower-cased
-+    sequences in the low-qual region, regardless what adapter-based clip 
points
-+    say. This should be the desired behavior. More discussion at
-+    https://redmine.open-bio.org/issues/3437
-+    """
-     #Now on to the reads...
-     #the read header format (fixed part):
-     #read_header_length     H
-@@ -589,20 +596,41 @@ def _sff_read_seq_record(handle, 
number_of_flows_per_read, flow_chars,
-             warnings.warn("Post quality %i byte padding region contained 
data, SFF data is not broken"
-                              % padding)
-     #Follow Roche and apply most aggressive of qual and adapter clipping.
--    #Note Roche seems to ignore adapter clip fields when writing SFF,
--    #and uses just the quality clipping values for any clipping.
--    clip_left = max(clip_qual_left, clip_adapter_left)
--    #Right clipping of zero means no clipping
--    if clip_qual_right:
--        if clip_adapter_right:
--            clip_right = min(clip_qual_right, clip_adapter_right)
-+    #Note Roche does not use adapter clip fields when writing SFF files
-+    #but instead combines the adapter clipping information with quality-based
-+    #values and writes the most aggressive combination into clip fields (as
-+    #allowed by SFF specs).
-+
-+    if interpret_qual_trims:
-+        if interpret_adapter_trims:
-+            clip_left = max(clip_qual_left, clip_adapter_left)
-+            #Right clipping of zero means no clipping
-+            if clip_qual_right:
-+                if clip_adapter_right:
-+                    clip_right = min(clip_qual_right, clip_adapter_right)
-+                else:
-+                    #Typical case with Roche SFF files
-+                    clip_right = clip_qual_right
-+            elif clip_adapter_right:
-+                clip_right = clip_adapter_right
-+            else:
-+                clip_right = seq_len
-         else:
--            #Typical case with Roche SFF files
--            clip_right = clip_qual_right
--    elif clip_adapter_right:
--        clip_right = clip_adapter_right
-+          clip_left = clip_qual_left
-+          if clip_qual_right:
-+              clip_right = clip_qual_right
-+            else:
-+              clip_right = seq_len
-+    elif interpret_adapter_trims:
-+        clip_left = clip_adapter_left
-+      if clip_adapter_right:
-+          clip_right = clip_adapter_right
-+      else:
-+          clip_right = seq_len
-     else:
--        clip_right = seq_len
-+        clip_left = 0
-+      clip_right = seq_len
-+
-     #Now build a SeqRecord
-     if trim:
-         seq = seq[clip_left:clip_right].upper()

[gentoo-commits] proj/sci:master commit in: sci-biology/biopython/, sci-biology/biopython/files/

Reply via email to