[gentoo-commits] proj/sci:master commit in: sci-biology/biopython/, sci-biology/biopython/files/

Martin Mokrejs Fri, 18 Apr 2014 11:15:12 -0700

commit:     20c8e6ec36a2cdd5af944030720b059fb8d10891
Author:     Martin Mokrejš <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz>
AuthorDate: Fri Apr 18 18:11:59 2014 +0000
Commit:     Martin Mokrejs <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz>
CommitDate: Fri Apr 18 18:11:59 2014 +0000
URL:        http://git.overlays.gentoo.org/gitweb/?p=proj/sci.git;a=commit;h=20c8e6ec


sci-biology/biopython: upstream patch to downgrade an assert to a warning

Package-Manager: portage-2.2.7

---
 sci-biology/biopython/ChangeLog                    |  11 ++
 ...hon-1.63-r1.ebuild => biopython-1.62-r3.ebuild} |   5 +-
 ...hon-1.63-r1.ebuild => biopython-1.62-r4.ebuild} |   5 +-
 sci-biology/biopython/biopython-1.63-r1.ebuild     |   3 +-
 sci-biology/biopython/files/SeqRecord.py.patch     | 148 +++++++++++++++++++++
 .../biopython/files/SffIO_broken_padding.patch     |  27 ++++
 .../biopython/files/adjust-trimpoints.patch        |  76 +++++++++++
 7 files changed, 270 insertions(+), 5 deletions(-)

diff --git a/sci-biology/biopython/ChangeLog b/sci-biology/biopython/ChangeLog
index 037227d..c326c4a 100644
--- a/sci-biology/biopython/ChangeLog
+++ b/sci-biology/biopython/ChangeLog
@@ -2,6 +2,17 @@
 # Copyright 1999-2014 Gentoo Foundation; Distributed under the GPL v2
 # $Header: $
 
+*biopython-1.62-r3 (18 Apr 2014)
+*biopython-1.62-r4 (18 Apr 2014)
+
+  18 Apr 2014; Martin Mokrejs <[email protected]>
+  +biopython-1.62-r3.ebuild, +biopython-1.62-r4.ebuild,
+  +files/SeqRecord.py.patch, +files/SffIO_broken_padding.patch,
+  +files/adjust-trimpoints.patch, biopython-1.63-r1.ebuild,
+  files/SffIO_error_in_check_eof.patch, files/biopython-1.51-flex.patch,
+  files/biopython-1.62-SffIO.patch:
+  sci-biology/biopython: upstream patch to downgrade an assert to a warning
+
   23 Mar 2014; Martin Mokrejs <[email protected]>
   -biopython-1.62-r3.ebuild, -biopython-1.62-r4.ebuild, -biopython-1.63.ebuild,
   -files/SeqRecord.py.patch, -files/adjust-trimpoints.patch,

diff --git a/sci-biology/biopython/biopython-1.63-r1.ebuild b/sci-biology/biopython/biopython-1.62-r3.ebuild
similarity index 90%
copy from sci-biology/biopython/biopython-1.63-r1.ebuild
copy to sci-biology/biopython/biopython-1.62-r3.ebuild
index e99f846..09e6ed7 100644
--- a/sci-biology/biopython/biopython-1.63-r1.ebuild
+++ b/sci-biology/biopython/biopython-1.62-r3.ebuild
@@ -23,7 +23,6 @@ RDEPEND="${PYTHON_DEPS}
        dev-python/matplotlib[${PYTHON_USEDEP}]
        dev-python/networkx[${PYTHON_USEDEP}]
        dev-python/numpy[${PYTHON_USEDEP}]
-       dev-python/rdflib[${PYTHON_USEDEP}]
        dev-python/pygraphviz[${PYTHON_USEDEP}]
        dev-python/reportlab[${PYTHON_USEDEP}]
        media-gfx/pydot[${PYTHON_USEDEP}]
@@ -35,8 +34,10 @@ DEPEND="${RDEPEND}
 DOCS=( CONTRIB DEPRECATED NEWS README Doc/. )
 
 src_prepare() {
-       epatch "${FILESDIR}"/SffIO_error_in_check_eof.patch
        distutils-r1_src_prepare
+       epatch "${FILESDIR}/${PN}-1.62-SffIO.patch"
+       epatch "${FILESDIR}/SffIO_error_in_check_eof.patch"
+       epatch "${FILESDIR}/SffIO_broken_padding.patch"
 }
 
 python_test() {

diff --git a/sci-biology/biopython/biopython-1.63-r1.ebuild b/sci-biology/biopython/biopython-1.62-r4.ebuild
similarity index 90%
copy from sci-biology/biopython/biopython-1.63-r1.ebuild
copy to sci-biology/biopython/biopython-1.62-r4.ebuild
index e99f846..09e6ed7 100644
--- a/sci-biology/biopython/biopython-1.63-r1.ebuild
+++ b/sci-biology/biopython/biopython-1.62-r4.ebuild
@@ -23,7 +23,6 @@ RDEPEND="${PYTHON_DEPS}
        dev-python/matplotlib[${PYTHON_USEDEP}]
        dev-python/networkx[${PYTHON_USEDEP}]
        dev-python/numpy[${PYTHON_USEDEP}]
-       dev-python/rdflib[${PYTHON_USEDEP}]
        dev-python/pygraphviz[${PYTHON_USEDEP}]
        dev-python/reportlab[${PYTHON_USEDEP}]
        media-gfx/pydot[${PYTHON_USEDEP}]
@@ -35,8 +34,10 @@ DEPEND="${RDEPEND}
 DOCS=( CONTRIB DEPRECATED NEWS README Doc/. )
 
 src_prepare() {
-       epatch "${FILESDIR}"/SffIO_error_in_check_eof.patch
        distutils-r1_src_prepare
+       epatch "${FILESDIR}/${PN}-1.62-SffIO.patch"
+       epatch "${FILESDIR}/SffIO_error_in_check_eof.patch"
+       epatch "${FILESDIR}/SffIO_broken_padding.patch"
 }
 
 python_test() {

diff --git a/sci-biology/biopython/biopython-1.63-r1.ebuild b/sci-biology/biopython/biopython-1.63-r1.ebuild
index e99f846..b600748 100644
--- a/sci-biology/biopython/biopython-1.63-r1.ebuild
+++ b/sci-biology/biopython/biopython-1.63-r1.ebuild
@@ -35,7 +35,8 @@ DEPEND="${RDEPEND}
 DOCS=( CONTRIB DEPRECATED NEWS README Doc/. )
 
 src_prepare() {
-       epatch "${FILESDIR}"/SffIO_error_in_check_eof.patch
+       epatch "${FILESDIR}/SffIO_error_in_check_eof.patch"
+       epatch "${FILESDIR}/SffIO_broken_padding.patch"
        distutils-r1_src_prepare
 }
 

diff --git a/sci-biology/biopython/files/SeqRecord.py.patch b/sci-biology/biopython/files/SeqRecord.py.patch
new file mode 100644
index 0000000..ac3785f
--- /dev/null
+++ b/sci-biology/biopython/files/SeqRecord.py.patch
@@ -0,0 +1,148 @@
+diff --git a/Bio/SeqIO/SffIO.py b/Bio/SeqIO/SffIO.py
+index 1971dba..43b38fd 100644
+--- a/Bio/SeqIO/SffIO.py
++++ b/Bio/SeqIO/SffIO.py
+@@ -539,8 +539,15 @@ _valid_UAN_read_name = re.compile(r'^[a-zA-Z0-9]{14}$')
+ 
+ 
+ def _sff_read_seq_record(handle, number_of_flows_per_read, flow_chars,
+-                         key_sequence, alphabet, trim=False):
+-    """Parse the next read in the file, return data as a SeqRecord (PRIVATE)."""
++                         key_sequence, alphabet, trim=False, interpret_qual_trims=True, interpret_adapter_trims=False):
++    """Parse the next read in the file, return data as a SeqRecord (PRIVATE).
++    Allow user to specify which type of clipping values should be applied
++    while reading the SFF stream. To be backwards compatible, we interpret
++    only the quality-based trim points by default. That results in lower-cased
++    sequences in the low-qual region, regardless what adapter-based clip points
++    say. This should be the desired behavior. More discussion at
++    https://redmine.open-bio.org/issues/3437
++    """
+     #Now on to the reads...
+     #the read header format (fixed part):
+     #read_header_length     H
+@@ -589,20 +596,41 @@ def _sff_read_seq_record(handle, number_of_flows_per_read, flow_chars,
+            warnings.warn("Post quality %i byte padding region contained data, SFF data is not broken"
+                              % padding)
+     #Follow Roche and apply most aggressive of qual and adapter clipping.
+-    #Note Roche seems to ignore adapter clip fields when writing SFF,
+-    #and uses just the quality clipping values for any clipping.
+-    clip_left = max(clip_qual_left, clip_adapter_left)
+-    #Right clipping of zero means no clipping
+-    if clip_qual_right:
+-        if clip_adapter_right:
+-            clip_right = min(clip_qual_right, clip_adapter_right)
++    #Note Roche does not use adapter clip fields when writing SFF files
++    #but instead combines the adapter clipping information with quality-based
++    #values and writes the most aggressive combination into clip fields (as
++    #allowed by SFF specs).
++
++    if interpret_qual_trims:
++        if interpret_adapter_trims:
++            clip_left = max(clip_qual_left, clip_adapter_left)
++            #Right clipping of zero means no clipping
++            if clip_qual_right:
++                if clip_adapter_right:
++                    clip_right = min(clip_qual_right, clip_adapter_right)
++                else:
++                    #Typical case with Roche SFF files
++                    clip_right = clip_qual_right
++            elif clip_adapter_right:
++                clip_right = clip_adapter_right
++            else:
++                clip_right = seq_len
+         else:
+-            #Typical case with Roche SFF files
+-            clip_right = clip_qual_right
+-    elif clip_adapter_right:
+-        clip_right = clip_adapter_right
++          clip_left = clip_qual_left
++          if clip_qual_right:
++              clip_right = clip_qual_right
++            else:
++              clip_right = seq_len
++    elif interpret_adapter_trims:
++        clip_left = clip_adapter_left
++      if clip_adapter_right:
++          clip_right = clip_adapter_right
++      else:
++          clip_right = seq_len
+     else:
+-        clip_right = seq_len
++        clip_left = 0
++      clip_right = seq_len
++
+     #Now build a SeqRecord
+     if trim:
+         seq = seq[clip_left:clip_right].upper()
+diff --git a/Bio/SeqRecord.py b/Bio/SeqRecord.py
+index c90e13b..66bdea0 100644
+--- a/Bio/SeqRecord.py
++++ b/Bio/SeqRecord.py
+@@ -14,6 +14,8 @@ __docformat__ = "epytext en"  # Simple markup to show doctests nicely
+ # also BioSQL.BioSeq.DBSeq which is the "Database Seq" class)
+ 
+ 
++from Bio.Seq import Seq
++
+ class _RestrictedDict(dict):
+     """Dict which only allows sequences of given length as values (PRIVATE).
+ 
+@@ -76,7 +78,7 @@ class _RestrictedDict(dict):
+        if not hasattr(value, "__len__") or not hasattr(value, "__getitem__") \
+         or (hasattr(self, "_length") and len(value) != self._length):
+            raise TypeError("We only allow python sequences (lists, tuples or "
+-                            "strings) of length %i." % self._length)
++                            "strings) of length %i whereas you passed an object of length %s." % (self._length, str(len(value))))
+         dict.__setitem__(self, key, value)
+ 
+     def update(self, new_dict):
+@@ -290,10 +292,11 @@ class SeqRecord(object):
+         """)
+ 
+     def _set_seq(self, value):
+-        #TODO - Add a deprecation warning that the seq should be write only?
+-        if self._per_letter_annotations:
+-            #TODO - Make this a warning? Silently empty the dictionary?
+-            raise ValueError("You must empty the letter annotations first!")
++        # we should be much more user friendly and accept even a plain sequence string
++      # and make the Seq or MutableSeq object ourselves
++        if not isinstance(value, Seq):
++            raise ValueError("You must pass a Seq object containing the new sequence instead of just plain string.")
++        else:
+         self._seq = value
+         try:
+             self._per_letter_annotations = _RestrictedDict(length=len(self.seq))
+@@ -696,7 +699,7 @@ class SeqRecord(object):
+         SeqIO.write(self, handle, format_spec)
+         return handle.getvalue()
+ 
+-    def __len__(self):
++    def __len__(self, trim=False, interpret_qual_trims=True, interpret_adapter_trims=False):
+         """Returns the length of the sequence.
+ 
+         For example, using Bio.SeqIO to read in a FASTA nucleotide file:
+@@ -707,6 +710,10 @@ class SeqRecord(object):
+         309
+         >>> len(record.seq)
+         309
++
++      It should be possible to get length of a raw object, of trimmed
++      object by quality or adapter criteria or both, whenever user wants
++      to, not only when data is parsed from input.
+         """
+         return len(self.seq)
+ 
+@@ -725,6 +732,13 @@ class SeqRecord(object):
+         """
+         return True
+ 
++    def apply_trimpoints(self, trim=False, interpret_qual_trims=False, interpret_adapter_trims=False):
++        """We should apply either of the quality-based or adapter-based annotated
++      trim points and return a new, sliced object.
++      """
++      pass
++
++
+     def __add__(self, other):
+         """Add another sequence or string to this sequence.
+ 

diff --git a/sci-biology/biopython/files/SffIO_broken_padding.patch b/sci-biology/biopython/files/SffIO_broken_padding.patch
new file mode 100644
index 0000000..a009c58
--- /dev/null
+++ b/sci-biology/biopython/files/SffIO_broken_padding.patch
@@ -0,0 +1,27 @@
+diff --git a/Bio/SeqIO/SffIO.py b/Bio/SeqIO/SffIO.py
+index 735d55b..b89cf41 100644
+--- a/Bio/SeqIO/SffIO.py
++++ b/Bio/SeqIO/SffIO.py
+@@ -933,12 +933,20 @@ def _check_eof(handle, index_offset, index_length):
+                          "null padding region ended '.sff' which could "
+                          "be the start of a concatenated SFF file? "
+                          "See offset %i" % (padding, offset))
++    if padding and not extra:
++        #TODO - Is this error harmless enough to just ignore?
++        import warnings
++        from Bio import BiopythonParserWarning
++        warnings.warn("Your SFF file is technically invalid as it is missing "
++                      "a terminal %i byte null padding region." % padding,
++                      BiopythonParserWarning)
++        return
+     if extra.count(_null) != padding:
+         import warnings
+         from Bio import BiopythonParserWarning
+         warnings.warn("Your SFF file is invalid, post index %i byte "
+-                      "null padding region contained data." % padding,
+-                      BiopythonParserWarning)
++                      "null padding region contained data: %r"
++                      % (padding, extra), BiopythonParserWarning)
+ 
+     offset = handle.tell()
+     assert offset % 8 == 0, \

diff --git a/sci-biology/biopython/files/adjust-trimpoints.patch b/sci-biology/biopython/files/adjust-trimpoints.patch
new file mode 100644
index 0000000..dd6d548
--- /dev/null
+++ b/sci-biology/biopython/files/adjust-trimpoints.patch
@@ -0,0 +1,76 @@
+diff --git a/Bio/SeqIO/SffIO.py b/Bio/SeqIO/SffIO.py
+index 1971dba..43b38fd 100644
+--- a/Bio/SeqIO/SffIO.py
++++ b/Bio/SeqIO/SffIO.py
+@@ -539,8 +539,15 @@ _valid_UAN_read_name = re.compile(r'^[a-zA-Z0-9]{14}$')
+ 
+ 
+ def _sff_read_seq_record(handle, number_of_flows_per_read, flow_chars,
+-                         key_sequence, alphabet, trim=False):
+-    """Parse the next read in the file, return data as a SeqRecord (PRIVATE)."""
++                         key_sequence, alphabet, trim=False, interpret_qual_trims=True, interpret_adapter_trims=False):
++    """Parse the next read in the file, return data as a SeqRecord (PRIVATE).
++    Allow user to specify which type of clipping values should be applied
++    while reading the SFF stream. To be backwards compatible, we interpret
++    only the quality-based trim points by default. That results in lower-cased
++    sequences in the low-qual region, regardless what adapter-based clip points
++    say. This should be the desired behavior. More discussion at
++    https://redmine.open-bio.org/issues/3437
++    """
+     #Now on to the reads...
+     #the read header format (fixed part):
+     #read_header_length     H
+@@ -589,20 +596,41 @@ def _sff_read_seq_record(handle, number_of_flows_per_read, flow_chars,
+            warnings.warn("Post quality %i byte padding region contained data, SFF data is not broken"
+                              % padding)
+     #Follow Roche and apply most aggressive of qual and adapter clipping.
+-    #Note Roche seems to ignore adapter clip fields when writing SFF,
+-    #and uses just the quality clipping values for any clipping.
+-    clip_left = max(clip_qual_left, clip_adapter_left)
+-    #Right clipping of zero means no clipping
+-    if clip_qual_right:
+-        if clip_adapter_right:
+-            clip_right = min(clip_qual_right, clip_adapter_right)
++    #Note Roche does not use adapter clip fields when writing SFF files
++    #but instead combines the adapter clipping information with quality-based
++    #values and writes the most aggressive combination into clip fields (as
++    #allowed by SFF specs).
++
++    if interpret_qual_trims:
++        if interpret_adapter_trims:
++            clip_left = max(clip_qual_left, clip_adapter_left)
++            #Right clipping of zero means no clipping
++            if clip_qual_right:
++                if clip_adapter_right:
++                    clip_right = min(clip_qual_right, clip_adapter_right)
++                else:
++                    #Typical case with Roche SFF files
++                    clip_right = clip_qual_right
++            elif clip_adapter_right:
++                clip_right = clip_adapter_right
++            else:
++                clip_right = seq_len
+         else:
+-            #Typical case with Roche SFF files
+-            clip_right = clip_qual_right
+-    elif clip_adapter_right:
+-        clip_right = clip_adapter_right
++          clip_left = clip_qual_left
++          if clip_qual_right:
++              clip_right = clip_qual_right
++            else:
++              clip_right = seq_len
++    elif interpret_adapter_trims:
++        clip_left = clip_adapter_left
++      if clip_adapter_right:
++          clip_right = clip_adapter_right
++      else:
++          clip_right = seq_len
+     else:
+-        clip_right = seq_len
++        clip_left = 0
++      clip_right = seq_len
++
+     #Now build a SeqRecord
+     if trim:
+         seq = seq[clip_left:clip_right].upper()

[gentoo-commits] proj/sci:master commit in: sci-biology/biopython/, sci-biology/biopython/files/

Reply via email to