Andreas Tille pushed to branch master at Debian Med / nanolyse
Commits: bb65c8a5 by Andreas Tille at 2020-10-23T15:07:05+02:00 New upstream version 1.2.0 - - - - - 770239ef by Andreas Tille at 2020-10-23T15:07:05+02:00 routine-update: New upstream version - - - - - 8b2485ba by Andreas Tille at 2020-10-23T15:07:06+02:00 Update upstream source from tag 'upstream/1.2.0' Update to upstream version '1.2.0' with Debian dir 37bea00ea1e3ec30eddc47879ce3ef07d58b3723 - - - - - 60efe4b0 by Andreas Tille at 2020-10-23T15:08:25+02:00 routine-update: Ready to upload to unstable - - - - - 6 changed files: - .travis.yml - MANIFEST.in - debian/changelog - nanolyse/NanoLyse.py - nanolyse/version.py - setup.py Changes: ===================================== .travis.yml ===================================== @@ -1,9 +1,9 @@ language: python python: - - "3.5" - "3.6" - "3.7" + - "3.8" before_install: - cp README.md README.rst ===================================== MANIFEST.in ===================================== @@ -1 +1,2 @@ include reference/lambda.fasta.gz +include README.md ===================================== debian/changelog ===================================== @@ -1,3 +1,9 @@ +nanolyse (1.2.0-1) unstable; urgency=medium + + * New upstream version + + -- Andreas Tille <[email protected]> Fri, 23 Oct 2020 15:07:15 +0200 + nanolyse (1.1.1-1) unstable; urgency=medium * Initial release (Closes: #963620) ===================================== nanolyse/NanoLyse.py ===================================== @@ -43,7 +43,14 @@ def main(): try: logging.info('NanoLyse {} started with arguments {}'.format(__version__, args)) aligner = getIndex(args.reference) - align(aligner, sys.stdin) + if args.summary_in: + import tempfile + tmp = tempfile.TemporaryFile() + filter_reads(aligner, sys.stdin, tmp=tmp) + logging.info('Filtering the summary file.') + filter_summary(args.summary_in, args.summary_out, tmp) + else: + filter_reads(aligner, sys.stdin) logging.info('NanoLyse finished.') except Exception as e: logging.error(e, exc_info=True) @@ -53,11 +60,11 @@ def main(): def get_args(): epilog = """EXAMPLES: gunzip -c reads.fastq.gz | NanoLyse | gzip > reads_without_lambda.fastq.gz - gunzip -c reads.fastq.gz | NanoLyse | NanoFilt -q 12 | gzip > filtered_reads_without_lambda.fastq.gz - gunzip -c reads.fastq.gz | NanoLyse --reference mygenome.fa.gz | gzip > reads_without_mygenome.fastq.gz + gunzip -c reads.fastq.gz | NanoLyse | NanoFilt -q 12 | gzip > filt_reads_without_lambda.fastq.gz + gunzip -c reads.fastq.gz | NanoLyse --reference mydb.fa.gz | gzip > reads_without_mydb.fastq.gz """ parser = ArgumentParser( - description="Remove reads mapping to the lambda genome. Reads fastq from stdin and writes to stdout.", + description="Remove reads mapping to DNA CS. Reads fastq on stdin and writes to stdout.", epilog=epilog, formatter_class=custom_formatter, add_help=False) @@ -70,12 +77,17 @@ def get_args(): help="Print version and exit.", action="version", version='NanoLyse {}'.format(__version__)) + parser.add_argument("--summary_in", help="Summary file to filter") + parser.add_argument("--summary_out", help="with --summary_in: name of output file.") parser.add_argument("-r", "--reference", - help="Specify a reference fasta file against which to filter.") + help="Specify a fasta file against which to filter. Standard is DNA CS.") parser.add_argument("--logfile", help="Specify the path and filename for the log file.", default="NanoLyse.log") - return parser.parse_args() + args = parser.parse_args() + if bool(args.summary_in) != bool(args.summary_out): + sys.exit("ERROR: With --summary_in also --summary_out is required and vice versa!") + return args def getIndex(reference): @@ -98,19 +110,47 @@ def getIndex(reference): return aligner -def align(aligner, reads): +def filter_reads(aligner, reads, tmp=None): ''' Test if reads can get aligned to the lambda genome, if not: write to stdout + + if tmp is not None, then write lambda read identifiers to this file + To filter the summary file on later ''' i = 0 for record in SeqIO.parse(reads, "fastq"): try: next(aligner.map(str(record.seq))) i += 1 + if tmp: + tmp.write(record.id.encode('utf-8') + b"\n") except StopIteration: print(record.format("fastq"), end='') sys.stderr.write("NanoLyse: removed {} reads.\n".format(i)) + logging.info("NanoLyse: removed {} reads.".format(i)) + + +def filter_summary(summary_file, output, read_ids_file): + ''' + Optional function to filter entries from a sequencing_summary file + using a read_ids_file (tmp) to which the identifiers have been written + ''' + read_ids_file.seek(0) + lambda_identifiers = [line.rstrip() for line in read_ids_file] + sys.stderr.write(f"{len(lambda_identifiers)} lambda reads to remove from the summary\n") + i = 0 + j = 0 + with open(output, 'wb') as summary_out, open(summary_file, 'rb') as summary_in: + header = next(summary_in) + summary_out.write(header) + index = header.split(b'\t').index(b'read_id') + for line in summary_in: + i += 1 + if not line.split(b'\t')[index] in lambda_identifiers: + summary_out.write(line) + j += 1 + sys.stderr.write(f"summary had {i} lines, of which {j} got kept\n") if __name__ == '__main__': ===================================== nanolyse/version.py ===================================== @@ -1 +1 @@ -__version__ = "1.1.1" +__version__ = "1.2.0" ===================================== setup.py ===================================== @@ -17,7 +17,7 @@ setup( url='https://github.com/wdecoster/nanolyse', author='Wouter De Coster', author_email='[email protected]', - license='MIT', + license='GPLv3', classifiers=[ 'Development Status :: 4 - Beta', 'Intended Audience :: Science/Research', View it on GitLab: https://salsa.debian.org/med-team/nanolyse/-/compare/4a607f5fd9b826ac5e2642d5a2509dfeb2813811...60efe4b0297b711eb8eddd19015f9990a88f30ab -- View it on GitLab: https://salsa.debian.org/med-team/nanolyse/-/compare/4a607f5fd9b826ac5e2642d5a2509dfeb2813811...60efe4b0297b711eb8eddd19015f9990a88f30ab You're receiving this email because of your account on salsa.debian.org.
_______________________________________________ debian-med-commit mailing list [email protected] https://alioth-lists.debian.net/cgi-bin/mailman/listinfo/debian-med-commit
