This is an automated email from the git hooks/post-receive script. cagriulas-guest pushed a commit to branch master in repository deepnano.
commit 4e029beab17cf15380d395aa23436d229be7e477 Author: Çağrı Ulaş <[email protected]> Date: Sat Dec 17 07:47:20 2016 +0300 added autopkgtest --- debian/control | 9 + .../2016_3_4_3507_1_ch120_read521_strand.fast5.gz | Bin 0 -> 861690 bytes .../2016_3_4_3507_1_ch13_read1130_strand.fast5.gz | Bin 0 -> 1066806 bytes .../2016_3_4_3507_1_ch13_read1132_strand.fast5.gz | Bin 0 -> 1320364 bytes debian/deepnano-data.install | 2 + debian/deepnano/DEBIAN/control | 15 + debian/deepnano/DEBIAN/md5sums | 25 ++ debian/deepnano/DEBIAN/postinst | 9 + debian/deepnano/DEBIAN/prerm | 14 + debian/deepnano/usr/bin/deepnano_basecall | 5 + .../usr/bin/deepnano_basecall_no_metrichor | 1 + debian/deepnano/usr/lib/deepnano/align_2d | Bin 0 -> 43096 bytes debian/deepnano/usr/lib/deepnano/realign | Bin 0 -> 39000 bytes debian/deepnano/usr/share/deepnano/basecall.py | 185 ++++++++++ .../usr/share/deepnano/basecall_no_metrichor.py | 277 +++++++++++++++ .../share/deepnano/basecall_no_metrichor_devel.py | 371 +++++++++++++++++++++ debian/deepnano/usr/share/deepnano/helpers.py | 76 +++++ debian/deepnano/usr/share/deepnano/rnn_fin.py | 81 +++++ .../usr/share/doc/deepnano/changelog.Debian.gz | Bin 0 -> 271 bytes debian/deepnano/usr/share/doc/deepnano/copyright | 36 ++ .../doc/deepnano/examples/nets_data/map5-2d.npz.gz | Bin 0 -> 5082272 bytes .../deepnano/examples/nets_data/map5comp.npz.gz | Bin 0 -> 1592095 bytes .../deepnano/examples/nets_data/map5temp.npz.gz | Bin 0 -> 1592084 bytes .../deepnano/examples/nets_data/map6-2d-big.npz.gz | Bin 0 -> 14015984 bytes .../examples/nets_data/map6-2d-no-metr.npz.gz | Bin 0 -> 14015890 bytes .../examples/nets_data/map6-2d-no-metr10.npz.gz | Bin 0 -> 14016340 bytes .../examples/nets_data/map6-2d-no-metr20.npz.gz | Bin 0 -> 14015359 bytes .../examples/nets_data/map6-2d-no-metr23.npz.gz | Bin 0 -> 14016230 bytes .../doc/deepnano/examples/nets_data/map6-2d.npz.gz | Bin 0 -> 5081800 bytes .../deepnano/examples/nets_data/map6comp.npz.gz | Bin 0 -> 1592557 bytes .../deepnano/examples/nets_data/map6temp.npz.gz | Bin 0 -> 1592875 bytes .../2016_3_4_3507_1_ch120_read521_strand.fast5.gz | Bin 0 -> 861647 bytes .../2016_3_4_3507_1_ch13_read1130_strand.fast5.gz | Bin 0 -> 1066763 bytes .../2016_3_4_3507_1_ch13_read1132_strand.fast5.gz | Bin 0 -> 1320321 bytes .../usr/share/python/runtime.d/deepnano.rtupdate | 7 + debian/source/include-binaries | 3 + debian/tests/control | 3 + debian/tests/run-test.sh | 24 ++ 38 files changed, 1143 insertions(+) diff --git a/debian/control b/debian/control index 77bd8cc..8a20128 100644 --- a/debian/control +++ b/debian/control @@ -27,3 +27,12 @@ Description: alternative basecaller for MinION reads of genomic sequences . Currently it works with SQK-MAP-006 and SQK-MAP-005 chemistry and as a postprocessor for Metrichor. + +Package: deepnano-data +Architecture: any +Depends: deepnano +Description: alternative basecaller for MinION reads of genomic sequences + DeepNano is alternative basecaller for Oxford Nanopore MinION reads + based on deep recurrent neural networks. + . + This package contains deepnanos test data. diff --git a/debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch120_read521_strand.fast5.gz b/debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch120_read521_strand.fast5.gz new file mode 100644 index 0000000..89e17d1 Binary files /dev/null and b/debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch120_read521_strand.fast5.gz differ diff --git a/debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch13_read1130_strand.fast5.gz b/debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch13_read1130_strand.fast5.gz new file mode 100644 index 0000000..37234d1 Binary files /dev/null and b/debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch13_read1130_strand.fast5.gz differ diff --git a/debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch13_read1132_strand.fast5.gz b/debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch13_read1132_strand.fast5.gz new file mode 100644 index 0000000..2c60372 Binary files /dev/null and b/debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch13_read1132_strand.fast5.gz differ diff --git a/debian/deepnano-data.install b/debian/deepnano-data.install new file mode 100644 index 0000000..3f05815 --- /dev/null +++ b/debian/deepnano-data.install @@ -0,0 +1,2 @@ +nets_data/ usr/share/deepnano-data/ +debian/deepnano-data-files/test_data/ usr/share/deepnano-data/ diff --git a/debian/deepnano/DEBIAN/control b/debian/deepnano/DEBIAN/control new file mode 100644 index 0000000..40bb851 --- /dev/null +++ b/debian/deepnano/DEBIAN/control @@ -0,0 +1,15 @@ +Package: deepnano +Version: 0.0+20110617-1 +Architecture: amd64 +Maintainer: Debian Med Packaging Team <[email protected]> +Installed-Size: 87902 +Depends: python:any (>= 2.7.5-5~), libc6 (>= 2.2.5), libgcc1 (>= 1:3.0), libstdc++6 (>= 5.2), python-h5py, python-numpy, python-dateutil, python-theano +Section: science +Priority: optional +Homepage: https://bitbucket.org/vboza/deepnano +Description: alternative basecaller for MinION reads of genomic sequences + DeepNano is alternative basecaller for Oxford Nanopore MinION reads + based on deep recurrent neural networks. + . + Currently it works with SQK-MAP-006 and SQK-MAP-005 chemistry and as a + postprocessor for Metrichor. diff --git a/debian/deepnano/DEBIAN/md5sums b/debian/deepnano/DEBIAN/md5sums new file mode 100644 index 0000000..64127b6 --- /dev/null +++ b/debian/deepnano/DEBIAN/md5sums @@ -0,0 +1,25 @@ +cba2f62f9fc586043fc00938b0e932b6 usr/bin/deepnano_basecall +2b88df4d884e7afa2f22870458c97757 usr/lib/deepnano/align_2d +bdb5eb7d2d0b3d70145310b7131c8d02 usr/lib/deepnano/realign +bce23353ab354f2528a5de9661a5230c usr/share/deepnano/basecall.py +5e1fe3018daa7b36e249c2157411812a usr/share/deepnano/basecall_no_metrichor.py +3a4ae91d811983676c1f6237c8fec97e usr/share/deepnano/basecall_no_metrichor_devel.py +115ccfa267eb418b79d57a4aad9b039e usr/share/deepnano/helpers.py +e9bb97314500d839bb0ec8315a7a4ef9 usr/share/deepnano/rnn_fin.py +cdf6a037be6f655d9c83430fbcc6f9d4 usr/share/doc/deepnano/changelog.Debian.gz +35b0edea4c50091a781a9385b8c7705f usr/share/doc/deepnano/copyright +702509a2bdf2369f5ea14062d5ae7762 usr/share/doc/deepnano/examples/nets_data/map5-2d.npz.gz +e6b1b2969b7448accf054142b846ab62 usr/share/doc/deepnano/examples/nets_data/map5comp.npz.gz +fe10cb4e2efb306594eea797ceba70e4 usr/share/doc/deepnano/examples/nets_data/map5temp.npz.gz +fb3755161d24834453c9d9d2f7db9353 usr/share/doc/deepnano/examples/nets_data/map6-2d-big.npz.gz +818c6b69c501943804cf2aca1b5203c3 usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr.npz.gz +d93a44348cc5b454b15338dccec70b0f usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr10.npz.gz +7872e4100faa2dd13e21549174b0f171 usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr20.npz.gz +a672d7cba84ba1f8aacb36f998dc6866 usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr23.npz.gz +273653b4f06a1529a2448c53a8dcc94c usr/share/doc/deepnano/examples/nets_data/map6-2d.npz.gz +af5b1570fe91051b69e013d63bc5d446 usr/share/doc/deepnano/examples/nets_data/map6comp.npz.gz +3e5342e80bad5a6e7193db9956c6380a usr/share/doc/deepnano/examples/nets_data/map6temp.npz.gz +c9a6911fe747ab12be4721e4f543a609 usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch120_read521_strand.fast5.gz +2f64706324cd5e8f10666f6b19fac14c usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch13_read1130_strand.fast5.gz +3113c8f6d453c1619ea606e7f768e10d usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch13_read1132_strand.fast5.gz +788eec3c08bb9ed41061cccd5f6d9d05 usr/share/python/runtime.d/deepnano.rtupdate diff --git a/debian/deepnano/DEBIAN/postinst b/debian/deepnano/DEBIAN/postinst new file mode 100755 index 0000000..5aac91b --- /dev/null +++ b/debian/deepnano/DEBIAN/postinst @@ -0,0 +1,9 @@ +#!/bin/sh +set -e + +# Automatically added by dh_python2: +if which pycompile >/dev/null 2>&1; then + pycompile -p deepnano /usr/share/deepnano +fi + +# End automatically added section diff --git a/debian/deepnano/DEBIAN/prerm b/debian/deepnano/DEBIAN/prerm new file mode 100755 index 0000000..a4c1086 --- /dev/null +++ b/debian/deepnano/DEBIAN/prerm @@ -0,0 +1,14 @@ +#!/bin/sh +set -e + +# Automatically added by dh_python2: +if which pyclean >/dev/null 2>&1; then + pyclean -p deepnano +else + dpkg -L deepnano | grep \.py$ | while read file + do + rm -f "${file}"[co] >/dev/null + done +fi + +# End automatically added section diff --git a/debian/deepnano/usr/bin/deepnano_basecall b/debian/deepnano/usr/bin/deepnano_basecall new file mode 100755 index 0000000..1d79c0a --- /dev/null +++ b/debian/deepnano/usr/bin/deepnano_basecall @@ -0,0 +1,5 @@ +#!/bin/sh + +SCRIPT=`basename $0 | sed 's/^deepnano_//'` + +/usr/share/deepnano/${SCRIPT}.py $@ diff --git a/debian/deepnano/usr/bin/deepnano_basecall_no_metrichor b/debian/deepnano/usr/bin/deepnano_basecall_no_metrichor new file mode 120000 index 0000000..2041646 --- /dev/null +++ b/debian/deepnano/usr/bin/deepnano_basecall_no_metrichor @@ -0,0 +1 @@ +deepnano_basecall \ No newline at end of file diff --git a/debian/deepnano/usr/lib/deepnano/align_2d b/debian/deepnano/usr/lib/deepnano/align_2d new file mode 100755 index 0000000..6ce2cda Binary files /dev/null and b/debian/deepnano/usr/lib/deepnano/align_2d differ diff --git a/debian/deepnano/usr/lib/deepnano/realign b/debian/deepnano/usr/lib/deepnano/realign new file mode 100755 index 0000000..47dbc8d Binary files /dev/null and b/debian/deepnano/usr/lib/deepnano/realign differ diff --git a/debian/deepnano/usr/share/deepnano/basecall.py b/debian/deepnano/usr/share/deepnano/basecall.py new file mode 100755 index 0000000..aa81f75 --- /dev/null +++ b/debian/deepnano/usr/share/deepnano/basecall.py @@ -0,0 +1,185 @@ +#!/usr/bin/python +import argparse +from rnn_fin import RnnPredictor +import h5py +import sys +import numpy as np +import theano as th +import os +import re +import dateutil.parser +import datetime +from helpers import * + +def load_read_data(read_file): + h5 = h5py.File(read_file, "r") + ret = {} + + extract_timing(h5, ret) + + base_loc = get_base_loc(h5) + + try: + ret["called_template"] = h5[base_loc+"/BaseCalled_template/Fastq"][()].split('\n')[1] + ret["called_complement"] = h5[base_loc+"/BaseCalled_complement/Fastq"][()].split('\n')[1] + ret["called_2d"] = h5["Analyses/Basecall_2D_000/BaseCalled_2D/Fastq"][()].split('\n')[1] + except Exception as e: + pass + try: + events = h5[base_loc+"/BaseCalled_template/Events"] + tscale, tscale_sd, tshift, tdrift = extract_scaling(h5, "template", base_loc) + ret["temp_events"] = extract_1d_event_data( + h5, "template", base_loc, tscale, tscale_sd, tshift, tdrift) + except: + pass + + try: + cscale, cscale_sd, cshift, cdrift = extract_scaling(h5, "complement", base_loc) + ret["comp_events"] = extract_1d_event_data( + h5, "complement", base_loc, cscale, cscale_sd, cshift, cdrift) + except Exception as e: + pass + + try: + al = h5["Analyses/Basecall_2D_000/BaseCalled_2D/Alignment"] + temp_events = h5[base_loc+"/BaseCalled_template/Events"] + comp_events = h5[base_loc+"/BaseCalled_complement/Events"] + ret["2d_events"] = [] + for a in al: + ev = [] + if a[0] == -1: + ev += [0, 0, 0, 0, 0] + else: + e = temp_events[a[0]] + mean = (e["mean"] - tshift) / cscale + stdv = e["stdv"] / tscale_sd + length = e["length"] + ev += [1] + preproc_event(mean, stdv, length) + if a[1] == -1: + ev += [0, 0, 0, 0, 0] + else: + e = comp_events[a[1]] + mean = (e["mean"] - cshift) / cscale + stdv = e["stdv"] / cscale_sd + length = e["length"] + ev += [1] + preproc_event(mean, stdv, length) + ret["2d_events"].append(ev) + ret["2d_events"] = np.array(ret["2d_events"], dtype=np.float32) + except Exception as e: + print e + pass + + h5.close() + return ret + +parser = argparse.ArgumentParser() +parser.add_argument('--template_net', type=str, default="nets_data/map6temp.npz") +parser.add_argument('--complement_net', type=str, default="nets_data/map6comp.npz") +parser.add_argument('--big_net', type=str, default="nets_data/map6-2d-big.npz") +parser.add_argument('reads', type=str, nargs='*') +parser.add_argument('--timing', action='store_true', default=False) +parser.add_argument('--type', type=str, default="all", help="One of: template, complement, 2d, all, use comma to separate multiple options, eg.: template,complement") +parser.add_argument('--output', type=str, default="output.fasta") +parser.add_argument('--output_orig', action='store_true', default=False) +parser.add_argument('--directory', type=str, default='', help="Directory where read files are stored") + +args = parser.parse_args() +types = args.type.split(',') +do_template = False +do_complement = False +do_2d = False + +if "all" in types or "template" in types: + do_template = True +if "all" in types or "complement" in types: + do_complement = True +if "all" in types or "2d" in types: + do_2d = True + +assert do_template or do_complement or do_2d, "Nothing to do" +assert len(args.reads) != 0 or len(args.directory) != 0, "Nothing to basecall" + +if do_template: + print "loading template net" + temp_net = RnnPredictor(args.template_net) + print "done" +if do_complement: + print "loading complement net" + comp_net = RnnPredictor(args.complement_net) + print "done" +if do_2d: + print "loading 2D net" + big_net = RnnPredictor(args.big_net) + print "done" + +chars = "ACGT" +mapping = {"A": 0, "C": 1, "G": 2, "T": 3, "N": 4} + +fo = open(args.output, "w") + +total_bases = [0, 0, 0] + +files = args.reads +if len(args.directory): + files += [os.path.join(args.directory, x) for x in os.listdir(args.directory)] + +for i, read in enumerate(files): + basename = os.path.basename(read) + try: + data = load_read_data(read) + except Exception as e: + print "error at file", read + print e + continue + if not data: + continue + print "\rcalling read %d/%d %s" % (i, len(files), read), + sys.stdout.flush() + if args.output_orig: + try: + if "called_template" in data: + print >>fo, ">%s_template" % basename + print >>fo, data["called_template"] + if "called_complement" in data: + print >>fo, ">%s_complement" % basename + print >>fo, data["called_complement"] + if "called_2d" in data: + print >>fo, ">%s_2d" % basename + print >>fo, data["called_2d"] + except: + pass + + temp_start = datetime.datetime.now() + if do_template and "temp_events" in data: + predict_and_write(data["temp_events"], temp_net, fo, "%s_template_rnn" % basename) + temp_time = datetime.datetime.now() - temp_start + + comp_start = datetime.datetime.now() + if do_complement and "comp_events" in data: + predict_and_write(data["comp_events"], comp_net, fo, "%s_complement_rnn" % basename) + comp_time = datetime.datetime.now() - comp_start + + start_2d = datetime.datetime.now() + if do_2d and "2d_events" in data: + predict_and_write(data["2d_events"], big_net, fo, "%s_2d_rnn" % basename) + time_2d = datetime.datetime.now() - start_2d + + if args.timing: + try: + print "Events: %d/%d" % (len(data["temp_events"]), len(data["comp_events"])) + print "Our times: %f/%f/%f" % (temp_time.total_seconds(), comp_time.total_seconds(), + time_2d.total_seconds()) + print "Our times per base: %f/%f/%f" % ( + temp_time.total_seconds() / len(data["temp_events"]), + comp_time.total_seconds() / len(data["comp_events"]), + time_2d.total_seconds() / (len(data["comp_events"]) + len(data["temp_events"]))) + print "Their times: %f/%f/%f" % (data["temp_time"].total_seconds(), data["comp_time"].total_seconds(), data["2d_time"].total_seconds()) + print "Their times per base: %f/%f/%f" % ( + data["temp_time"].total_seconds() / len(data["temp_events"]), + data["comp_time"].total_seconds() / len(data["comp_events"]), + data["2d_time"].total_seconds() / (len(data["comp_events"]) + len(data["temp_events"]))) + except: + # Don't let timing throw us out + pass + fo.flush() +fo.close() diff --git a/debian/deepnano/usr/share/deepnano/basecall_no_metrichor.py b/debian/deepnano/usr/share/deepnano/basecall_no_metrichor.py new file mode 100755 index 0000000..50b8dbc --- /dev/null +++ b/debian/deepnano/usr/share/deepnano/basecall_no_metrichor.py @@ -0,0 +1,277 @@ +#!/usr/bin/python +import argparse +from rnn_fin import RnnPredictor +import h5py +import sys +import numpy as np +import theano as th +import os +import re +import dateutil.parser +import datetime +from helpers import * +import subprocess +import time + +def get_scaling_template(events, has_std): + down = 48.4631279889 + up = 65.7312554591 + our_down = np.percentile(events["mean"], 10) + our_up = np.percentile(events["mean"], 90) + scale = (our_up - our_down) / (up - down) + shift = (our_up / scale - up) * scale + + sd = 0.807981325017 + if has_std: + return scale, np.percentile(events["stdv"], 50) / sd, shift + else: + return scale, np.sqrt(np.percentile(events["variance"], 50)) / sd, shift + + +def get_scaling_complement(events, has_std): + down = 49.2638926877 + up = 69.0192568072 + our_down = np.percentile(events["mean"], 10) + our_up = np.percentile(events["mean"], 90) + scale = (our_up - our_down) / (up - down) + shift = (our_up / scale - up) * scale + + sd = 1.04324844612 + if has_std: + return scale, np.percentile(events["stdv"], 50) / sd, shift + else: + return scale, np.sqrt(np.percentile(events["variance"], 50)) / sd, shift + +def template_complement_loc(events): + abasic_level = np.percentile(events["mean"], 99) + 5 + abasic_locs = (events["mean"] > abasic_level).nonzero()[0] + last = -47 + run_len = 1 + runs = [] + for x in abasic_locs: + if x - last == 1: + run_len += 1 + else: + if run_len >= 5: + if len(runs) and last - runs[-1][0] < 50: + run_len = last - runs[-1][0] + run_len += runs[-1][1] + runs[-1] = (last, run_len) + else: + runs.append((last, run_len)) + run_len = 1 + last = x + to_sort = [] + mid = len(events) / 2 + low_third = len(events) / 3 + high_third = len(events) / 3 * 2 + for r in runs: + if r[0] < low_third: + continue + if r[0] > high_third: + continue + to_sort.append((abs(r[0] - mid), r[0] - r[1], r[0])) + to_sort.sort() + if len(to_sort) == 0: + return None + trim_size = 10 + return {"temp": (trim_size, to_sort[0][1] - trim_size), + "comp": (to_sort[0][2] + trim_size, len(events) - trim_size)} + +def load_read_data(read_file): + h5 = h5py.File(read_file, "r") + ret = {} + + read_key = h5["Analyses/EventDetection_000/Reads"].keys()[0] + base_events = h5["Analyses/EventDetection_000/Reads"][read_key]["Events"] + temp_comp_loc = template_complement_loc(base_events) + sampling_rate = h5["UniqueGlobalKey/channel_id"].attrs["sampling_rate"] + + if temp_comp_loc: + events = base_events[temp_comp_loc["temp"][0]:temp_comp_loc["temp"][1]] + else: + events = base_events + has_std = True + try: + std = events[0]["stdv"] + except: + has_std = False + tscale2, tscale_sd2, tshift2 = get_scaling_template(events, has_std) + + index = 0.0 + ret["temp_events2"] = [] + for e in events: + mean = (e["mean"] - tshift2) / tscale2 + if has_std: + stdv = e["stdv"] / tscale_sd2 + else: + stdv = np.sqrt(e["variance"]) / tscale_sd2 + length = e["length"] / sampling_rate + ret["temp_events2"].append(preproc_event(mean, stdv, length)) + + ret["temp_events2"] = np.array(ret["temp_events2"], dtype=np.float32) + + if not temp_comp_loc: + return ret + + events = base_events[temp_comp_loc["comp"][0]:temp_comp_loc["comp"][1]] + cscale2, cscale_sd2, cshift2 = get_scaling_complement(events, has_std) + + index = 0.0 + ret["comp_events2"] = [] + for e in events: + mean = (e["mean"] - cshift2) / cscale2 + if has_std: + stdv = e["stdv"] / cscale_sd2 + else: + stdv = np.sqrt(e["variance"]) / cscale_sd2 + length = e["length"] / sampling_rate + ret["comp_events2"].append(preproc_event(mean, stdv, length)) + + ret["comp_events2"] = np.array(ret["comp_events2"], dtype=np.float32) + + return ret + +def basecall(read_file_name, fo): + basename = os.path.basename(read_file_name) + try: + data = load_read_data(read_file_name) + except Exception as e: + print e + print "error at file", read_file_name + return + + if do_template or do_2d: + o1, o2 = predict_and_write( + data["temp_events2"], temp_net, + fo if do_template else None, + "%s_template_rnn" % basename) + + if (do_complement or do_2d) and "comp_events2" in data: + o1c, o2c = predict_and_write( + data["comp_events2"], comp_net, + fo if do_complement else None, + "%s_complement_rnn" % basename) + + if do_2d and "comp_events2" in data and\ + len(data["comp_events2"]) <= args.max_2d_length and\ + len(data["temp_events2"]) <= args.max_2d_length: + p = subprocess.Popen("/usr/lib/deepnano/align_2d", stdin=subprocess.PIPE, stdout=subprocess.PIPE) + f2d = p.stdin + print >>f2d, len(o1)+len(o2) + for a, b in zip(o1, o2): + print >>f2d, " ".join(map(str, a)) + print >>f2d, " ".join(map(str, b)) + print >>f2d, len(o1c)+len(o2c) + for a, b in zip(o1c, o2c): + print >>f2d, " ".join(map(str, a)) + print >>f2d, " ".join(map(str, b)) + f2do, f2de = p.communicate() + if p.returncode != 0: + return + lines = f2do.strip().split('\n') + print >>fo, ">%s_2d_rnn_simple" % basename + print >>fo, lines[0].strip() + events_2d = [] + for l in lines[1:]: + temp_ind, comp_ind = map(int, l.strip().split()) + e = [] + if temp_ind == -1: + e += [0, 0, 0, 0, 0] + else: + e += [1] + list(data["temp_events2"][temp_ind]) + if comp_ind == -1: + e += [0, 0, 0, 0, 0] + else: + e += [1] + list(data["comp_events2"][comp_ind]) + events_2d.append(e) + events_2d = np.array(events_2d, dtype=np.float32) + predict_and_write(events_2d, big_net, fo, "%s_2d_rnn" % basename) + +parser = argparse.ArgumentParser() +parser.add_argument('--template_net', type=str, default="nets_data/map6temp.npz") +parser.add_argument('--complement_net', type=str, default="nets_data/map6comp.npz") +parser.add_argument('--big_net', type=str, default="nets_data/map6-2d-no-metr23.npz") +parser.add_argument('--max_2d_length', type=int, default=10000, help='Max length for 2d basecall') +parser.add_argument('reads', type=str, nargs='*') +parser.add_argument('--type', type=str, default="all", help="One of: template, complement, 2d, all, use comma to separate multiple options, eg.: template,complement") +parser.add_argument('--output', type=str, default="output.fasta") +parser.add_argument('--directory', type=str, default='', help="Directory where read files are stored") +parser.add_argument('--watch', type=str, default='', help='Watched directory') + + +args = parser.parse_args() +types = args.type.split(',') +do_template = False +do_complement = False +do_2d = False + +if "all" in types or "template" in types: + do_template = True +if "all" in types or "complement" in types: + do_complement = True +if "all" in types or "2d" in types: + do_2d = True + +assert do_template or do_complement or do_2d, "Nothing to do" +assert len(args.reads) != 0 or len(args.directory) != 0 or len(args.watch) != 0, "Nothing to basecall" + +if do_template or do_2d: + print "loading template net" + temp_net = RnnPredictor(args.template_net) + print "done" +if do_complement or do_2d: + print "loading complement net" + comp_net = RnnPredictor(args.complement_net) + print "done" +if do_2d: + print "loading 2D net" + big_net = RnnPredictor(args.big_net) + print "done" + +chars = "ACGT" +mapping = {"A": 0, "C": 1, "G": 2, "T": 3, "N": 4} + +if len(args.reads) or len(args.directory) != 0: + fo = open(args.output, "w") + + files = args.reads + if len(args.directory): + files += [os.path.join(args.directory, x) for x in os.listdir(args.directory)] + + for i, read in enumerate(files): + basecall(read, fo) + + fo.close() + +if len(args.watch) != 0: + try: + from watchdog.observers import Observer + from watchdog.events import PatternMatchingEventHandler + except: + print "Please install watchdog to watch directories" + sys.exit() + + class Fast5Handler(PatternMatchingEventHandler): + """Class for handling creation fo fast5-files""" + patterns = ["*.fast5"] + def on_created(self, event): + print "Calling", event + file_name = str(os.path.basename(event.src_path)) + fasta_file_name = os.path.splitext(event.src_path)[0] + '.fasta' + with open(fasta_file_name, "w") as fo: + basecall(event.src_path, fo) + print('Watch dir: ' + args.watch) + observer = Observer() + print('Starting Observerer') + # start watching directory for fast5-files + observer.start() + observer.schedule(Fast5Handler(), path=args.watch) + try: + while True: + time.sleep(1) + # quit script using ctrl+c + except KeyboardInterrupt: + observer.stop() + + observer.join() diff --git a/debian/deepnano/usr/share/deepnano/basecall_no_metrichor_devel.py b/debian/deepnano/usr/share/deepnano/basecall_no_metrichor_devel.py new file mode 100644 index 0000000..488fee3 --- /dev/null +++ b/debian/deepnano/usr/share/deepnano/basecall_no_metrichor_devel.py @@ -0,0 +1,371 @@ +import argparse +from rnn_fin import RnnPredictor +import h5py +import sys +import numpy as np +import theano as th +import os +import re +import dateutil.parser +import datetime + +def preproc_event(mean, std, length): + mean = mean / 100.0 - 0.66 + std = std - 1 + return [mean, mean*mean, std, length] + +def get_scaling_template(events): + down = 48.4631279889 + up = 65.7312554591 + our_down = np.percentile(events["mean"], 10) + our_up = np.percentile(events["mean"], 90) + scale = (our_up - our_down) / (up - down) + shift = (our_up / scale - up) * scale + + sd = 0.807981325017 + return scale, np.percentile(events["stdv"], 50) / sd, shift + +def get_scaling_complement(events): + down = 49.2638926877 + up = 69.0192568072 + our_down = np.percentile(events["mean"], 10) + our_up = np.percentile(events["mean"], 90) + scale = (our_up - our_down) / (up - down) + shift = (our_up / scale - up) * scale + + sd = 1.04324844612 + return scale, np.percentile(events["stdv"], 50) / sd, shift + +def template_complement_loc(events): + abasic_level = np.percentile(events["mean"], 99) + 5 + abasic_locs = (events["mean"] > abasic_level).nonzero()[0] + last = -47 + run_len = 1 + runs = [] + for x in abasic_locs: + if x - last == 1: + run_len += 1 + else: + if run_len >= 5: + if len(runs) and last - runs[-1][0] < 50: + run_len = last - runs[-1][0] + run_len += runs[-1][1] + runs[-1] = (last, run_len) + else: + runs.append((last, run_len)) + run_len = 1 + last = x + to_sort = [] + mid = len(events) / 2 + low_third = len(events) / 3 + high_third = len(events) / 3 * 2 + for r in runs: + if r[0] < low_third: + continue + if r[0] > high_third: + continue + to_sort.append((abs(r[0] - mid), r[0] - r[1], r[0])) + to_sort.sort() + if len(to_sort) == 0: + return None + trim_size = 10 + return {"temp": (trim_size, to_sort[0][1] - trim_size), + "comp": (to_sort[0][2] + trim_size, len(events) - trim_size)} + +def load_read_data(read_file): + h5 = h5py.File(read_file, "r") + ret = {} + + read_key = h5["Analyses/EventDetection_000/Reads"].keys()[0] + base_events = h5["Analyses/EventDetection_000/Reads"][read_key]["Events"] + temp_comp_loc = template_complement_loc(base_events) + if not temp_comp_loc: + return None + +# print "temp_comp_loc", temp_comp_loc["temp"], temp_comp_loc["comp"] +# print h5["Analyses/Basecall_2D_000/Summary/split_hairpin"].attrs["start_index_temp"], +# print h5["Analyses/Basecall_2D_000/Summary/split_hairpin"].attrs["end_index_temp"], +# print h5["Analyses/Basecall_2D_000/Summary/split_hairpin"].attrs["start_index_comp"], +# print h5["Analyses/Basecall_2D_000/Summary/split_hairpin"].attrs["end_index_comp"] + + sampling_rate = h5["UniqueGlobalKey/channel_id"].attrs["sampling_rate"] + + try: + ret["called_template"] = h5["Analyses/Basecall_2D_000/BaseCalled_template/Fastq"][()].split('\n')[1] + ret["called_complement"] = h5["Analyses/Basecall_2D_000/BaseCalled_complement/Fastq"][()].split('\n')[1] + ret["called_2d"] = h5["Analyses/Basecall_2D_000/BaseCalled_2D/Fastq"][()].split('\n')[1] + except Exception as e: + print "wat", e + return None + events = base_events[temp_comp_loc["temp"][0]:temp_comp_loc["temp"][1]] + tscale2, tscale_sd2, tshift2 = get_scaling_template(events) + + index = 0.0 + ret["temp_events2"] = [] + for e in events: + mean = (e["mean"] - tshift2) / tscale2 + stdv = e["stdv"] / tscale_sd2 + length = e["length"] / sampling_rate + ret["temp_events2"].append(preproc_event(mean, stdv, length)) + events = h5["Analyses/Basecall_2D_000/BaseCalled_template/Events"] + tscale = h5["/Analyses/Basecall_2D_000/Summary/basecall_1d_template"].attrs["scale"] + tscale_sd = h5["/Analyses/Basecall_2D_000/Summary/basecall_1d_template"].attrs["scale_sd"] + tshift = h5["/Analyses/Basecall_2D_000/Summary/basecall_1d_template"].attrs["shift"] + tdrift = h5["/Analyses/Basecall_2D_000/Summary/basecall_1d_template"].attrs["drift"] + index = 0.0 + ret["temp_events"] = [] + for e in events: + mean = (e["mean"] - tshift - index * tdrift) / tscale + stdv = e["stdv"] / tscale_sd + length = e["length"] + ret["temp_events"].append(preproc_event(mean, stdv, length)) + index += e["length"] + + events = base_events[temp_comp_loc["comp"][0]:temp_comp_loc["comp"][1]] + cscale2, cscale_sd2, cshift2 = get_scaling_complement(events) + + index = 0.0 + ret["comp_events2"] = [] + for e in events: + mean = (e["mean"] - cshift2) / cscale2 + stdv = e["stdv"] / cscale_sd2 + length = e["length"] / sampling_rate + ret["comp_events2"].append(preproc_event(mean, stdv, length)) + + events = h5["Analyses/Basecall_2D_000/BaseCalled_complement/Events"] + cscale = h5["/Analyses/Basecall_2D_000/Summary/basecall_1d_complement"].attrs["scale"] + cscale_sd = h5["/Analyses/Basecall_2D_000/Summary/basecall_1d_complement"].attrs["scale_sd"] + cshift = h5["/Analyses/Basecall_2D_000/Summary/basecall_1d_complement"].attrs["shift"] + cdrift = h5["/Analyses/Basecall_2D_000/Summary/basecall_1d_complement"].attrs["drift"] + index = 0.0 + ret["comp_events"] = [] + for e in events: + mean = (e["mean"] - cshift - index * cdrift) / cscale + stdv = e["stdv"] / cscale_sd + length = e["length"] + ret["comp_events"].append(preproc_event(mean, stdv, length)) + index += e["length"] + + ret["temp_events2"] = np.array(ret["temp_events2"], dtype=np.float32) + ret["comp_events2"] = np.array(ret["comp_events2"], dtype=np.float32) + ret["temp_events"] = np.array(ret["temp_events"], dtype=np.float32) + ret["comp_events"] = np.array(ret["comp_events"], dtype=np.float32) + + al = h5["Analyses/Basecall_2D_000/BaseCalled_2D/Alignment"] + ret["al"] = al + temp_events = h5["Analyses/Basecall_2D_000/BaseCalled_template/Events"] + comp_events = h5["Analyses/Basecall_2D_000/BaseCalled_complement/Events"] + ret["2d_events"] = [] + for a in al: + ev = [] + if a[0] == -1: + ev += [0, 0, 0, 0, 0] + else: + e = temp_events[a[0]] + mean = (e["mean"] - tshift - index * tdrift) / cscale + stdv = e["stdv"] / tscale_sd + length = e["length"] + ev += [1] + preproc_event(mean, stdv, length) + if a[1] == -1: + ev += [0, 0, 0, 0, 0] + else: + e = comp_events[a[1]] + mean = (e["mean"] - cshift - index * cdrift) / cscale + stdv = e["stdv"] / cscale_sd + length = e["length"] + ev += [1] + preproc_event(mean, stdv, length) + ret["2d_events"].append(ev) + ret["2d_events"] = np.array(ret["2d_events"], dtype=np.float32) + return ret + +parser = argparse.ArgumentParser() +parser.add_argument('--template_net', type=str, default="nets_data/map6temp.npz") +parser.add_argument('--complement_net', type=str, default="nets_data/map6comp.npz") +parser.add_argument('--big_net', type=str, default="nets_data/map6-2d-big.npz") +parser.add_argument('reads', type=str, nargs='+') +parser.add_argument('--type', type=str, default="all", help="One of: template, complement, 2d, all, use comma to separate multiple options, eg.: template,complement") +parser.add_argument('--output', type=str, default="output.fasta") +parser.add_argument('--output_orig', action='store_true', default=True) + +args = parser.parse_args() +types = args.type.split(',') +do_template = False +do_complement = False +do_2d = False + +if "all" in types or "template" in types: + do_template = True +if "all" in types or "complement" in types: + do_complement = True +if "all" in types or "2d" in types: + do_2d = True + +assert do_template or do_complement or do_2d, "Nothing to do" + +if do_template or do_2d: + print "loading template net" + temp_net = RnnPredictor(args.template_net) + print "done" +if do_complement or do_2d: + print "loading complement net" + comp_net = RnnPredictor(args.complement_net) + print "done" +if do_2d: + print "loading 2D net" + big_net = RnnPredictor(args.big_net) + big_net_orig = RnnPredictor("nets_data/map6-2d-big.npz") + print "done" + +chars = "ACGT" +mapping = {"A": 0, "C": 1, "G": 2, "T": 3, "N": 4} + +fo = open(args.output, "w") + +total_bases = [0, 0, 0] + +for i, read in enumerate(args.reads): + if True: + data = load_read_data(read) +# except Exception as e: +# print e +# print "error at file", read +# continue + if not data: + continue + if args.output_orig: + print >>fo, ">%d_template" % i + print >>fo, data["called_template"] + print >>fo, ">%d_complement" % i + print >>fo, data["called_complement"] + print >>fo, ">%d_2d" % i + print >>fo, data["called_2d"] + + if do_template or do_2d: + o1, o2 = temp_net.predict(data["temp_events"]) + o1m = (np.argmax(o1, 1)) + o2m = (np.argmax(o2, 1)) + print >>fo, ">%d_temp_rnn" % i + for a, b in zip(o1m, o2m): + if a < 4: + fo.write(chars[a]) + if b < 4: + fo.write(chars[b]) + fo.write('\n') + o1, o2 = temp_net.predict(data["temp_events2"]) + o1m = (np.argmax(o1, 1)) + o2m = (np.argmax(o2, 1)) + if do_template: + print >>fo, ">%d_temp_rnn2" % i + for a, b in zip(o1m, o2m): + if a < 4: + fo.write(chars[a]) + if b < 4: + fo.write(chars[b]) + fo.write('\n') + + if do_complement or do_2d: + o1c, o2c = comp_net.predict(data["comp_events"]) + o1cm = (np.argmax(o1c, 1)) + o2cm = (np.argmax(o2c, 1)) + print >>fo, ">%d_comp_rnn" % i + for a, b in zip(o1cm, o2cm): + if a < 4: + fo.write(chars[a]) + if b < 4: + fo.write(chars[b]) + fo.write('\n') + o1c, o2c = comp_net.predict(data["comp_events2"]) + o1cm = (np.argmax(o1c, 1)) + o2cm = (np.argmax(o2c, 1)) + if do_complement: + print >>fo, ">%d_comp_rnn2" % i + for a, b in zip(o1cm, o2cm): + if a < 4: + fo.write(chars[a]) + if b < 4: + fo.write(chars[b]) + fo.write('\n') + + if do_2d: + f2d = open("2d.in", "w") + print >>f2d, len(o1)+len(o2) + for a, b in zip(o1, o2): + print >>f2d, " ".join(map(str, a)) + print >>f2d, " ".join(map(str, b)) + print >>f2d, len(o1c)+len(o2c) + for a, b in zip(o1c, o2c): + print >>f2d, " ".join(map(str, a)) + print >>f2d, " ".join(map(str, b)) + f2d.close() + os.system("/usr/lib/deepnano/align_2d <2d.in >2d.out") + f2do = open("2d.out") + call2d = f2do.next().strip() + print >>fo, ">%d_2d_rnn_simple" % i + print >>fo, call2d + + start_temp_ours = None + end_temp_ours = None + start_comp_ours = None + end_comp_ours = None + events_2d = [] + for l in f2do: + temp_ind, comp_ind = map(int, l.strip().split()) + e = [] + if temp_ind == -1: + e += [0, 0, 0, 0, 0] + else: + e += [1] + list(data["temp_events2"][temp_ind]) + if not start_temp_ours: + start_temp_ours = temp_ind + end_temp_ours = temp_ind + if comp_ind == -1: + e += [0, 0, 0, 0, 0] + else: + e += [1] + list(data["comp_events2"][comp_ind]) + if not end_comp_ours: + end_comp_ours = comp_ind + start_comp_ours = comp_ind + events_2d.append(e) + events_2d = np.array(events_2d, dtype=np.float32) + o1c, o2c = big_net.predict(events_2d) + o1cm = (np.argmax(o1c, 1)) + o2cm = (np.argmax(o2c, 1)) + print >>fo, ">%d_2d_rnn2" % i + for a, b in zip(o1cm, o2cm): + if a < 4: + fo.write(chars[a]) + if b < 4: + fo.write(chars[b]) + fo.write('\n') + o1c, o2c = big_net.predict(data["2d_events"]) + o1cm = (np.argmax(o1c, 1)) + o2cm = (np.argmax(o2c, 1)) + print >>fo, ">%d_2d_rnn" % i + for a, b in zip(o1cm, o2cm): + if a < 4: + fo.write(chars[a]) + if b < 4: + fo.write(chars[b]) + fo.write('\n') + + start_temp_th = None + end_temp_th = None + start_comp_th = None + end_comp_th = None + for a in data["al"]: + if a[0] != -1: + if not start_temp_th: + start_temp_th = a[0] + end_temp_th = a[0] + if a[1] != -1: + if not end_comp_th: + end_comp_th = a[1] + start_comp_th = a[1] + + print "Ours:", + print start_temp_ours, end_temp_ours, start_comp_ours, end_comp_ours, + print 1. * len(events_2d) / (end_temp_ours - start_temp_ours + end_comp_ours - start_comp_ours) + print "Their:", + print start_temp_th, end_temp_th, start_comp_th, end_comp_th, + print 1. * len(data["al"]) / (end_temp_th - start_temp_th + end_comp_th - start_comp_th) + print diff --git a/debian/deepnano/usr/share/deepnano/helpers.py b/debian/deepnano/usr/share/deepnano/helpers.py new file mode 100644 index 0000000..6808562 --- /dev/null +++ b/debian/deepnano/usr/share/deepnano/helpers.py @@ -0,0 +1,76 @@ +from rnn_fin import RnnPredictor +import h5py +import sys +import numpy as np +import theano as th +import os +import re +import dateutil.parser +import datetime +import argparse + +chars = "ACGT" +mapping = {"A": 0, "C": 1, "G": 2, "T": 3, "N": 4} + +def preproc_event(mean, std, length): + mean = mean / 100.0 - 0.66 + std = std - 1 + return [mean, mean*mean, std, length] + +def predict_and_write(events, ntwk, fo, read_name): + o1, o2 = ntwk.predict(events) + if fo: + o1m = (np.argmax(o1, 1)) + o2m = (np.argmax(o2, 1)) + print >>fo, ">%s" % read_name + for a, b in zip(o1m, o2m): + if a < 4: + fo.write(chars[a]) + if b < 4: + fo.write(chars[b]) + fo.write('\n') + return o1, o2 + +def extract_timing(h5, ret): + try: + log = h5["Analyses/Basecall_2D_000/Log"][()] + temp_time = dateutil.parser.parse(re.search(r"(.*) Basecalling template.*", log).groups()[0]) + comp_time = dateutil.parser.parse(re.search(r"(.*) Basecalling complement.*", log).groups()[0]) + comp_end_time = dateutil.parser.parse(re.search(r"(.*) Aligning hairpin.*", log).groups()[0]) + + start_2d_time = dateutil.parser.parse(re.search(r"(.*) Performing full 2D.*", log).groups()[0]) + end_2d_time = dateutil.parser.parse(re.search(r"(.*) Workflow completed.*", log).groups()[0]) + + ret["temp_time"] = comp_time - temp_time + ret["comp_time"] = comp_end_time - comp_time + ret["2d_time"] = end_2d_time - start_2d_time + except: + pass + +def get_base_loc(h5): + base_loc = "Analyses/Basecall_2D_000" + try: + events = h5["Analyses/Basecall_2D_000/BaseCalled_template/Events"] + except: + base_loc = "Analyses/Basecall_1D_000" + return base_loc + +def extract_scaling(h5, read_type, base_loc): + scale = h5[base_loc+"/Summary/basecall_1d_"+read_type].attrs["scale"] + scale_sd = h5[base_loc+"/Summary/basecall_1d_"+read_type].attrs["scale_sd"] + shift = h5[base_loc+"/Summary/basecall_1d_"+read_type].attrs["shift"] + drift = h5[base_loc+"/Summary/basecall_1d_"+read_type].attrs["drift"] + return scale, scale_sd, shift, drift + +def extract_1d_event_data(h5, read_type, base_loc, scale, scale_sd, shift, drift): + events = h5[base_loc+"/BaseCalled_%s/Events" % read_type] + index = 0.0 + data = [] + for e in events: + mean = (e["mean"] - shift - index * drift) / scale + stdv = e["stdv"] / scale_sd + length = e["length"] + data.append(preproc_event(mean, stdv, length)) + index += e["length"] + return np.array(data, dtype=np.float32) + diff --git a/debian/deepnano/usr/share/deepnano/rnn_fin.py b/debian/deepnano/usr/share/deepnano/rnn_fin.py new file mode 100644 index 0000000..a1795e8 --- /dev/null +++ b/debian/deepnano/usr/share/deepnano/rnn_fin.py @@ -0,0 +1,81 @@ +import theano as th +import theano.tensor as T +from theano.tensor.nnet import sigmoid +import numpy as np +import pickle + +def share(array, dtype=th.config.floatX, name=None): + return th.shared(value=np.asarray(array, dtype=dtype), name=name) + +class OutLayer: + def __init__(self, input, in_size, n_classes): + w = share(np.zeros((in_size, n_classes))) + b = share(np.zeros(n_classes)) + eps = 0.0000001 + self.output = T.clip(T.nnet.softmax(T.dot(input, w) + b), eps, 1-eps) + self.params = [w, b] + +class SimpleLayer: + def __init__(self, input, nin, nunits): + id = str(np.random.randint(0, 10000000)) + wio = share(np.zeros((nin, nunits)), name="wio"+id) # input to output + wir = share(np.zeros((nin, nunits)), name="wir"+id) # input to output + wiu = share(np.zeros((nin, nunits)), name="wiu"+id) # input to output + woo = share(np.zeros((nunits, nunits)), name="woo"+id) # output to output + wou = share(np.zeros((nunits, nunits)), name="wou"+id) # output to output + wor = share(np.zeros((nunits, nunits)), name="wor"+id) # output to output + bo = share(np.zeros(nunits), name="bo"+id) + bu = share(np.zeros(nunits), name="bu"+id) + br = share(np.zeros(nunits), name="br"+id) + h0 = share(np.zeros(nunits), name="h0"+id) + + def step(in_t, out_tm1): + update_gate = sigmoid(T.dot(out_tm1, wou) + T.dot(in_t, wiu) + bu) + reset_gate = sigmoid(T.dot(out_tm1, wor) + T.dot(in_t, wir) + br) + new_val = T.tanh(T.dot(in_t, wio) + reset_gate * T.dot(out_tm1, woo) + bo) + return update_gate * out_tm1 + (1 - update_gate) * new_val + + self.output, _ = th.scan( + step, sequences=[input], + outputs_info=[h0]) + + self.params = [wio, woo, bo, wir, wiu, wor, wou, br, bu, h0] + +class BiSimpleLayer(): + def __init__(self, input, nin, nunits): + fwd = SimpleLayer(input, nin, nunits) + bwd = SimpleLayer(input[::-1], nin, nunits) + self.params = fwd.params + bwd.params + self.output = T.concatenate([fwd.output, bwd.output[::-1]], axis=1) + +class RnnPredictor: + def __init__(self, filename): + package = np.load(filename) + assert(len(package.files) % 20 == 4) + n_layers = len(package.files) / 20 + + self.input = T.fmatrix() + last_output = self.input + last_size = package['arr_0'].shape[0] + hidden_size = package['arr_0'].shape[1] + par_index = 0 + for i in range(n_layers): + layer = BiSimpleLayer(last_output, last_size, hidden_size) + for i in range(20): + layer.params[i].set_value(package['arr_%d' % par_index]) + par_index += 1 + + last_output = layer.output + last_size = 2*hidden_size + out_layer1 = OutLayer(last_output, last_size, 5) + for i in range(2): + out_layer1.params[i].set_value(package['arr_%d' % par_index]) + par_index += 1 + out_layer2 = OutLayer(last_output, last_size, 5) + for i in range(2): + out_layer2.params[i].set_value(package['arr_%d' % par_index]) + par_index += 1 + output1 = out_layer1.output + output2 = out_layer2.output + + self.predict = th.function(inputs=[self.input], outputs=[output1, output2]) diff --git a/debian/deepnano/usr/share/doc/deepnano/changelog.Debian.gz b/debian/deepnano/usr/share/doc/deepnano/changelog.Debian.gz new file mode 100644 index 0000000..e9af2e1 Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/changelog.Debian.gz differ diff --git a/debian/deepnano/usr/share/doc/deepnano/copyright b/debian/deepnano/usr/share/doc/deepnano/copyright new file mode 100644 index 0000000..573e566 --- /dev/null +++ b/debian/deepnano/usr/share/doc/deepnano/copyright @@ -0,0 +1,36 @@ +Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ +Upstream-Name: DeepNano +Source: https://bitbucket.org/vboza/deepnano +Files-Excluded: training/realign + +Files: * +Copyright: 2016, Vladimir Boza, Comenius University +License: BSD-3-clause + +Files: debian/* +Copyright: 2016 Andreas Tille <[email protected]> +License: BSD-3-clause + +License: BSD-3-clause + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Comenius University nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + . + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL COMENIUS UNIVERSITY BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map5-2d.npz.gz b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map5-2d.npz.gz new file mode 100644 index 0000000..d08f7f0 Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map5-2d.npz.gz differ diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map5comp.npz.gz b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map5comp.npz.gz new file mode 100644 index 0000000..18ade24 Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map5comp.npz.gz differ diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map5temp.npz.gz b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map5temp.npz.gz new file mode 100644 index 0000000..9ec060f Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map5temp.npz.gz differ diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-big.npz.gz b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-big.npz.gz new file mode 100644 index 0000000..3767dcb Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-big.npz.gz differ diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr.npz.gz b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr.npz.gz new file mode 100644 index 0000000..3593302 Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr.npz.gz differ diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr10.npz.gz b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr10.npz.gz new file mode 100644 index 0000000..aa6558f Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr10.npz.gz differ diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr20.npz.gz b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr20.npz.gz new file mode 100644 index 0000000..07ca3cc Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr20.npz.gz differ diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr23.npz.gz b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr23.npz.gz new file mode 100644 index 0000000..98b4293 Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d-no-metr23.npz.gz differ diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d.npz.gz b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d.npz.gz new file mode 100644 index 0000000..8c472c0 Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6-2d.npz.gz differ diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6comp.npz.gz b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6comp.npz.gz new file mode 100644 index 0000000..f6e0bd4 Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6comp.npz.gz differ diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6temp.npz.gz b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6temp.npz.gz new file mode 100644 index 0000000..12e5a7a Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/nets_data/map6temp.npz.gz differ diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch120_read521_strand.fast5.gz b/debian/deepnano/usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch120_read521_strand.fast5.gz new file mode 100644 index 0000000..44756f3 Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch120_read521_strand.fast5.gz differ diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch13_read1130_strand.fast5.gz b/debian/deepnano/usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch13_read1130_strand.fast5.gz new file mode 100644 index 0000000..8aa7850 Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch13_read1130_strand.fast5.gz differ diff --git a/debian/deepnano/usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch13_read1132_strand.fast5.gz b/debian/deepnano/usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch13_read1132_strand.fast5.gz new file mode 100644 index 0000000..699f576 Binary files /dev/null and b/debian/deepnano/usr/share/doc/deepnano/examples/test_data/2016_3_4_3507_1_ch13_read1132_strand.fast5.gz differ diff --git a/debian/deepnano/usr/share/python/runtime.d/deepnano.rtupdate b/debian/deepnano/usr/share/python/runtime.d/deepnano.rtupdate new file mode 100755 index 0000000..4563b9e --- /dev/null +++ b/debian/deepnano/usr/share/python/runtime.d/deepnano.rtupdate @@ -0,0 +1,7 @@ +#! /bin/sh +set -e + +if [ "$1" = rtupdate ]; then + pyclean -p deepnano /usr/share/deepnano + pycompile -p deepnano /usr/share/deepnano +fi \ No newline at end of file diff --git a/debian/source/include-binaries b/debian/source/include-binaries new file mode 100644 index 0000000..1e5cce3 --- /dev/null +++ b/debian/source/include-binaries @@ -0,0 +1,3 @@ +debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch120_read521_strand.fast5.gz +debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch13_read1132_strand.fast5.gz +debian/deepnano-data-files/test_data/2016_3_4_3507_1_ch13_read1130_strand.fast5.gz diff --git a/debian/tests/control b/debian/tests/control new file mode 100644 index 0000000..a4ece15 --- /dev/null +++ b/debian/tests/control @@ -0,0 +1,3 @@ +Tests: run-test.sh +Depends: @, deepnano-data +Restrictions: allow-stderr diff --git a/debian/tests/run-test.sh b/debian/tests/run-test.sh new file mode 100644 index 0000000..7b7aaf6 --- /dev/null +++ b/debian/tests/run-test.sh @@ -0,0 +1,24 @@ +#!/bin/bash +set -e + +pkg_name="deepnano" +test_required_pkg="deepnano-data" + +if [ "$AUTOPKGTEST_TMP" = "" ] ; then + AUTOPKGTEST_TMP=$(mktemp -d /tmp/${pkg}-test.XXXXXX) + trap "rm -rf $AUTOPKGTEST_TMP" 0 INT QUIT ABRT PIPE TERM +fi + +cp -a /usr/share/${test_required_pkg}/* $AUTOPKGTEST_TMP + +cd $AUTOPKGTEST_TMP +find . -name "*gz" -exec gunzip \{\} \; + +echo -e "\n#1 - deepnano_basecall" +OMP_NUM_THREADS=`nproc` deepnano_basecall test_data/* +cat output.fasta + +echo -e "\n#2 - deepnano_basecall_no_metrichor" +OMP_NUM_THREADS=`nproc` deepnano_basecall_no_metrichor test_data/* + +echo "PASS" -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/deepnano.git _______________________________________________ debian-med-commit mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit
