This is an automated email from the git hooks/post-receive script. sascha-guest pushed a commit to branch master in repository iva.
commit 0bb6f953ae62ed493a692729ce295d4a9b3f2c64 Author: Sascha Steinbiss <[email protected]> Date: Thu Apr 28 21:37:06 2016 +0000 Imported Upstream version 1.0.4 --- iva/__init__.py | 1 + iva/assembly.py | 5 +- iva/common.py | 2 +- iva/external_progs.py | 8 + iva/gage/GetFastaStats$ContigAt.class | Bin 489 -> 0 bytes iva/gage/GetFastaStats.class | Bin 9674 -> 0 bytes iva/gage/SizeFasta.class | Bin 3241 -> 0 bytes iva/gage/SplitFastaByLetter.class | Bin 2497 -> 0 bytes iva/gage/Utils$Pair.class | Bin 621 -> 0 bytes iva/gage/Utils$ToProtein.class | Bin 4565 -> 0 bytes iva/gage/Utils$Translate.class | Bin 1226 -> 0 bytes iva/gage/Utils.class | Bin 5321 -> 0 bytes iva/gage/getScaffoldStats$1.class | Bin 199 -> 0 bytes iva/gage/getScaffoldStats$Scaffold.class | Bin 1232 -> 0 bytes iva/gage/getScaffoldStats.class | Bin 12187 -> 0 bytes iva/kcount.py | 16 +- iva/kraken.py | 21 +-- iva/seed.py | 7 +- iva/seed_processor.py | 7 +- iva/test_data_runner.py | 50 ++++++ iva/test_run_data/hiv_pcr_primers.fa | 36 +++++ iva/test_run_data/iva_contigs_no_trimmomatic.fasta | 167 +++++++++++++++++++++ .../iva_contigs_with_trimmomatic.fasta | 152 +++++++++++++++++++ iva/test_run_data/reads_1.fq.gz | Bin 0 -> 3813239 bytes iva/test_run_data/reads_2.fq.gz | Bin 0 -> 4661713 bytes iva/test_run_data/reference.fasta | 152 +++++++++++++++++++ iva/tests/kcount_test.py | 8 + scripts/iva | 19 +++ setup.py | 8 +- 29 files changed, 629 insertions(+), 30 deletions(-) diff --git a/iva/__init__.py b/iva/__init__.py index 825a828..47ee547 100644 --- a/iva/__init__.py +++ b/iva/__init__.py @@ -17,5 +17,6 @@ __all__ = [ 'read_trim', 'seed', 'seed_processor', + 'test_data_runner', ] from iva import * diff --git a/iva/assembly.py b/iva/assembly.py index 3a3a9ca..305c1d4 100644 --- a/iva/assembly.py +++ b/iva/assembly.py @@ -6,12 +6,13 @@ from iva import contig, mapping, seed, mummer, graph, edge, common import pyfastaq class Assembly: - def __init__(self, contigs_file=None, map_index_k=15, map_index_s=3, threads=1, max_insert=800, map_minid=0.5, min_clip=3, ext_min_cov=5, ext_min_ratio=2, ext_bases=100, verbose=0, seed_min_cov=5, seed_min_ratio=10, seed_min_kmer_count=200, seed_max_kmer_count=1000000000, seed_start_length=None, seed_stop_length=500, seed_overlap_length=None, make_new_seeds=False, contig_iter_trim=10, seed_ext_max_bases=50, max_contigs=50, clean=True, strand_bias=0): + def __init__(self, contigs_file=None, map_index_k=15, map_index_s=3, threads=1, kmc_threads=1, max_insert=800, map_minid=0.5, min_clip=3, ext_min_cov=5, ext_min_ratio=2, ext_bases=100, verbose=0, seed_min_cov=5, seed_min_ratio=10, seed_min_kmer_count=200, seed_max_kmer_count=1000000000, seed_start_length=None, seed_stop_length=500, seed_overlap_length=None, make_new_seeds=False, contig_iter_trim=10, seed_ext_max_bases=50, max_contigs=50, clean=True, strand_bias=0): self.contigs = {} self.contig_lengths = {} self.map_index_k = map_index_k self.map_index_s = map_index_s self.threads = threads + self.kmc_threads = kmc_threads self.max_insert = max_insert self.map_minid = map_minid self.min_clip = min_clip @@ -583,7 +584,7 @@ class Assembly: made_seed = False for i in range(max_attempts): - s = seed.Seed(reads1=seed_reads1, reads2=seed_reads2, extend_length=self.seed_ext_max_bases, seed_length=self.seed_start_length, seed_min_count=self.seed_min_kmer_count, seed_max_count=self.seed_max_kmer_count, ext_min_cov=self.seed_min_cov, ext_min_ratio=self.seed_min_ratio, verbose=self.verbose, threads=self.threads, sequences_to_ignore=self.used_seeds, contigs_to_check=self.contigs) + s = seed.Seed(reads1=seed_reads1, reads2=seed_reads2, extend_length=self.seed_ext_max_bases, seed_length=self.seed_start_length, seed_min_count=self.seed_min_kmer_count, seed_max_count=self.seed_max_kmer_count, ext_min_cov=self.seed_min_cov, ext_min_ratio=self.seed_min_ratio, verbose=self.verbose, kmc_threads=self.kmc_threads, map_threads=self.threads, sequences_to_ignore=self.used_seeds, contigs_to_check=self.contigs) if s.seq is None or len(s.seq) == 0: break diff --git a/iva/common.py b/iva/common.py index 5a8c804..271d35f 100644 --- a/iva/common.py +++ b/iva/common.py @@ -2,7 +2,7 @@ import argparse import os import sys import subprocess -version = '1.0.0' +version = '1.0.4' class abspathAction(argparse.Action): def __call__(self, parser, namespace, value, option_string): diff --git a/iva/external_progs.py b/iva/external_progs.py index a727111..8b42054 100644 --- a/iva/external_progs.py +++ b/iva/external_progs.py @@ -2,6 +2,7 @@ import shutil import subprocess import re import sys +from distutils.version import LooseVersion import pyfastaq from iva import common @@ -25,6 +26,11 @@ prog_to_version_cmd = { 'samtools': ('samtools', re.compile('^Version: (.*)$')), } + +minimum_versions = { + 'samtools': '0.1.19' +} + assembly_progs = [ 'kmc', @@ -79,6 +85,8 @@ def get_all_versions(progs, must_be_in_path=True): info = [] for prog in sorted(progs): version = get_version(prog, must_be_in_path=must_be_in_path) + if prog in minimum_versions and LooseVersion(version) < LooseVersion(minimum_versions[prog]): + raise Error('Found version ' + version + ' of ' + prog + ' but must be at least ' + minimum_versions[prog] + '. Cannot continue') info.append(' '.join(['Using', prog, 'version', version])) return info diff --git a/iva/gage/GetFastaStats$ContigAt.class b/iva/gage/GetFastaStats$ContigAt.class deleted file mode 100644 index 7b71d97..0000000 Binary files a/iva/gage/GetFastaStats$ContigAt.class and /dev/null differ diff --git a/iva/gage/GetFastaStats.class b/iva/gage/GetFastaStats.class deleted file mode 100644 index 11e678e..0000000 Binary files a/iva/gage/GetFastaStats.class and /dev/null differ diff --git a/iva/gage/SizeFasta.class b/iva/gage/SizeFasta.class deleted file mode 100644 index 4724b73..0000000 Binary files a/iva/gage/SizeFasta.class and /dev/null differ diff --git a/iva/gage/SplitFastaByLetter.class b/iva/gage/SplitFastaByLetter.class deleted file mode 100644 index 444915d..0000000 Binary files a/iva/gage/SplitFastaByLetter.class and /dev/null differ diff --git a/iva/gage/Utils$Pair.class b/iva/gage/Utils$Pair.class deleted file mode 100644 index c404bc5..0000000 Binary files a/iva/gage/Utils$Pair.class and /dev/null differ diff --git a/iva/gage/Utils$ToProtein.class b/iva/gage/Utils$ToProtein.class deleted file mode 100644 index c833220..0000000 Binary files a/iva/gage/Utils$ToProtein.class and /dev/null differ diff --git a/iva/gage/Utils$Translate.class b/iva/gage/Utils$Translate.class deleted file mode 100644 index 6842928..0000000 Binary files a/iva/gage/Utils$Translate.class and /dev/null differ diff --git a/iva/gage/Utils.class b/iva/gage/Utils.class deleted file mode 100644 index 66949ba..0000000 Binary files a/iva/gage/Utils.class and /dev/null differ diff --git a/iva/gage/getScaffoldStats$1.class b/iva/gage/getScaffoldStats$1.class deleted file mode 100644 index d6b14b7..0000000 Binary files a/iva/gage/getScaffoldStats$1.class and /dev/null differ diff --git a/iva/gage/getScaffoldStats$Scaffold.class b/iva/gage/getScaffoldStats$Scaffold.class deleted file mode 100644 index c1c69fb..0000000 Binary files a/iva/gage/getScaffoldStats$Scaffold.class and /dev/null differ diff --git a/iva/gage/getScaffoldStats.class b/iva/gage/getScaffoldStats.class deleted file mode 100644 index de57f0f..0000000 Binary files a/iva/gage/getScaffoldStats.class and /dev/null differ diff --git a/iva/kcount.py b/iva/kcount.py index 1b8a957..1146c58 100644 --- a/iva/kcount.py +++ b/iva/kcount.py @@ -52,14 +52,14 @@ def _median(d): return key -def _run_kmc_with_script(script, reads, outfile, kmer, min_count, max_count, m_option, verbose, allow_fail): +def _run_kmc_with_script(script, reads, outfile, kmer, min_count, max_count, m_option, verbose, allow_fail, threads=1): f = pyfastaq.utils.open_file_write(script) print('set -e', file=f) kmc_command = ''.join([ 'kmc -fa', ' -m', str(m_option), ' -k', str(kmer), - ' -sf', '1', + ' -sf', str(threads), ' -ci', str(min_count), ' -cs', str(max_count), ' -cx', str(max_count), @@ -80,7 +80,7 @@ def _run_kmc_with_script(script, reads, outfile, kmer, min_count, max_count, m_o return common.syscall('bash ' + script, allow_fail=allow_fail) -def _run_kmc(reads, outprefix, kmer, min_count, max_count, verbose=0): +def _run_kmc(reads, outprefix, kmer, min_count, max_count, verbose=0, threads=1): '''Runs the kmer counting program kmc on a FASTA file. Returns filename made by kmc of the counts of kmers''' reads = os.path.abspath(reads) tmpdir = tempfile.mkdtemp(prefix='tmp.run_kmc.', dir=os.getcwd()) @@ -92,11 +92,11 @@ def _run_kmc(reads, outprefix, kmer, min_count, max_count, verbose=0): # The range is 4-32 (GB). # Try 4 and 32 (the default), then give up. This seems to make a difference, regardless of # RAM available on the machine. - ran_ok = _run_kmc_with_script('run_kmc.sh', reads, kmer_counts_file, kmer, min_count, max_count, 32, verbose, True) + ran_ok = _run_kmc_with_script('run_kmc.sh', reads, kmer_counts_file, kmer, min_count, max_count, 32, verbose, True, threads=threads) if not ran_ok: if verbose: print('First try of running kmc failed. Trying again with -m4 instead of -m32...', flush=True) - ran_ok = _run_kmc_with_script('run_kmc.sh', reads, kmer_counts_file, kmer, min_count, max_count, 4, verbose, False) + ran_ok = _run_kmc_with_script('run_kmc.sh', reads, kmer_counts_file, kmer, min_count, max_count, 4, verbose, False, threads=threads) os.chdir(original_dir) shutil.rmtree(tmpdir) @@ -204,7 +204,7 @@ def _counts_file_to_fasta(infile, outfile): pyfastaq.utils.close(fout) -def get_most_common_kmers(reads1, reads2, kmer_length=None, head=100000, min_count=10, max_count=100000000, most_common=100, method='kmc', verbose=0, ignore_seqs=None, contigs_to_check=None, threads=1): +def get_most_common_kmers(reads1, reads2, kmer_length=None, head=100000, min_count=10, max_count=100000000, most_common=100, method='kmc', verbose=0, ignore_seqs=None, contigs_to_check=None, kmc_threads=1, map_threads=1): '''Gets the most common kmers from a pair of interleaved read FASTA or FASTQ files. Takes the first N sequences (determined by head). Returns a dict of kmer=>frequency. If kmer length is not given, use min(0.8 * median read length, 95)''' tmpdir = tempfile.mkdtemp(prefix='tmp.common_kmers.', dir=os.getcwd()) counts = {} @@ -217,8 +217,8 @@ def get_most_common_kmers(reads1, reads2, kmer_length=None, head=100000, min_cou kmer_length = min(int(0.8 * _median(read_lengths)), 95) if method == 'kmc': - counts_file = _run_kmc(reads, os.path.join(tmpdir, 'out'), kmer_length, min_count, max_count, verbose=verbose) - counts = _kmc_to_kmer_counts(counts_file, most_common, kmers_to_ignore=ignore_seqs, contigs_to_check=contigs_to_check, verbose=verbose, threads=threads) + counts_file = _run_kmc(reads, os.path.join(tmpdir, 'out'), kmer_length, min_count, max_count, verbose=verbose, threads=kmc_threads) + counts = _kmc_to_kmer_counts(counts_file, most_common, kmers_to_ignore=ignore_seqs, contigs_to_check=contigs_to_check, verbose=verbose, threads=map_threads) else: raise Error('Method "' + method + '" not supported in kcount.get_most_common_kmers(). Cannot continue.') diff --git a/iva/kraken.py b/iva/kraken.py index 5b95b8b..fc5bbda 100644 --- a/iva/kraken.py +++ b/iva/kraken.py @@ -1,3 +1,4 @@ +import stat import inspect import sys import os @@ -25,7 +26,7 @@ class Database: self.minimizer_len = minimizer_len self.max_db_size = max_db_size self.current_taxon_id = 2000000000 - self.current_gi = 4000000000 + self.current_gi = 4000000000 self.preload = preload self.verbose = verbose self.taxon_to_parent = {} @@ -88,11 +89,11 @@ class Database: 'genbank_ids': genbank_ids, 'new_gis': new_gis, } - + self.current_taxon_id += 1 pyfastaq.utils.close(f) - + def _download_from_genbank(self, outfile, filetype, gi, max_tries=5, delay=3): assert filetype in ['gb', 'fasta'] file_ok = False @@ -137,7 +138,7 @@ class Database: gi = None for line in f: if line.startswith(' /db_xref="taxon:'): - taxon_id = line.rstrip().split(':')[-1].rstrip('"') + taxon_id = line.rstrip().split(':')[-1].rstrip('"') elif line.startswith('VERSION'): gi = line.rstrip().split()[-1].split(':')[-1] if None not in [taxon_id, gi]: @@ -161,7 +162,7 @@ class Database: iva.common.syscall('grep -v CONTIG ' + infile + ' > tmp.gbk; mv tmp.gbk ' + infile) iva.common.syscall(genbank2embl + ' ' + infile + ' ' + outfile, verbose=self.verbose) shutil.rmtree(tmpdir) - + def _append_to_file(self, filename, line): try: @@ -199,7 +200,7 @@ class Database: ]) + '\t|' self._append_to_file(self.kraken_nodes_dmp, line) self.added_to_kraken.add(new_taxon) - self._append_to_file(self.kraken_gi_taxid_nucl_dmp, str(new_gi) + '\t' + str(new_taxon)) + self._append_to_file(self.kraken_gi_taxid_nucl_dmp, str(new_gi) + '\t' + str(new_taxon)) iva.common.syscall('kraken-build --add-to-library ' + fa_file + ' --db ' + self.kraken_db, verbose=self.verbose) @@ -254,7 +255,7 @@ class Database: print('unlink', os.path.exists(fa_file), fa_file) os.unlink(gb_file) os.unlink(fa_file) - + def _build_kraken_virus_db(self): if os.path.exists(self.done_files['clean']): @@ -270,7 +271,7 @@ class Database: iva.common.syscall('kraken-build --download-taxonomy --db ' + self.kraken_db, verbose=self.verbose) if not self.skip_virus_download: iva.common.syscall('kraken-build --download-library viruses --db ' + self.kraken_db, verbose=self.verbose) - + if self.extra_refs_file is not None: self._load_extra_ref_info() self._download_extra_refs() @@ -299,7 +300,7 @@ class Database: if os.path.exists(self.extra_refs_dir): shutil.rmtree(self.extra_refs_dir) iva.common.syscall('touch ' + self.done_files['clean'], verbose=self.verbose) - + def _get_genbank_virus_files(self): if os.path.exists(self.done_files['make_embl']): @@ -336,7 +337,7 @@ class Database: print() new_dir = re.sub('_uid[0-9]+$', '', directory).strip('_') if new_dir != directory: - os.rename(directory, new_dir) + os.rename(directory, new_dir) final_dir = os.path.join(self.embl_root, os.path.basename(new_dir)) if os.path.exists(final_dir): diff --git a/iva/seed.py b/iva/seed.py index 883b689..993e31f 100644 --- a/iva/seed.py +++ b/iva/seed.py @@ -8,13 +8,14 @@ from iva import kcount, kmers, mapping class Error (Exception): pass class Seed: - def __init__(self, extend_length=50, overlap_length=None, reads1=None, reads2=None, seq=None, ext_min_cov=5, ext_min_ratio=2, verbose=0, seed_length=None, seed_min_count=10, seed_max_count=100000000, threads=1, sequences_to_ignore=None, contigs_to_check=None): + def __init__(self, extend_length=50, overlap_length=None, reads1=None, reads2=None, seq=None, ext_min_cov=5, ext_min_ratio=2, verbose=0, seed_length=None, seed_min_count=10, seed_max_count=100000000, kmc_threads=1, map_threads=1, sequences_to_ignore=None, contigs_to_check=None): if contigs_to_check is None: contigs_to_check = {} if sequences_to_ignore is None: sequences_to_ignore = set() self.verbose = verbose - self.threads = threads + self.kmc_threads = kmc_threads + self.map_threads = map_threads self.extend_length = extend_length self.ext_min_cov = ext_min_cov self.ext_min_ratio = ext_min_ratio @@ -23,7 +24,7 @@ class Seed: if seq is None: if reads1 is None: raise Error('Cannot construct Seed object. Need reads when no seq has been given') - kmer_counts = kcount.get_most_common_kmers(reads1, reads2, most_common=1, min_count=seed_min_count, max_count=seed_max_count, kmer_length=seed_length, verbose=self.verbose, ignore_seqs=sequences_to_ignore, contigs_to_check=contigs_to_check) + kmer_counts = kcount.get_most_common_kmers(reads1, reads2, most_common=1, min_count=seed_min_count, max_count=seed_max_count, kmer_length=seed_length, verbose=self.verbose, ignore_seqs=sequences_to_ignore, contigs_to_check=contigs_to_check, kmc_threads=self.kmc_threads, map_threads=self.map_threads) if len(kmer_counts) == 1: self.seq = list(kmer_counts.keys())[0] if self.verbose: diff --git a/iva/seed_processor.py b/iva/seed_processor.py index 316a785..d62d6b3 100644 --- a/iva/seed_processor.py +++ b/iva/seed_processor.py @@ -9,7 +9,7 @@ import pyfastaq class Error (Exception): pass class SeedProcessor: - def __init__(self, seeds_fasta, reads1, reads2, outfile, index_k=15, index_s=3, threads=1, max_insert=500, minid=0.9, seed_stop_length=500, extend_length=50, overlap_length=None, ext_min_cov=5, ext_min_ratio=2, verbose=0, seed_length=None, seed_min_count=10, seed_max_count=100000000): + def __init__(self, seeds_fasta, reads1, reads2, outfile, index_k=15, index_s=3, threads=1, max_insert=500, minid=0.9, seed_stop_length=500, extend_length=50, overlap_length=None, ext_min_cov=5, ext_min_ratio=2, verbose=0, seed_length=None, seed_min_count=10, seed_max_count=100000000, kmc_threads=1): self.seeds_fasta = seeds_fasta self.reads1 = reads1 self.reads2 = reads2 @@ -17,6 +17,7 @@ class SeedProcessor: self.index_k = index_k self.index_s = index_s self.threads = threads + self.kmc_threads = kmc_threads self.max_insert = max_insert self.minid = minid self.seed_stop_length = seed_stop_length @@ -61,7 +62,9 @@ class SeedProcessor: verbose = self.verbose, seed_length = self.seed_length, seed_min_count = self.seed_min_count, - seed_max_count = self.seed_max_count + seed_max_count = self.seed_max_count, + kmc_threads = self.kmc_threads, + map_threads = self.threads ) if len(new_seed) == 0: print('Warning: could not get most common kmer for', seed_name) diff --git a/iva/test_data_runner.py b/iva/test_data_runner.py new file mode 100644 index 0000000..6585899 --- /dev/null +++ b/iva/test_data_runner.py @@ -0,0 +1,50 @@ +import os +import iva + +class Error (Exception): pass + +class Tester: + def __init__(self, outdir, iva_script, trimmo_jar=None, threads=1): + self.outdir = os.path.join(outdir) + if os.path.exists(self.outdir): + raise Error('Output directory alread exists. Cannot continue') + + self.iva_script = iva_script + self.trimmo_jar = trimmo_jar + self.threads = threads + + + def _copy_input_files(self): + extractor = iva.egg_extract.Extractor(os.path.abspath(os.path.join(os.path.dirname(iva.__file__), os.pardir))) + test_files = os.path.join('iva', 'test_run_data') + extractor.copy_dir(test_files, self.outdir) + print('Copied input test files into here:', os.path.abspath(self.outdir)) + + + def _run_iva(self): + os.chdir(self.outdir) + cmd = self.iva_script + ' --threads ' + str(self.threads) + if self.trimmo_jar: + cmd += ' --trimmomatic ' + self.trimmo_jar + + cmd += ' --pcr_primers hiv_pcr_primers.fa -f reads_1.fq.gz -r reads_2.fq.gz iva.out' + + print('Current working directory:', os.getcwd()) + print('Running iva on the test data with the command:', cmd, sep='\n') + iva.common.syscall(cmd) + + + def _check_output(self): + print('Finished running iva') + expected_contigs_file = os.path.abspath(os.path.join('iva.out', 'contigs.fasta')) + if os.path.exists(expected_contigs_file): + print('Looks OK. Final output contigs file is:', expected_contigs_file) + else: + print('Something went wrong! Final output contigs file not found:', expected_contigs_file) + + + def run(self): + self._copy_input_files() + self._run_iva() + self._check_output() + diff --git a/iva/test_run_data/hiv_pcr_primers.fa b/iva/test_run_data/hiv_pcr_primers.fa new file mode 100644 index 0000000..90d2288 --- /dev/null +++ b/iva/test_run_data/hiv_pcr_primers.fa @@ -0,0 +1,36 @@ +>Pan-HIV-1_1F.1 +AGCCCGGGAGCTCTCTG +>Pan-HIV-1_1F.2 +AGCCTGGGAGCTCTCTG +>Pan-HIV-1_1R.1 +CCTCCAATTCCCCCTATCATTTT +>Pan-HIV-1_1R.2 +CCTCCAATTCCTCCTATCATTTT +>Pan-HIV-1_2F.1 +GGGAAGTGACATAGCAGGAAC +>Pan-HIV-1_2F.2 +GGGAAGTGACATAGCTGGAAC +>Pan-HIV-1_2F.3 +GGGAAGTGATATAGCAGGAAC +>Pan-HIV-1_2F.4 +GGGAAGTGATATAGCTGGAAC +>Pan-HIV-1_2R.1 +CTGCCATCTGTTTTCCATAATC +>Pan-HIV-1_2R.2 +CTGCCATCTGTTTTCCATAGTC +>Pan-HIV-1_3F +TTAAAAGAAAAGGGGGGATTGGG +>Pan-HIV-1_3R.1 +TGGCCTGTACCGTCAGCG +>Pan-HIV-1_3R.2 +TGGCTTGTACCGTCAGCG +>Pan-HIV-1_4F +CCTATGGCAGGAAGAAGCG +>Pan-HIV-1_4R.1 +CTTATATGCAGCATCTGAGGG +>Pan-HIV-1_4R.2 +CTTATATGCAGCTTCTGAGGG +>Pan-HIV-1_4R.3 +CTTTTATGCAGCATCTGAGGG +>Pan-HIV-1_4R.4 +CTTTTATGCAGCTTCTGAGGG diff --git a/iva/test_run_data/iva_contigs_no_trimmomatic.fasta b/iva/test_run_data/iva_contigs_no_trimmomatic.fasta new file mode 100644 index 0000000..9ab1fa0 --- /dev/null +++ b/iva/test_run_data/iva_contigs_no_trimmomatic.fasta @@ -0,0 +1,167 @@ +>contig.00001 +GCTAGCAAGGGAACCCACTGCTTAAAGCCTCAATAAAGCTTGCCTTGAGTGCTTAAAGTG +GTGTGTGCCCGTCTGTGTTAGGACTCTGGTAACTAGAGATCCCTCAGACCACTCTAGACT +GAGTAAAAATCTCTAGCAGTGGCGCCCGAACAGGGACTTGAAAGCGAAAGTTAATAGGGA +CTCGAAAGCGAAAGTTCCAGAGAAGATCTCTCGACGCAGGACTCGGCTTGCTGAGGTGCA +CACAGCAAGAGGCGAGAGCGGCGACTGGTGAGTACGCCAAATTTTGACTAGCAGAGGCTA +GAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGTGGAGGAAAATTAGATGCATGGG +AAAAAATTCGGCTACGGCCAGGGGGAAAGAAAAAGTATAGGCTGAAACATTTAGTATGGG +CAAGCAGAGAGTTGGAAAGATTCGCAATTAACCCTGGCCTTTTAGAATCAGCAGAAGGAT +GTCAACAAATAATAGAACAGTTACAGCCAACTCTCAAGACAGGATCAGAAGAACTTAAAT +CTTTATATAATACAGTAGCAACCCTCTATTGTGTACATCAAAGGATAGGGGTAAAAGACA +CCAAGGAAGCTCTAGATAAAATAGAGGAAATACAAAATAAGAGCCAGCAAAAGACACAGC +AGGCAGCCGCTAGCACAGGAAGCAGCGGCAAAGTCAGTCAAAATTACCCTATAGTGCAAA +ATGCACAAGGGCAAATGACACATCAGTCTTTATCACCCAGAACTTTGAATGCATGGGTGA +AAGTAGTAGAAGAAAAGGGTTTTAACCCAGAAGTAATACCCATGTTCTCAGCATTATCAG +AGGGAGCCACCCCACAAGATTTAAATATGATGCTAAATATAGTGGGGGGACACCAGGCAG +CAATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGGATACACC +CAGTACATGCAGGGCCTATTCCACCAGGCCAAATGAGGGAACCAAGGGGAAGTGACATAG +CAGGAACTACTAGTACCCTTCAAGAACAAATAGGATGGATGACAAGCAATCCACCTATCC +CAGTGGGAGACATCTATAAAAGATGGATAATCCTGGGATTGAATAAAATAGTAAGAATGT +ATAGCCCTGTTAGCATTTTGGATATAAAACAAGGGCCAAAAGAGCCCTTCAGAGACTATG +TGGATAGGTTCTATAGAACTCTCAGAGCGGAGCAAGCTACACAGGAGGTAAAAAATTGGA +TGACAGAAACCTTACTAGTCCAAAATGCGAATCCAGACTGTAAGTCCATTTTAAAAGCAT +TAGGAACAGGAGCTACATTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCTA +GCCACAAAGCAAGGGTTTTGGCTGAGGCAATGAGCCAAGCACAACATACAAATATAATGA +TGCAGAGAGGCAATTTTAAGGGCCAGAAAAGAATTAAGTGTTTCAACTGTGGCAAAGAAG +GACACCTAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGTTGTTGGAAATGTGGGAAGG +AAGGACATCAAATGAAAGACTGCACTGAAAGACAGGCTAATTTTTTAGGGAGAATTTGGC +CTTCCAACAGAGGAAGGCCAGGGAATTTTCCTCAGAGCAGAACAGAGCCAACAGCCCCAC +CAGCAGAGAATTGGGGGATGGGGGAAGAGACAACCTCCTTACTGAAGCAGGAGCAGAAGG +ACAAGGAACATCCTCCTCCCTCTCCTCCCTCAATTTCCCTCAAATCACTCTTTGGCAGCG +ACCCCTTGTCACAGTAAAAATAGGGGGACAGCTGAAAGAAGCTCTATTAGATACAGGAGC +AGATGATACAGTATTAGAAGATATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGG +GGGAATTGGAGGTTTTATCAAGGTAAAGCAGTATGATCAGATATGTATAGAAATTTGTGG +AAAAAAGGCTATAGGTACAGTACTAGTAGGACCTACACCTGTCAACATAATTGGACGAAA +TATGTTGACTCAGATTGGTTGTACTTTAAATTTCCCAATTAGTCCTATTGAAACTGTACC +AGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAGTGGCCATTGACAGAAGA +AAAAATAAAAGCATTAACAGAAATTTGTAATGATATGGAAAAGGAAGGAAAAATCTCAAA +AATTGGGCCTGAAAACCCATATAATACTCCAATATTTGCTATAAAGAAAAAGGACAGCAC +CAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAGGACTTTTGGGA +AGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACT +AGATGTGGGAGATGCATATTTTTCAGTACCTTTAGATGAAAACTTTAGAAAGTATACTGC +ATTCACCATACCTAGTAGAAACAATGAGACACCAGGAATCAGATATCAGTACAATGTGCT +GCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGA +TCCTTTTAGAAAACAAAATCCAGACATAGTTATTTATCAATACATGGATGATTTGTATGT +AGGATCTGACTTAGAGATAGAGCAGCATAGAACAAAAATAGAGGAACTAAGACACCATCT +GTTGAAGTGGGGGTTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTG +GATGGGGTATGAACTCCATCCGGATAAGTGGACAGTACAGCCTATAAAGCTGCCAGAAAA +AGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGCCA +GATTTATCCAGGGATTAAAGTAAAACAATTATGTAAACTCATTAGGGGGACCAAAACACT +AACAGAAGTAGTACCACTAACAAAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGAT +TCTAAAAGAACCAGTACATGGAACATATTATGACCCATCAAAAGACTTAATAGCAGAAAT +ACAGAAGCAGGGAAATGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCT +GAAAACAGGAAAGTATGCAAAAATAAGGGGAGCCCACACCAATGATGTAAAACTATTAAC +AGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACCCCTAGATT +TAGATTACCCATACAAAGAGAAACATGGGACACATGGTGGACGGAATATTGGCAGGCTAC +CTGGATTCCTGAATGGGAATTTGTTAATACCCCTCCTCTAGTAAAATTATGGTACCAATT +AGAAAAAGACCCCATAGTAGGAGCAGAGACTTTCTATGTAGATGGGGCAGCTAGTAGGGA +GACTAAGCTAGGAAAAGCAGGGTATGTCACTGACAGAGGAAGACAAAAGGTAGTTTCCCT +AACTGAGACAACAAATCAGAAGACTGAATTGCATGCGATCCATTTAGCCTTGCAGGATTC +AGGATCAGAAGTAAATATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACA +ACCAGACATGAGTGAATCAGAAGTAGTCAACCAAATAATAGAGGAGCTAATAAAAAAGGA +AAGAGTCTACCTGTCATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAACAAGTAGA +TAAACTAGTCAGTTCAGGAATCAGGAAGGTGCTATTTTTAGATGGGATAGACAAAGCTCA +AGAAGATCATGAAAGATATCACAGCAATTGGAGAACAATGGCTAGTGATTTTAATTTGCC +ACCTATAGTAGCAAAGGAAATAGTAGCCAACTGTGATAAATGTCAACTGAAAGGGGAAGC +TATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTGCACACATCTAGA +AGGAAAAGTCATCCTGGTAGCAGTCCACGTGGCCAGTGGATATATAGAAGCAGAAGTTAT +CCCAGCAGAAACAGGACAGGAGACAGCATACTTTCTGCTAAAATTAGCAGGAAGATGGCC +AGTAAAAGTAATACACACAGACAATGGTAGCAATTTCACCAGCACTGCAGTTAAAGCAGC +CTGTTGGTGGGCCAATGTCCAACAGGAATTTGGGATTCCCTACAATCCCCAAAGTCAAGG +AGTAGTAGAATCTATGAATAGGGAATTAAAGAAAATCATAGGGCAGGTAAGAGAGCAAGC +TGAACACCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGG +GGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACA +AACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGA +CAGCAGAGACCCAATTTGGAAAGGACCAGCAAAGCTACTCTGGAAAGGTGAAGGGGCAGT +AGTAATACAAGACAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATTATTAG +GGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAAGATTA +AAACATGGAACAGTCTAGTAAAATATCATATGTATAGATCAAAGAAAGCTAAAGAGTGGT +TTTATAGACATCATTATGAAAGCCAGAATCCAAAGGTAAGTTCAGAAGTACATATCCCAC +TAGGAGAGGCTAGATTAATAATAAGAACATATTGGGGTCTGCAGACAGGAGAAAGGGACT +GGCATTTGGGTCATGGGGTCTCCATAGAATGGAGTCAGAGAAATTATAGCACACAAATAG +ATCCTGACCTAGCAGACCAACTGATTCATCTACAATATTTTGACTGTTTTTCAGACTCTG +CCATAAGGAAAGCCATATTAGGACAAGTAGTTAGTCATAGGTGTGAATATCCATCAGGAC +ATAACAAGGTAGGATCCCTACAATATTTGGCACTGAAAGCATTAGCAACACCAAAAAAGA +TAAGGCCACCTCTGCCTAGTGTTAAGAAATTAACAGAAGATAGATGGAACGAGCCCCAGA +AGATCAGGGGCCACAGAGAGAACCCAACAATGAATGGACATTAGAACTATTAGAGGAGCT +TAAAAATGAAGCTGTCAGACATTTCCCTAGGCTCTGGCTCCATGGCTTAGGACAGCACAT +CTATGACACTTATGGGGATACTTGGGAAGGGGTTGAAGCTATAATAAGAACTTTGCAACA +ACTACTGTTTGTTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATTATACCAGG +GAGAAGAGGCAGGAATGGAGCCAGTAGATCCTAACCTAGAGCCCTGGAAGCATCCGGGGA +GTCAGCCTACAACTGCTTGTACCAATTGCTACTGTAAAAAATGTTGCTGGCATTGCCAAC +TATGCTTTCTGAAAAAAGGCTTAGGCATCTCCCATGGCAGGAAGAAGCGGAAACACCGAC +GAGGAACTCCTCGCAGCAGTAAGGACCATCAAAATCCTATACCAGAGCAGTAAGTACTAA +ATATATGTAATGCAAGCTTTAGCTATATTAGCAATAGTAGGATTAGTATTAGCAGGAATA +ATAGCAATAGTTGTGTGGACTATAGTGTTCATAGAATATAGGAAAATAAGAAAACAAAAG +AAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGAAATGAGAGT +GACGGGGACACAGATGACTTGGCCAAGCTTTTGGAAATGGGGGACCTTGATCCTTGGGTT +GGTGATAATTTGTAGTGCTGCAGAACAATTGTGGGTTACAGTTTATTATGGGGTTCCTGT +GTGGAGAGATGCAGATACCACCCTATTTTGTGCATCAGATGCTAAAGGACATGAGACAGA +AATGCACAATGTCTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAAT +ACTCATGGAGAATGTAACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAGCAGAT +GCATGAGGATGTAATCAGTTTATGGGATCAAAGTCTAAAACCATGTGTAAAGTTAACTCC +TCTCTGTGTTACTTTAAATTGTACGAAGGTCAATATGGCCGGAGTCAATATTACTGACAC +TGACAATATCACCAACATAATAGGAAATGGAACAGAGGAAGTAAGAAACTGTTCTTTTAA +TATGACCACAGAACTAAATGATAAGCAGCGGAAGATTCATGCACTTTTTTATAAGCTTGA +TATAGTATCAATGTATAATGATAATAGTAGTTATAGGTTAATAAATTGTAATACTTCAGT +CATTAAGCAGGCTTGTCCAAAGGTATCCTTTGATCCAATTCCTATACATTATTGTACTCC +AGCTGGTTATGCGATTTTAAAGTGTAATGATAAGAATTTCAATGGGACAGGGCCATGTAA +AAATGTCAGCTCAGTACAATGCACACATGGAATTAAGCCAGTGGTATCAACTCAATTGCT +GTTAAATGGAAGTCTAGCAGAAGAAGAGATAATAATCAGATCTGAAAATCTCACAGATAA +TACAAAAACCATAATAGTGCACCTTAATACATCTGTACAAATTAATTGTACCAGACCCTC +TAACAATACAAGAACAAGTGCAAGTATAGGACCAGGACAAGTATTATTCTATAGACCAGG +AGAAGTAATAGGAAATATAAGAAAAGCATATTGTAATATTAGTGGAACAGCATGGAGGAA +AGTCTTAGAACAGGTAACTGGAAAACTAAAAGAACACTTTAATAAGACAATAATCGTTGA +ACCACACTCAGGAGGAGATCTAGAAATTACAACACATCACTTTAATTGTAGAGGGGAATT +TTTTTATTGCAATACAACAAAACTGTTTACTAATAATTGCACAGATAACAGCACAGGGGG +GTGTAATGATACTAATATCATAATTCCATGCAAGATAAGACAAATTGTACGCATGTGGCA +AGGAGTAGGACAAGCAATGTACGCTCCTCCCATCAGTGGAGAAATTAAGTGTGTATCAAA +TATTACAGGAATGCTATTGACAAGAGATGGTGGTAATACAACTAATGAGACCTTCAGACC +TGGAGGAGGAAATATAAAGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTACA +AATTGATCCACTAGGAATAGCACCCACCAAGGCAAAAAGAAGAGTGGTGGACAGAGAAAA +AAGAGCAGTGGGAATAGGAGCTATGATCTTTGGGTTCTTAGGAGCAGCAGGAAGCACTAT +GGGCGCGGCGTCAATAACGCTGACGGTACAGGCCAGAGAATTATTGTCTGGTATAGTGCA +ACAGCAAAGCAATTTGCTGAGGGCTATAGAGGCGCAGCAGCATCTGTTGCAACTCACAGT +CTGGGGCATTAAACAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTAAAGGATCA +AAAGTTCCTAGGACTTTGGGGCTGCTCTGGAAAAATCATCTGTCCCACTGCTGTGCCCTG +GAACACCTCTTGGAGTAATAAATCTTATGAAGAAATTTGGAACAACATGACATGGATAGA +ATGGGAGAGAGAAATTAGCAATTACACAAGCCAAATATATGAGATACTTACAAAATCGCA +GGACCAGCAGGATAGAAATGAAAAGGATTTGTTAGAATTGGACAATTGGGCAAGTCTGTG +GACTTGGTTTGACATATCAAATTGGCTGTGGTATATAAGAATATTTATAATGATAGTAGG +AGGTTTAATAGGTTTAAGAATAATTTTTGCTGTGCTTTCCATAGCGAATAGAGTTAGGCA +GGGATACTCACCTCTGTCTTTCCAGACCCCTATCCAACTGCAGAGGGAACCCGACAGGCC +CGAAGGAATCGAAGAAGGAGGTGGCGAGCAAGGCAGAGACAGATCCGTGAGATTAGTCAG +CGGATTCTTGACTCTTGTCTGGGACGATCTACGGAGCCTGTTCCTCTTCCTCTACCACCG +CTTGAGAGACTTCATCTTAATTGCAGCGAGGACTGTGGAACTTCTGGGACACAACAGTCT +CAAGGGACTGAGACGGGGGTGGGAAGGCCTCAAATACCTGGGGAATCTTCTGTTGTATTG +GGGCCAGGAACTAAAAACTAGTGCTATTTCTTTGTTTAATGCTACAGCAATAGCAGTAGG +GGGGTGGACAGATAGACTTATAGAAGTAGCGCAAAGAGCTTGGAGAGCCCTTCTCCACAT +ACCTAGAAGAATCAGACAGGGCTTAGAAAGGGCTTTGCTATAACATGGGAGGCAAGTGGT +CAAAAAGTAGCATAGTGGGGTGGCCTCAGGTCAGGGAAAGATTAAGGAGAACAAACCCTC +AAGCAACAGAAGGAGTAGGAGCAGTATCTCAAGATCTAGATAAACATGGAGCAGTAACAA +GTACTAATATGAATAATGCAGATAGTGTCTGGCTGAGAGCACAAGAAGAAGATAACGAGG +GGGTAGGCTTTCCAGTCAGGCCACAGGTACCTCTAAGACCAATGACTTTTAAGGGAGCAT +TTGATCTTAGCTTCTTTTTAAAAGAAAAGGGGGGACTGGATGGGCTAATTTACTCCAAGA +AAAGACAAGAGATCATTGACTTATGGGTTTATAATACACAAGGCTACTTCCCTGATTGGC +AAAACTACACACCAGGGCCAGGGGTCAGATACCCACTGTGTTTTGGATGGTGCTTCAAGT +TAGTACCAGTTGACCCAAGTGAAGTAGAGGAGAACAACAAAGGAGAAAACAACTGCCTGC +TACATCCCATGAGCCAGCATGGGCAAGAGGACGAGGAAAGAGAAGTGCTGATGTGGAAGT +TTGACAGTGCCCTAGCACGAAAACACATAGCCCGAGAACAACATCCAGAGTACTATAAAG +ACTGCTGACAAAGAAGTTTCTAACTAGGACTTCCGCTGGGGACTTTCCAGGGGAGGTGTG +GCCGGGGCGGAGCTGGGGAGTGGTTAACCCTCAGAAGCTGCATAAAAGAGATCGGAAGAG +CGGTTCAGCAGGAATGCCGAGACCGATCTCGGTCGTGTATCTCGTATGCCGTCTTCTGCT +TGA +>contig.00002 +GTGGAGAGATGCAGATACCACCCTATTTTGTGCATCAGATGCTAAAGGACATGAGACAGA +AATGCACAATGTCTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAAT +ACTCATGGAGAATGTAACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAGCAGAT +GCATGAGGATGTAATCAGTTTATGGGATCAAAGTCTAAAACCATGTGTAAAGTTAACTCC +TCTCTGTGTTACTTTACATTGTACCAATGTCACTAACATAGGAGGAGATGGAACAAAGGA +AGTAAGAAACTGTTCTTTTAATATGACCACAGAACTAAAAGATAAGAAGCGGGAGATTCA +TGCACTTTTTTATAGGCTTGATATAGTACCAGTTGATCCTAAGGCTAATAATAGTGAGTA +TAGGTTAATAAATTGTAATACTTCAGTCATTAAGCAGGCTTGTCCAAAGGTATCCTTTGA +TCCAATTCCTATACATTATTGCACTCCAGCTGGTTATGCGATTTTAAAGTGTAATGATAA +GAATTTCAATGGGACAGGGCCATGTAAAAATGTCAGCTCAGTACAATGCACACATGGAAT +TAAGCCAGTGGTATCAACTCAATTGCTGTTAAATGGAAGTCTAGCAGAAGAAGAGATAAT +AATCA diff --git a/iva/test_run_data/iva_contigs_with_trimmomatic.fasta b/iva/test_run_data/iva_contigs_with_trimmomatic.fasta new file mode 100644 index 0000000..f3c3e05 --- /dev/null +++ b/iva/test_run_data/iva_contigs_with_trimmomatic.fasta @@ -0,0 +1,152 @@ +>contig.00001 +CTCTGGCTAGCAAGGGAACCCACTGCTTAAAGCCTCAATAAAGCTTGCCTTGAGTGCTTA +AAGTGGTGTGTGCCCGTCTGTGTTAGGACTCTGGTAACTAGAGATCCCTCAGACCACTCT +AGACTGAGTAAAAATCTCTAGCAGTGGCGCCCGAACAGGGACTTGAAAGCGAAAGTTAAT +AGGGACTCGAAAGCGAAAGTTCCAGAGAAGATCTCTCGACGCAGGACTCGGCTTGCTGAG +GTGCACACAGCAAGAGGCGAGAGCGGCGACTGGTGAGTACGCCAAATTTTGACTAGCAGA +GGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGTGGAGGAAAATTAGATGC +ATGGGAAAAAATTCGGCTACGGCCAGGGGGAAAGAAAAAGTATAGGCTGAAACATTTAGT +ATGGGCAAGCAGAGAGTTGGAAAGATTCGCAATTAACCCTGGCCTTTTAGAATCAGCAGA +AGGATGTCAACAAATAATAGAACAGTTACAGCCAACTCTCAAGACAGGATCAGAAGAACT +TAAATCTTTATATAATACAGTAGCAACCCTCTGGTGCGTACACCAAAGGACAGATGTAAA +AGACACCAAGGAAGCTTTAGATAAAATAGAGGAAGCACAAAACAGGAACCAGCAAAAGAC +ACAGCAGGCAGCCGCTAGCACAGGAAGCAGCAGCAACGTCAGCCAAAATTACCCTATAGT +GCAAAATGCACAAGGGCAAATGACACATCAGTCTTTATCACCCAGAACTTTGAATGCATG +GGTGAAAGTAGTAGAAGAAAAGGGTTTTAACCCAGAAGTAATACCCATGTTCTCAGCATT +ATCAGAGGGAGCCACCCCACAAGATTTAAATATGATGCTAAATATAGTGGGGGGACACCA +GGCAGCAATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGGAT +ACACCCAGTACATGCAGGGCCTATTCCACCAGGCCAAATGAGGGAACCAAGGGGAAGTGA +CATAGCAGGAACTACTAGTACCCTTCAAGAACAAATAGGATGGATGACAAGCAATCCACC +TATCCCAGTGGGAGACATCTATAAAAGATGGATAATCCTGGGATTGAATAAAATAGTAAG +AATGTATAGCCCTGTTAGCATTTTGGATATAAAACAAGGGCCAAAAGAGCCCTTCAGAGA +CTATGTGGATAGGTTCTATAGAACTCTCAGAGCGGAGCAAGCTACACAGGAGGTAAAAAA +TTGGATGACAGAAACCTTACTAGTCCAAAATGCGAATCCAGACTGTAAGTCCATTTTAAA +AGCATTAGGAACAGGAGCTACATTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGG +ACCTAGCCACAAAGCAAGGGTTTTGGCTGAGGCAATGAGCCAAGCACAACATACAAATAT +AATGATGCAGAGAGGCAATTTTAAGGGCCAGAAAAGAATTAAGTGTTTCAACTGTGGCAA +GGAAGGACACCTAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGTTGTTGGAAATGTGG +GAAGGAAGGACATCAAATGAAAGACTGCACTGAAAGACAGGCTAATTTTTTAGGGAGAAT +TTGGCCTTCCAACAGAGGAAGGCCAGGGAATTTTCCTCAGAGCAGAACAGAGCCAACAGC +CCCACCAGCAGAGAATTGGGGGATGGGGGAAGAGACAACCTCCTTACTGAAGCAGGAGCA +GAAGGACAAGGAACATCCTCCTCCCTCTCCTCCCTCAATTTCCCTCAAATCACTCTTTGG +CAGCGACCCCTTGTCACAGTAAAAATAGGGGGACAGCTGAAAGAAGCTCTATTAGATACA +GGAGCAGATGATACAGTATTAGAAGAAATAAATTTGCCAGGAAAATGGAAACCAAAAATG +ATAGGGGGAATTGGAGGTTTTATCAAGGTAAAGCAGTATGATCAGATATGTATAGAAATT +TGTGGAAAAAAGGCTATAGGTACAGTACTAGTAGGACCTACACCTGTCAACATAATTGGA +CGAAATATGTTGACTCAGATTGGTTGTACTTTAAATTTCCCAATTAGTCCTATTGAAACT +GTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAGGTTAAACAGTGGCCATTAACA +GAAGAAAAAATAAAAGCATTAACAGAAATTTGTAATGATATGGAAAAGGAAGGAAAAATC +TCAAAAATTGGGCCTGAAAACCCATATAATACTCCAATATTTGCTATAAAGAAAAAGGAC +AGCACCAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAAAGAACTCAGGACTTT +TGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACA +GTACTAGATGTGGGAGATGCATATTTTTCAGTACCTTTAGATGAAAACTTTAGAAAGTAT +ACTGCATTCACCATACCTAGTAGAAACAATGAGACACCAGGAATCAGATATCAGTACAAT +GTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATC +TTAGATCCTTTTAGAAAACAAAATCCAGACATAGTTATTTATCAATACATGGATGATTTG +TATGTAGGATCTGACTTAGAAATAGAGCAGCATAGAACAAAAATAGAGGAACTAAGACAC +CATCTGTTGAAGTGGGGGTTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTC +CTTTGGATGGGGTATGAACTCCATCCGGATAAGTGGACAGTACAGCCTATAAAGCTTCCA +GAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCA +AGCCAGATTTATCCAGGGATTAAAGTAAAACAATTATGTAAACTCATTAGGGGGACCAAA +ACACTAACAGAAGTAGTACCACTAACAAAAGAAGCAGAGCTAGAACTGGCAGAAAACAGA +GAGATTCTAAAAGAACCAGTACATGGAGTATATTATGACCCATCAAAAGACTTAATAGCA +GAAATACAGAAGCAGGGAAATGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAA +AATCTGAAAACAGGAAAGTATGCAAAAATAAGGGGAGCCCACACCAATGATGTAAAACTA +TTAACAGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACCCCT +AGATTTAGATTACCCATACAAAGAGAAACATGGGACACATGGTGGACGGAATATTGGCAG +GCTACCTGGATTCCTGAATGGGAATTTGTTAATACCCCTCCTCTAGTAAAATTATGGTAC +CAATTAGAAAAAGACCCCATAGTAGGAGCAGAGACTTTCTATGTAGATGGGGCAGCTAGT +AGGGAGACTAAGCTAGGAAAAGCAGGGTATGTCACTGACAGAGGAAGACAAAAGGTAGTT +TCCCTAACTGAGACAACAAATCAGAAGACTGAATTGCATGCGATCCATTTAGCCTTGCAG +GATTCAGGATCAGAAGTGAATATAGTAACAGACTCACAATATGCATTAGGAATCATTCAA +GCACAACCAGACATGAGTGAATCAGAAGTAGTCAACCAAATAATAGAGGAGCTAATAAAA +AAGGAAAGAGTCTACCTGTCATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAACAA +GTAGATAAACTAGTCAGTTCAGGAATCAGGAAGGTGCTATTTTTAGATGGGATAGACAAA +GCTCAAGAAGATCATGAAAGATATCACAGCAATTGGAGAACAATGGCTAGTGATTTTAAT +TTGCCACCTATAGTAGCAAAGGAAATAGTAGCCAACTGTGATAAATGTCAACTGAAAGGG +GAAGCTATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTGCACACAT +CTAGAAGGAAAAGTCATCCTGGTAGCAGTCCACGTGGCCAGTGGATATATAGAAGCAGAA +GTTATCCCAGCAGAAACAGGACAGGAGACAGCATACTTTCTGCTAAAATTAGCAGGAAGA +TGGCCAGTAAAAGTAATACACACAGACAATGGTAGCAATTTCACCAGCACTGCAGTTAAA +GCAGCCTGTTGGTGGGCCAATGTCCAACAGGAATTTGGGATTCCCTACAATCCCCAAAGT +CAAGGAGTAGTAGAATCTATGAATAGGGAATTAAAGAAAATCATAGGGCAGGTAAGAGAG +CAAGCTGAACACCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGA +AAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGAC +ATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTAC +AGGGACAGCAGAGACCCAATTTGGAAAGGACCAGCCAAACTACTCTGGAAAGGTGAAGGG +GCAGTAGTAATACAAGACAATAGTGACATAAAAGTAGTACCAAGAAGAAAAGCAAAGATT +ATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAA +GATTAAAACATGGAACAGTCTAGTAAAATATCATATGTATAGATCAAAGAAAGCTAAAGA +GTGGTTTTATAGACATCATTATGAAAGCCAGAATCCAAAGGTAAGTTCAGAAGTACATAT +CCCACTAGGAGAGGCTAGATTAATAATAAGAACATATTGGGGTCTGCAGACAGGAGAAAG +GGACTGGCATTTGGGTCATGGGGTCTCCATAGAATGGAGTCAGAGAAATTATAGCACACA +AATAGATCCTGACCTAGCAGACCAACTGATTCATCTACAATATTTTGACTGTTTTTCAGA +CTCTGCCATAAGGAAAGCCATATTAGGACAAGTAGTTAGTCATAGGTGTGAATATCCATC +AGGACATAACAAGGTAGGATCCCTACAATATTTGGCACTGAAAGCATTAGCAACACCAAA +AAAGATAAGGCCACCTCTGCCTAGTGTTAAGAAATTAACAGAAGATAGATGGAACGAGCC +CCAGAAGATCAGGGGCCACAGAGAGAACCCAACAATGAATGGACATTAGAACTATTAGAG +GAGCTTAAAAATGAAGCTGTCAGACATTTCCCTAGGCTCTGGCTCCATGGCTTAGGACAG +CACATCTATGACACTTATGGGGATACTTGGGAAGGGGTTGAAGCTATAATAAGAACTTTG +CAACAACTACTGTTTGTTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATTATA +CCAGGGAGAAGAGGCAGGAATGGAGCCAGTAGATCCTAACCTAGAGCCCTGGAAGCATCC +GGGGAGTCAGCCTACAACTGCTTGTACCAATTGCTACTGTAAAAAATGTTGCTGGCATTG +CCAACTATGCTTTCTGAAAAAAGGCTTAGGCATCTCCCATGGCAGGAAGAAGCGGAAACA +CCGACGAGGAACTCCTCGCAGCAGTAAGGACCATCAAAATCCTATACCAGAGCAGTAAGT +ACTAAATATATGTAATGCAAGCTTTAGCTATATTAGCAATAGTAGGATTAGTATTAGCAG +GAATAATAGCAATAGTTGTGTGGACTATAGTGTTCATAGAATATAGGAAAATAAGAAAAC +AAAAGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGAAATG +AGAGCGACGGGGACACAGATGACTTGGCCAAGCTTTTGGAAATGGGGGACCTTGATCCTT +GGGTTGGTGATAATTTGTAGTGCTGCAGAACAATTGTGGGTTACAGTTTATTATGGGGTT +CCTGTGTGGAGAGATGCAGATACCACCCTATTTTGTGCATCAGATGCTAAAGGACATGAG +ACAGAAATGCACAATGTCTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAA +GAAATACTCATGGAGAATGTAACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAG +CAGATGCATGAGGATGTAATCAGTTTATGGGATCAAAGTCTAAAACCATGTGTAAAGCTA +ACTCCTCTCTGTGTTACTTTAAATTGTACGAAGGTCAATATGGCCGGAGTCAATATTACT +GACACTGACAATATCACCAACATAATAGGAAATGGAACAGAGGAAGTAAGAAACTGTTCT +TTTAATATGACCACAGAACTAAATGATAAGCAGCGGCAGATTCATGCACTTTTTTATAAG +CTTGATATAGTATCAATGTATAATGATAATAGTAGTTATAGGTTAATAAATTGTAATACT +TCAGTCATTAAGCAGGCTTGTCCAAAGGTATCCTTTGATCCAATTCCTATACATTATTGT +ACTCCAGCTGGTTATGCGATTTTAAAGTGTAATGATAAGAATTTCAATGGGACAGGGCCA +TGTAAAAATGTCAGCTCAGTACAATGCACACATGGAATTAAGCCAGTGGTATCAACTCAA +TTGCTGTTAAATGGAAGTCTAGCAGAAGAAGAGATAATAATCAGATCTGAAAATCTCACA +GATAATACAAAAACCATAATAGTGCACCTTAATACATCTGTACAAATTAATTGTACCAGA +CCCTCTAACAATACAAGAACAAGTGCAAGTATAGGACCAGGACAAGTATTATTCTATAGA +CCAGGAGAAGTAATAGGAAATATAAGAAAAGCATATTGTAATATTAGTGGAACAGCATGG +AGGAAAGTTTTAAAACAGGTAACTGAAAAACTAAAAGAACACTTTAATAAAACAATACAC +GTTGAACCACACTCAGGAGGAGATCTAGAAATTACAACACATCACTTTAATTGTAGAGGG +GAATTTTTTTATTGCAATACAACAAAACTGTTTACTAATAATTGCACAGATAACAGCACA +GGGGGGTGTAATGATACTAATATCATAATTCCATGCAAGATAAGACAAATTGTACGCATG +TGGCAAGGAGTAGGACAAGCAATGTACGCTCCTCCCATCAGTGGAGAAATTAAGTGTGAA +TCAAATATTACAGGAATACTATTGACAAGAGATGGTGGTCATAATTCAACTAATGAGACC +TTCAGACCTGAAGGAGGAAATATAAAGGACAATTGGAGAAGTGAATTATATAAATATAAA +GTAGTACAAATTGATCCACTAGGAATAGCACCCACCAGGGCAAAAAGAAGAGTGGTGGAC +AGAGAAAAAAGAGCAGTGGGAATAGGAGCTATGATCTTTGGGTTCTTAGGAGCAGCAGGA +AGCACTATGGGCGCGGCGTCAATAACGCTGACGGTACAGGCCAGAGAATTATTGTCTGGT +ATAGTGCAACAGCAAAGCAATTTGCTGAGGGCTATAGAGGCGCAGCAGCATCTGTTGCAA +CTCACAGTCTGGGGCATTAAACAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTA +AAGGATCAAAAGTTCCTAGGACTTTGGGGCTGCTCTGGAAAAATCATCTGTCCCACTGCT +GTGCCCTGGAACACCTCTTGGAGTAATAAATCTCATGACGAGATTTGGAACAACATGACA +TGGATAGAATGGGAGAGAGAAATTAGCAATTACACAAGCCAAATATATGAGATACTTACA +AAATCGCAGGACCAGCAGGATAGAAATGAAAAGGATTTGTTAGAATTGGACAATTGGGCA +AGTCTGTGGACTTGGTTTGACATATCAAATTGGCTGTGGTATATAAGAATATTTATAATG +ATAGTAGGAGGTTTAATAGGTTTAAGAATAATTTTTGCTGTGCTTTCCATAGCGAATAGA +GTTAGGCAGGGATACTCACCTCTGTCTTTCCAGACCCCTATCCAACTGCAGAGGGAACCC +GACAGGCCCGAAGGAATCGAAGAAGGAGGTGGCGAGCAAGGCAGAGACAGATCCGTGAGA +TTAGTCAGCGGATTCTTGACTCTTGTCTGGGACGATCTACGGAGCCTGTTCCTCTTCCTC +TACCACCGCTTGAGAGACTTCATCTTAATTGCAGCGAGGACTGTGGAACTTCTGGGACAC +AACAGTCTCAAGGGACTGAGACGGGGGTGGGAAGGCCTCAAATACCTGGGGAATCTTCTG +TTGTATTGGGGCCAGGAACTAAAAACTAGTGCTATTTCTTTGTTTAATGCTACAGCAATA +GCAGTAGGGGGGTGGACAGATAGACTTATAGAAGTAGCGCAAAGAGCTTGGAGAGCCCTT +CTCCACATACCTAGAAGAATCAGACAGGGCTTAGAAAGGGCTTTGCTATAACATGGGAGG +CAAGTGGTCAAAAAGTAGCATAGTGGGGTGGCCTCAGGTCAGGGAAAGATTAAGGAGAAC +AAACCCTCAAGCAACAGAAGGAGTAGGAGCAGTATCTCAAGATCTAGATAAACATGGAGC +AGTAACAAGTACTAATATGAATAATGCTGATAGTGTCTGGCTGAGAGCACAAGAAGAAGA +TAACGAGGGGGTAGGCTTTCCAGTCAGGCCACAGGTACCTCTAAGACCAATGACTTTTAA +GGGAGCATTTGATCTTAGCTTCTTTTTAAAAGAAAAGGGGGGACTGGATGGGCTAATTTA +CTCCCAGAAAAGACGAGAGATCCTTGACTTATGGGTTTATAATACACAAGGCTACTTCCC +TGATTGGCAAAACTACACACCAGGGCCAGGGGTCAGATACCCACTGTGTTTTGGATGGTG +CTTCAAGTTAGTACCAGTTGACCCAAGCGAAGTAGAGGAGAACAACAAAGGAGAAAACAA +CTGCCTGCTACATCCCATGAGCCAGCATGGGCAAGAGGACGAGGAAAGAGAAGTGCTGAT +GTGGAAGTTTGACAGTGCCCTAGCACGAAAACACATAGCCCGAGAACAACATCCAGAGTA +CTATAAAGACTGCTGACAAAGAAGTTTCTAACTAGGACTTCCGCTGGGGACTTTCCAGGG +GAGGTGTGGCCGGGGCGGAGCTGGGGAGTGGTTAA diff --git a/iva/test_run_data/reads_1.fq.gz b/iva/test_run_data/reads_1.fq.gz new file mode 100644 index 0000000..8f7a52a Binary files /dev/null and b/iva/test_run_data/reads_1.fq.gz differ diff --git a/iva/test_run_data/reads_2.fq.gz b/iva/test_run_data/reads_2.fq.gz new file mode 100644 index 0000000..8e74d0c Binary files /dev/null and b/iva/test_run_data/reads_2.fq.gz differ diff --git a/iva/test_run_data/reference.fasta b/iva/test_run_data/reference.fasta new file mode 100644 index 0000000..acda52b --- /dev/null +++ b/iva/test_run_data/reference.fasta @@ -0,0 +1,152 @@ +>DQ234790 +cgaacagggacttgaaagcgaaagttaatagggactcgaaagcgaaagttccagagaagt +tctctcgagcgcaggactcggcttgctgaggtgcacacagcaagaggcgagagcggcgac +tggtgagtacgccaaattttgactagcggaggctagaaggagagagatgggtgcgagagc +gtcaatattaacaggggaaaaattagatgcatgggaaaaaattcggttacggccaggggg +aaagaaaaaatatatgataaaacatctagtatgggcaagcagagagttggaaagattcgc +acttaaccctggccttttagaaacagcggaaggatgtcaacagataatagaacagttaca +gtcaactctcaagacaggatcagaagaacttaaatcattatttaatacagtagcaaccct +ctggtgcgtacaccaaaggatagaggtaaaagacaccaaggaagctttagataaattaga +ggaaatacaaaataagaaccagaaaaagacacagcaggcagcagctggcacaggaagcaa +cagcaaagtcagccaaaattaccctatagtgcaaaatgcacaaggacaaatgatacatca +gtctttatcacctagaactttgaatgcatgggtgaaagtagtagaagaaaagggctttaa +cccagaagtaatacccatgttctcagcattatcagagggagccgctccacaagatttaaa +tatgatgctaaatatagtggggggacaccaggcagcaatgcaaatgttaaaagaaaccat +caatgaggaagctgcagaatgggatagggtacacccagtacatgcagggcctattccacc +aggccaaatgagggaaccaaggggaagtgacatagcaggaaccactagtacccttcaaga +acaaataggatggatgacaagcaatccacytatcccagtgggagacatctataaaaggtg +gataattctgggattaaataaaatagtaagaatgtatagccctgttagcattttggacat +aagacaagggccaaaagaacccttcagagactatgtagataggttctataaaactctcag +agcggaacaagctacacaggaagtaaaaaattggatgacagaaaccttgctagtccaaaa +tgcgaatccagactgtaagtccattttaaaagcattaggagcaggagctactttagaaga +aatgatgacagcatgccagggagtgggaggacctagccataaagcaagggttttggctga +ggcaatgaaccaagcacaacagacaactgtaatgatgcagagaggcaattycaagggcca +gaaaagaattaagtgcttcaactgtggcagggaaggacacctagccagaaattgcagggc +ccctagaaaaaagggttgttggaaatgcgggaaggaaggacatcaaatgaaagactgcac +tgagagacaggctaattttttagggaaaatttggcctcccaacaaggggaggccagggaa +ttttcctcagagcagaccagagccttcagccccaccagcggaaaactggagggagataac +ctccttactgaagcaggagcagaaggacaaggaacacccttctccttcaatctccctcaa +atcactctttggcaacgaccccttgtcacagtaaaaataggaggacagctaaaagaagct +ctattagatacaggagcagatgatacagtattagaagatataaatttgccaggaaaatgg +aaaccaaaaatgatagggggaattggaggttttatcaaagtaagacaatatgatcagata +cttatagaaatttgtggaaaaaaggctataggtacagtattagtaggacctacacctgtc +aacataattgggcgaaatatgttgactcagattggctgtactttaaatttcccaatcagt +cctattgacactgtaccagtaaaattaaagccaggaatggatggaccaaaggttaaacag +tggccattgacagaagaaaaaataaaagcattaacagaaatttgtaaagagatggaagag +gaaggaaagatctcaaaaattgggcctgaaaatccatacaatactccagtatttgctata +aagaaaaaggacagcaccaaatggaggaaattagtagatttcagagagctcaataaaaga +acccaggacttttgggaaattcaattaggaataccacacccagcaggtttaaaaaagaaa +aaatcagtaacagtactagatgtgggagatgcatatttttcagttccattagataaayat +tttagaaagtatacagcattcaccatacctagtataaacaatgagacaccaggaatcaga +tatcagtacaatgtgctgccacagggatggaaaggatcaccggcaatattccagagtagc +atgacaaaaatcttagaaccttttagagcaaacaatccagaaataattatctatcaatac +atggatgacttgtatgtaggatctgacttagaaataggacagcatagaataaaaatagag +gagctgagagctcatttattaagctggggatttactacaccagacaaaaagcatcagaag +gaacctccattcctttggatggggtatgaactccatcctgacagatggacagtccagcct +atagaactgccagaaaaagacagctggactgtcaatgatatacagaaattagtgggaaaa +ctaaattgggcaagtcaaatttatgcagggattaagataaagcaattgtgtagactcctc +aggggagctaaagcactaacagacgtagtaccactgactgaagaagcagaattagaattg +gcagagaacagggagattctaaaaacccctgtgcatggagtatattatgacccatcaaaa +gacttagtagcagaagtacagaagcaaggacaagaccaatggacatatcaaatttatcaa +gagccatttaaaaatctaaaaacaggaaaatatgcaagaaaaaggtctgctcacactaat +gatgtaagacaattagcagaagtggtgcaaaaaatagtcacagaaagcatagtaatatgg +ggaaaggcccctaaatttaaactacccatacaaagagaaacatgggaaacatggtggatg +gagtattggcaggctacctggattcctgaatgggagtttgtcaatacccctcctctagta +aaattatggtaccaattagaaaaagaccccatagtgggagcagaaaccttctatgtagat +ggggcagctagtagggaaactaagctaggaaaagcagggtatgtcactgacagaggaaga +caaaaggtagtttccctaactgaaacaacaaatcaaaagactgagttacatgcaatctat +ttagccttgcaagattcaggatcagaagtaaatatagtaacagactcacaatatgcatta +ggaatcattcaggcacaaccagacaggagtgaatcagaaatagttagccaaataatagag +gagctaataaaaaaggaaaaagtctacctgtcatgggtaccagcacataaagggattgga +ggtaataacaaagtagataaattagtcagttcaggaatcaggaaagtgctatttttaaat +gggatagataaggctcaagaagaacatgaaagatatcacagcaattggagaacaatggct +agtgattttaatttgccacctatagtagcaaaggaaatagtagccaactgtgataaatgt +caactaaaaggggaagctatgcatggacaagtagattgtagtccagggatatggcaatta +gattgcacacatctagagggaaaagtcatcctggtagcagtccacgtggccagtggatat +atagaagcagaagttatcccagcagaaacaggacaggagacagcatacttcctgctaaaa +ttagcaggaagatggccagtaaaagtcatacacacagacaatggtagcaatttcaccagc +gctgcagttaaagcagcctgttggtgggccaatgtccgacaggaatttgggatcccctac +aatccccaaagtcaaggagtagtagaatctatgaataaggacttaaagaaaatcataggg +caggtaagagaacaagctgaacatcttaagacagcagtacaaatggcagtattcattcac +aattttaaaagaaaaggggggattggggggtacagtgcaggggaaagaataatagacata +atagcaacagacatacaaactaaagaattacaaaaacaaattacaaaaattcaaaatttt +cgggtctattacagggacagcagagacccaatttggaaaggaccagcaaaactactctgg +aaaggtgaaggggcagtagtaatacaagacaatagtgatataaaagtagtgccaagaaga +aaagcaaaaatcattagggattatggaaaacagatggcaggtgatgattgtgtggcaggt +agacaggatgaggattagaacatggaacagtttagtaaaacatcatatgtatgtctcaaa +gaaagctaaaaagtggtattatagacatcattatgaaagccagcatccaaagataagctc +agaagtacacatcccactaggagaggctagattagtaataaaaacatattggggtctgca +gacaggagaaaaggactggcaattgggtcatggagtctccatagaatggagacagagaaa +ctatagcacacaaatagatcctgaagtagcagaccgactgattcatctacaatattttga +ctgttttgcagactctgccataaggagagccatactaggacaagtagttagatataagtg +tgaatatccatcaggacataacaaggtaggatctctacaatacttggcactaagggcatt +aacagggccaaaagggagcaggccgcctctgcccagtgtaaagaaattaacagaagatag +atggagcgagccccagaagaccaggggccacagagagaaccctacaatgaatggacatta +gaactattagaggagcttaaaaatgaagctgytagacattttcctrggccctggctccat +agcttaggacagtacatctatgatacttatggggatacttgggaaggggttgaagctata +acaagaactttgcagcaactactgtttgttcatttcagaattgggtgtcaacatagcaga +ataggcattataccagggagaagaggcaggaatggagccagtagatcctaacctagagcc +ctggaatcatccgggaagtaagcctacaaccgcttgtaccaagtgttactgtaaaatatg +ttcctggcattgccaattatgctttctgaaaaaaggcttaggcatctcctatggcaggaa +gaagcggaagcaccgacgaggaactcctcggagcagtgagggccatcaaaatcctgtacc +aaagcagtaagtatttgtaaaataagtaaatgtaatgacacctcttcaaattagtgcaat +agtaggactgatagtagcgctaatcttagcaatagtagtgtggactatagtaggtttaga +agttaggaaaatactaaggcaaagaaaaatagataggttaattaagaaaataagagaaag +agaagaagacagtggaaatgagagtgaaggagacacagatgaattggccaaacttgtgga +gatgggggactttgatccttgggttggtgataatttgtagtgcctcagacaacttgtggg +ttacagtttattatggggtccctgtgtggaaagatgcagataccaccctattttgtgcat +cagatgccaaagcacatgagacagaagtgcacaatgtctgggccacgcatgcctgtgtac +ccacagaccccaacccacaagaaataaaactgggagatgtaacagaaaattttaacatgt +ggraaaataaaatggcagagcagatgcaggaggatgtaatcagtttatgggatcaaagcc +taaagccatgtgtaaagttaactcctctctgtgttactttaaactgtacccaggctaatt +ggaaatctaataacacaacccagaatataaatagctyggtcacaataggaaatatgacag +atgaagtaagaaattgttcttttaatatgaccacagaactaacagataagcagcagaagg +tctatgcacttttttataagcttgatatagtagaaattaataatagtacgtataggttaa +ttaattgtaatacttcagtcattaagcaagcttgtccaaaggtatcctttgatccaattc +ctatacattattgtactccagctggttatgtgattttaaagtgcaatgataaaaaattca +gtgggacagggccatgtaacaatgtaagctcagtacaatgcacacatggaattaagccag +tggtgtcaactcaattgctattaaatggcagcctagcagaagaagagataataattagat +ctgaaaatttcacaaataatgccaaaaccataatagtgcaccttaatgaatctgtacaaa +tcacttgtaccagaccctccaacaatacaagagaaagtgtgcgtataggaccaggacaag +tattctatagaacaggagaaataacaggagatataaggaaagcatattgtcagattaatg +caacaaaatgggaaaaagttttaaaacaggtagctaaaaaattaagagagcaatttaata +agacaaacataagatttcaaccacactcaggaggagatctagaaattacaatgcatcatt +ttaattgtaaaggggaatttttctattgcaatacaacacaactgtttgatagtagttgga +atacaacaacaaccaatagggagaaccgtagtaatttcatacttccatgcaggataaaac +aaattataaacatgtggcaggaaacaggaaaagcaatgtatgctcctcccatcaggggaa +gcattcagtgtgtatcaaatattacaggaatactattgacaagagatggtggtaataata +atgggtctaacgagaccgagacctttagacctggaggaggagatataagagacaattgga +gaagtgaattatataaatataaagtagtacaaattgaaccactaggagtagcacccacca +gggcaaagagaagagtggtggagagagaaaaaagagcagtagtgggaataggagctatga +tctttgggttcttaggagcagcaggaagcactatgggcgcggcgtcattaacgctgacgg +tacaggccagacaattactgtctggtatagtgcaacagcaaagcaatttgctgagggcta +tagaggcgcaacagcatatgttgcaactcacagtctggggcattaaacagctccaggcaa +gagtcctggctgtggaaagatacctaaaggatcaaaggttcctaggactttggggctgct +ctgggaagatcatctgcaccactgctgtgccctggaacaacacttggagtaataaatctt +atgaagaaatttggaacaacatgacatggacacaatgggagagagaaattagcaattaca +cagaccaaatatatgctatacttacagaatcgcaaaaccagcaggacaaaaatgagaagg +atttgttggaattggaccaatgggcaagtctgtggaattggtttagcataacaaagtggc +tgtggtatataaaaacatttataatgatagtaggaggtttaataggattaagaataatct +ttgctgtgctttctatagtgaatagagttaggcagggatactcacccttgtctttccaga +tccctctccaccagcagagggaaccagacagacccggaagaatcgaagaagaaggtggcg +ggcaagacagagacagatccgtaagattagtgagcggattcttagctctgttgtgggacg +atctacggaacctgtgcctcttcagctaccatcgcttgagagacttcatcttgattgtaa +cgaggactgtggaacttctgggacacagcagtctcaagggactgagactggggtgggaag +gcctcaaatatctggggaatcttctgttatattgggggcaggaactaaaaattagtgcta +tttctttgcttaatactacagcaatagcagtagcagagtggacagatagggttatagaag +tagcacaaagagcttggagggctatccttcacatacctagaagaatccgacagggcttag +aaaggactttggtataacatgggaggcaaatggtcaaaaagtagcatagtgggatggcct +caggtcagagaaagaataaggcaaactcccccagcaacagaaggagtaggagcagtatct +caagatctagataaacatggagcagtaacaagcaataatatgaataatgatgatagtgtc +tggctgagagcacaagaggaagatgaggaaggggtaggctttccagtcaggccacaggta +cctctaagaccaatgacttataaggacgcttttgatcttagcttctttttaaaagaaaag +gggggactggatgggctaatttactccaagaaaagacaagagatccttgacttatgggtt +tataacacacaaggcttcttccctgattggcagaactacacaccagggccagggattaga +tatccactgtgttttggatggtgcttcaaactagtaccagttgacccaagagaagtagag +gaggacaacaaaggagaaaacaactgcctgttgcaccccgcaagccagcatggaatagat +gacgaagaaagagaagtgctgatgtggaagtttgacagtgccctagcacgaaaacaccta +gcccgagaactgcatccagagttctataaagactgctgacaaagaagtttctaactagga +cttccgctggggactttccaggggaggtgtggccggggcggagttggggagtggctaacc +ctcagatgctgcataaaagcagccgctttgcgcttgtactgggtctctcttggtagacca +ggtcgagcccgggagctctctggctagcaagggaacccactgcttagagcctcaataaag +cttgccttgagtgcttgaagtggtgtgtgcccgtctgtgttaggactctggtaact diff --git a/iva/tests/kcount_test.py b/iva/tests/kcount_test.py index 1e33668..65ae02a 100644 --- a/iva/tests/kcount_test.py +++ b/iva/tests/kcount_test.py @@ -42,6 +42,14 @@ class TestKcount(unittest.TestCase): os.unlink(counts_file) + def test_run_kmc_two_threads(self): + '''Test test_run_kmc with two threads''' + reads = os.path.join(data_dir, 'kcount_test.run_kmc.fa') + counts_file = kcount._run_kmc(reads, 'tmp.run_kmc', 10, 2, 4, threads=2) + self.assertTrue(filecmp.cmp(counts_file, os.path.join(data_dir, 'kcount_test.run_kmc.counts'), shallow=False)) + os.unlink(counts_file) + + def test_kmc_to_kmer_counts(self): '''Test _kmc_to_kmer_counts''' counts = kcount._kmc_to_kmer_counts(os.path.join(data_dir, 'kcount_test.kmc_counts'), number=2) diff --git a/scripts/iva b/scripts/iva index af7eea2..1484839 100755 --- a/scripts/iva +++ b/scripts/iva @@ -63,10 +63,21 @@ trimming_group.add_argument('--pcr_primers', action=iva.common.abspathAction, he other_group = parser.add_argument_group('Other options') other_group.add_argument('-i', '--max_insert', type=int, help='Maximum insert size (includes read length). Reads with inferred insert size more than the maximum will not be used to extend contigs [%(default)s]', default=800, metavar='INT') other_group.add_argument('-t', '--threads', type=int, help='Number of threads to use [%(default)s]', default=1, metavar='INT') +other_group.add_argument('--kmc_onethread', action='store_true', help='Force kmc to use one thread. By default the value of -t/--threads is used when running kmc') other_group.add_argument('--strand_bias', type=float, help='Set strand bias cutoff of mapped reads when trimming contig ends, in the interval [0,0.5]. A value of x means that a base needs min(fwd_depth, rev_depth) / total_depth <= x. The only time this should be used is with libraries with overlapping reads (ie fragment length < 2*read length), and even then, it can make results worse. If used, try a low value like 0.1 first [%(default)s]', default=0, metavar='FLOAT in [0,0.5]') +other_group.add_argument('--test', action='store_true', help='Run using built in test data. All other options will be ignored, except the mandatory output directory, and --trimmomatic and --threads can be also be used') other_group.add_argument('--version', action='version', version=iva.common.version) options = parser.parse_args() + +if options.test: + print('Running iva in test mode...') + this_script = os.path.abspath(__file__) + tester = iva.test_data_runner.Tester(options.outdir, this_script, trimmo_jar=options.trimmomatic, threads=options.threads) + tester.run() + sys.exit() + + if options.seed_stop_length == 0: options.seed_stop_length = int(0.9 * options.max_insert) @@ -92,6 +103,12 @@ if os.path.exists(options.outdir): sys.exit(1) +if options.kmc_onethread: + kmc_threads = 1 +else: + kmc_threads = options.threads + + iva.external_progs.get_all_versions(iva.external_progs.assembly_progs) try: @@ -188,6 +205,7 @@ elif options.reference: index_k = options.smalt_k, index_s = options.smalt_s, threads = options.threads, + kmc_threads = kmc_threads, max_insert = options.max_insert, minid = 0.9, seed_stop_length = options.seed_stop_length, @@ -213,6 +231,7 @@ assembly = iva.assembly.Assembly( map_index_k = options.smalt_k, map_index_s = options.smalt_s, threads = options.threads, + kmc_threads = kmc_threads, map_minid = options.smalt_id, contig_iter_trim = options.ctg_iter_trim, ext_min_cov = options.ext_min_cov, diff --git a/setup.py b/setup.py index 828fb03..dec5c89 100644 --- a/setup.py +++ b/setup.py @@ -32,10 +32,10 @@ if not found_all_progs: setup( name='iva', - version='1.0.0', + version='1.0.4', description='Iterative Virus Assembler', packages = find_packages(), - package_data={'iva': ['gage/*', 'ratt/*', 'read_trim/*']}, + package_data={'iva': ['gage/*', 'ratt/*', 'read_trim/*', 'test_run_data/*']}, author='Martin Hunt', author_email='[email protected]', url='https://github.com/sanger-pathogens/iva', @@ -43,9 +43,9 @@ setup( test_suite='nose.collector', tests_require=['nose >= 1.3'], install_requires=[ - 'pyfastaq >= 3.0.1', + 'pyfastaq >= 3.10.0', 'networkx >= 1.7', - 'pysam >= 0.8.1' + 'pysam >= 0.8.1, <= 0.8.3', ], license='GPLv3', classifiers=[ -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/iva.git _______________________________________________ debian-med-commit mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit
