[med-svn] [qcumber] 02/06: New upstream version 1.0.9+dfsg

Andreas Tille Mon, 13 Mar 2017 07:17:41 -0700

This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository qcumber.


commit 966a33aad7fbeb70a28f2f08489c8662e01ad4a0
Author: Andreas Tille <[email protected]>
Date:   Mon Mar 13 14:13:56 2017 +0100

    New upstream version 1.0.9+dfsg
---
 QCumber.py        | 600 ++++++++++++++++++++++++++++--------------------------
 barplot.R         |  56 +++--
 batch_report.html |  10 +-
 classes.py        |  96 +++------
 config.txt        |   6 +-
 helper.py         |  11 +-
 parameter.txt     |  13 +-
 report.tex        |  14 +-
 8 files changed, 410 insertions(+), 396 deletions(-)

diff --git a/QCumber.py b/QCumber.py
index c56f2a7..c7eccde 100755
--- a/QCumber.py
+++ b/QCumber.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 __author__ = 'LieuV'
-__version__ = "1.0.5"
+__version__ = "1.0.9"
 
 from classes import *
 from helper import *
@@ -11,7 +11,6 @@ import shutil
 import configparser
 from collections import OrderedDict
 import json
-import csv
 
 #dependencies
 try:
@@ -20,95 +19,102 @@ except ImportError:
        print('The "argparse" module is not available. Use Python >3.2.')
        sys.exit(1)
 
+#parameter = configparser.ConfigParser(strict=False, interpolation=None)
+#parameter.read(join(dirname(__file__), "parameter.txt"))
 
-parameter = configparser.ConfigParser()
-parameter.read(join(dirname(__file__), "parameter.txt"))
+#r1_pattern = parameter["Pattern"]["R1"]
+#r2_pattern = parameter["Pattern"]["R2"]
+#sep_pattern = parameter["Pattern"]["sep"]
+#lane_pattern = parameter["Pattern"]["lane"]
+global parameter
+global r1_pattern
+global r2_pattern
+global sep_pattern
+global lane_pattern
 
-file_extensions = parameter["Fileextension"]["fastq"]
-r1_pattern = parameter["Pattern"]["R1"]
-r2_pattern = parameter["Pattern"]["R2"]
-lane_pattern = parameter["Pattern"]["lane"]
-
-def get_illumina_reads(  output, tmp):
-       ## Add readsets to sample
+def get_illumina_reads(tmp):
        readsets = []
+
        # split by lanes
-       if isinstance(arguments["r1"], list):
-               for lane in sorted(set([re.search(lane_pattern, x).group() for 
x in arguments["r1"]])):
-                       # concat same files
-                       r1_reads = [x for x in arguments["r1"] if lane in x]
-                       readname = re.sub(r1_pattern + ".*", "", 
os.path.basename(r1_reads[0]))
-                       if len(arguments["r1"]) != 1:
-                               r1 = FastQFile(join_reads(r1_reads, tmp, 
readname + "_R1"), [toLatex(os.path.basename(x)) for x in r1_reads]  )
-                       else:
-                               r1 = FastQFile(r1_reads[0])
-                       if arguments["r2"]:
-                               r2_reads = [x for x in arguments["r2"] if lane 
in x]
+       for lane in sorted(set([re.search(lane_pattern, x).group() for x in 
arguments["r1"]])):
+               # concat same lanes
+               r1_reads = [x for x in arguments["r1"] if lane in x]
+               readname = re.sub(r1_pattern + ".*", "", 
os.path.basename(r1_reads[0]))
+               if len(arguments["r1"]) != 1:
+                       r1 = FastQFile(join_reads(r1_reads, tmp, readname + 
"_R1"), [toLatex(os.path.basename(x)) for x in r1_reads]  )
+               else:
+                       r1 = FastQFile(r1_reads[0])
+               if arguments["r2"]:
+                       r2_reads = [x for x in arguments["r2"] if lane in x]
 
-                               if len(r2_reads) != 1:
-                                       r2 = FastQFile(join_reads(r2_reads, 
tmp, readname + "_R2"),[toLatex(os.path.basename(x)) for x in r2_reads] )
-                               else:
-                                       r2 = FastQFile(r2_reads[0])
-                               readsets.append(ReadSet(r1,r2))
+                       if len(r2_reads) != 1:
+                               r2 = FastQFile(join_reads(r2_reads, tmp, 
readname + "_R2"),[toLatex(os.path.basename(x)) for x in r2_reads] )
                        else:
-                               readsets.append(ReadSet(r1))
-       else:
-               r1 = FastQFile(arguments["r1"])
-               if arguments["r2"]:
-                       r2 = FastQFile(arguments["r2"])
-                       return [ReadSet(r1,r2)]
+                               r2 = FastQFile(r2_reads[0])
+                       readsets.append(ReadSet(r1,r2))
                else:
-                       return [ReadSet(r1)]
-
-
+                       readsets.append(ReadSet(r1))
        return readsets
 
-def run_analysis (readset, arguments, output, tmp):
-       print("Run FastQC...")
-       readset.run_FastQC(join(output, "FastQC"))
-       if arguments["trimBetter"]:
-               trimming_perc = arguments["trimBetter_threshold"]
-       else:
-               trimming_perc = ""
-       readset.run_Trimmomatic(arguments["adapter"], 
arguments["palindrome"],arguments["minlen"], arguments["trimOption"], 
trimming_perc, arguments["gz"])
-       readset.trimRes = readset.trimRes.run_FastQC( tmp)
-       return readset
-
+def get_input():
 
-def check_input(arguments):
        if arguments["r1"]:
                if arguments["r2"]:
                        return [arguments["r1"], arguments["r2"]]
                else:
                        return [arguments["r1"]]
 
-       if arguments["technology"]=="Illumina":
-               if os.path.isdir(arguments["input"]):
-                       files = os.listdir(arguments["input"])
-                       files = [os.path.join(arguments["input"], x) for x in 
files]
-                       files = [x for x in files if os.path.isfile(x) & 
any([ext in x for ext in file_extensions])]
-                       if len(files)==0:
-                               sys.exit(arguments["input"] + " does not 
contain fastq files.")
-                       return files
+       all_files = []
+       bam_ext = [x.strip(" ") for x in 
parameter["Fileextension"]["bam"].split(",")]
+       fastq_ext = [x.strip(" ") for x in 
parameter["Fileextension"]["fastq"].split(",")]
+       if arguments["technology"]:
+               if arguments["technology"]=="IonTorrent":
+                       fastq_ext = []
                else:
-                       return [arguments["input"]]
-       elif arguments["technology"]=="IonTorrent":
+                       bam_ext = []
 
-               if os.path.isfile(arguments["input"]):
+       for root, dirs, files in os.walk(arguments["input"]):
+               for file in files:
+                       if any([file.endswith(ext) for ext in fastq_ext + 
bam_ext ]):
+                               all_files.append(join(root,file))
 
-                       if arguments["input"].endswith(".bam"):
-                               return arguments["input"]
-                       else:
-                               sys.exit("IonTorrent PGM input has to be a 
bam-file. Otherwise use -b. Use --help for more information.")
+       if not arguments["technology"]:
+               if len([x for x in all_files if any([ext in x for ext in 
bam_ext])]) == len(all_files):
+                       arguments["technology"] = "IonTorrent"
                else:
-                       sys.exit("Incorrect file input")
-       else:
-               sys.exit("Incorrect file input")
-       return None
+                       arguments["technology"] = "Illumina"
+       #if arguments["technology"]:
+       #       if arguments["technology"]=="Illumina":
+       #               # fastq only
+       #               all_files = [x for x in all_files if any([ext in x for 
ext in fastq_ext])]
+       #       elif arguments["technology"]=="IonTorrent":
+       #               # bam only
+       #               all_files = [x for x in all_files if any([ext in x for 
ext in bam_ext])]
+
+       if (len(all_files) == 0):
+               sys.exit(arguments["input"] + " does not contain fastq or bam 
files.")
+
+       # find read pairs
+       all_files = sorted(list(all_files))
+       paired_reads_pattern =sep_pattern.join([lane_pattern , "(" + r1_pattern 
+ "|" + r2_pattern + ")","\d{3}"])
+       if all([re.search(paired_reads_pattern, x) for x in all_files]):
+               paired_reads = []
+               for setname, files in groupby(all_files, key=lambda x: 
"_".join(x.split("_")[:-3])):
+                       temp = sorted(list(files))
+                       r1_reads = [x for x in temp if r1_pattern in x]
+                       r2_reads = [x for x in temp if r2_pattern in x]
+                       if len(r1_reads) != 0:
+                               if len(r2_reads) != 0:
+                                       paired_reads.append([r1_reads, 
r2_reads])
+                               else:
+                                       paired_reads.append(r1_reads)
+               return paired_reads
+       else: # treat each file as sample
+               return [[x] for x in all_files ]
+
 
 def getSetname():
        try:
-               print("Renmae", arguments["rename"], arguments["r1"])
                if arguments["rename"]:
                        new_name = [arguments["rename"][x] for x in 
arguments["rename"].keys() if basename(arguments["r1"][0]).startswith(x)]
 
@@ -120,51 +126,57 @@ def getSetname():
                        else:
                                print("Renamed to " ,new_name)
                                return new_name[0]
-
-               if arguments["technology"] == "Illumina":
-
-                       if isinstance(arguments["r1"], list):
-                               return 
"_".join(basename(arguments["r1"][0]).split("_")[:-3])
-                       else:
-                               return 
getFilenameWithoutExtension(arguments["r1"], True)
-               else:
-                       return getFilenameWithoutExtension(arguments["r1"], 
getBase=True)
        except:
+               print("Couldnt rename sample files.")
+       try:
+               paired_reads_pattern = sep_pattern.join([sep_pattern + 
lane_pattern, "(" + r1_pattern + "|" + r2_pattern + ")", "\d{3}.*"])
                if isinstance(arguments["r1"], list):
-                       return getFilenameWithoutExtension(arguments["r1"][0], 
getBase=True)
+                       return re.sub(paired_reads_pattern, "" 
,basename(arguments["r1"][0]))
                else:
-                       return getFilenameWithoutExtension(arguments["r1"], 
getBase=True)
+                       return 
re.sub(paired_reads_pattern,"",basename(arguments["r1"]))
+       except:
+               print("Problems getting samplenames")
+               return arguments["r1"]
 
-def fillClasses(temp_bowtie_path = "", tmp=''):
+def runAnalyses(temp_bowtie_path, tmp):
        try:
                output = getDir([arguments["output"], "QCResults"], True)
-
                sample = Sample(getSetname(), output, 
arguments["reference"],arguments["threads"], arguments["technology"])
+
                if arguments["technology"].startswith("Illumina"):
-                       readsets = get_illumina_reads( output, tmp)
+                       readsets = get_illumina_reads(tmp)
                elif arguments["technology"] == "IonTorrent":
                        print("IonTorrent", arguments["r1"])
                        fastqfile = bam_to_fastq(arguments["r1"],tmp)
                        readsets = [ReadSet( fastqfile)]
 
                for rs in readsets:
-                       readset = run_analysis(rs, arguments,  output, tmp)
-                       sample = sample.add_readSet(readset)
+                       print("Run FastQC...")
+                       rs.run_FastQC(join(output, "FastQC"))
+                       if not arguments["notrimming"]:
+                               if arguments["trimBetter"]:
+                                       trimming_perc = 
arguments["trimBetter_threshold"]
+                               else:
+                                       trimming_perc = ""
+                               rs.run_Trimmomatic(arguments["adapter"], 
arguments["palindrome"], arguments["minlen"],
+                                                                               
arguments["trimOption"], trimming_perc, arguments["gz"])
+                               rs.trimRes = rs.trimRes.run_FastQC(tmp)
+                       sample = sample.add_readSet(rs)
                if not arguments["nomapping"]:
                        if arguments["save_mapping"]:
-                               sample.mappingRes = 
sample.run_Bowtie2(temp_bowtie_path, join(arguments["output"], "QCResults", 
sample.name +".bam"))
+                               sample.mappingRes = 
sample.run_Bowtie2(temp_bowtie_path, join(arguments["output"], "QCResults", 
sample.name +".bam"), not arguments["notrimming"])
                        else:
-                               sample.mappingRes = 
sample.run_Bowtie2(temp_bowtie_path, "/dev/null")
+                               sample.mappingRes = 
sample.run_Bowtie2(temp_bowtie_path, "/dev/null", not arguments["notrimming"])
                if not arguments["nokraken"]:
                        print("Run Kraken.")
-                       sample = sample.run_Kraken(arguments["db"])
+                       sample = sample.run_Kraken(arguments["db"], not 
arguments["notrimming"])
 
                sample.stop = 
datetime.datetime.strftime(datetime.datetime.now(),"%d.%m.%Y, %H:%M:%S")
        finally:
                shutil.rmtree(tmp)
        return sample
 
-def writeReport(sample, arguments):
+def writeReport(sample):
        report_name = os.path.join(sample.mainResultsPath, "Report",
                 "summary_" + sample.name + "_" + 
datetime.datetime.strftime(datetime.datetime.now(), "%d-%m-%y_%H-%M"))
        print("Writing latex " ,report_name)
@@ -172,15 +184,17 @@ def writeReport(sample, arguments):
        env.loader = FileSystemLoader(os.path.dirname(__file__))
 
        template = env.get_template("report.tex")
-       pdf_latex = template.render(sample=sample, pipeline=Pipeline(), 
trim_param = 
sample.readSets[0].trimRes.print_param(arguments["palindrome"],arguments["minlen"],
 arguments["trimOption"]))
+       try:
+               trim_param= 
sample.readSets[0].trimRes.print_param(arguments["palindrome"], 
arguments["minlen"], arguments["trimOption"])
+       except:
+               trim_param = "None"
+       pdf_latex = template.render(sample=sample, pipeline=Pipeline(), 
trim_param = trim_param )
 
        latex = open(report_name + ".tex", "w")
        latex.write(pdf_latex)
        latex.close()
 
        process = subprocess.Popen(["pdflatex", "-interaction=nonstopmode", 
"-output-directory=" + join(sample.mainResultsPath, "Report"), report_name + 
".tex"], stdout = subprocess.PIPE, stderr = subprocess.PIPE)
-       for line in iter(process.stdout.readline, b''):
-               pass #print(line)
        for line in iter(process.stderr.readline, b''):
                print(line)
 
@@ -191,6 +205,140 @@ def writeReport(sample, arguments):
                        os.remove(report_name + ext)
                except OSError:
                        pass
+def check_input_validity():
+       #
+       # Check options for Trimmomatic
+       #pattern = re.match("(?P<option>\w+):(?P<value1>\d+):(?P<value2>\d+)", 
arguments["trimOption"])
+       #if (arguments["trimOption"] != ""):
+       #       if not ((pattern.group("option") == "SLIDINGWINDOW") or 
(pattern.group("option") == "MAXINFO")):
+       #               sys.exit("Check trimOption. Only 'SLIDINGWINDOW' or 
'MAXINFO' allowed")
+       #       try:
+       #               int(pattern.group("value1"))
+       #               int(pattern.group("value2"))
+       #       except:
+       #               sys.exit("Check trimOptions.")
+
+       #
+       # Check if adapter is required
+       if  not (arguments['notrimming'] or arguments['adapter'] or 
arguments["technology"] == "IonTorrent") :
+               sys.exit("Illumina projects requires an adapter file")
+
+       #
+       # Check if reference is required and valid
+       if arguments["nomapping"]:
+               arguments["reference"] = ""
+       else:
+               if not arguments["reference"]:
+                       sys.exit("Mapping needs a reference.")
+               else:
+                       if not os.path.exists(arguments["reference"]):
+                               sys.exit("Reference does not exist.")
+
+       #
+       #Check validity of Kraken DB
+       if not arguments["nokraken"]:
+               if not os.path.exists(arguments["db"]):
+                       sys.exit(arguments["db"] + " does not exist. Enter a 
valid database for kraken")
+               else:
+                       if "database.kdb" not in os.listdir(arguments["db"]):
+                               sys.exit("database " + arguments["db"] + " does 
not contain necessary file database.kdb")
+       #
+       # Check input
+       if arguments["input"]:
+               if not os.path.exists(arguments["input"]):
+                       sys.exit(arguments["input"] + " does not exist.")
+       else:
+               if not os.path.exists(arguments["r1"]):
+                       sys.exit(arguments["r1"] + " does not exist.Input file 
required. Use option -input or -1 / -2")
+
+               if arguments["r2"]:
+                       if not os.path.exists(arguments["r2"]):
+                               sys.exit(arguments["r2"] + " does not exist.")
+
+def get_defaults():
+       if arguments["forAssembly"]:
+               if not arguments["trimBetter_threshold"]:
+                       arguments["trimBetter_threshold"] = 
parameter["forAssembly." + arguments['technology']][
+                               "trimBetter_threshold"]
+               if not arguments["trimOption"]:
+                       arguments["trimOption"] = parameter["forAssembly." + 
arguments['technology']]["trimOption"]
+
+       elif arguments["forMapping"]:
+               if not arguments["trimBetter_threshold"]:
+                       arguments["trimBetter_threshold"] = 
parameter["forMapping." + arguments['technology']][
+                               "trimBetter_threshold"]
+               if not arguments["trimOption"]:
+                       arguments["trimOption"] = parameter["forMapping." + 
arguments['technology']]["trimOption"]
+
+       if not arguments["trimOption"]:
+               arguments["trimOption"] = parameter["Trimmomatic"]["trimOption"]
+       if not arguments["trimBetter_threshold"]:
+               arguments["trimBetter_threshold"] = 
parameter["Trimmomatic"]["trimBetter_threshold"]
+
+
+def plot():
+       try:
+               #
+               # Plot BOXPLOTS
+               boxplots = [{"file": "Per_sequence_quality_scores.csv",
+                            "output": join(arguments["output"], 
"QCResults/Report/src/img", "Per_sequence_quality_scores.png"),
+                            "title": "Per sequence quality scores",
+                            "ylab": "Mean Sequence Quality (Phred Score)",
+                            "xlab": "Sample"},
+                           {"file": "Sequence_Length_Distribution.csv",
+                            "output": join(arguments["output"], 
"QCResults/Report/src/img", "Sequence_Length_Distribution.png"),
+                            "title": "Sequence Length Distribution",
+                            "ylab": "Sequence Length (bp)",
+                            "xlab": "Sample"},
+                           {"file": "Per_sequence_GC_content.csv",
+                            "output": join(arguments["output"], 
"QCResults/Report/src/img", "Per_sequence_GC_content.png"),
+                            "title": "Per sequence GC content",
+                            "ylab": "Mean GC content (%)",
+                            "xlab": "Sample"}]
+               for plot in boxplots:
+                       process = subprocess.Popen(" ".join(["Rscript --vanilla 
", join(os.path.dirname(__file__), "boxplot.R"),
+                                                            
join(arguments["output"], "QCResults", "Report", "src", plot["file"]),
+                                                            plot["output"], 
'"' + plot["title"] + '"', '"' + plot["xlab"] + '"',
+                                                            '"' + plot["ylab"] 
+ '"']),
+                                                  stderr=subprocess.PIPE, 
stdout=subprocess.PIPE, shell=True)
+                       #for line in iter(process.stderr.readline, b''):
+                       #       print(line)
+                       process.communicate()
+
+               #
+               # Plot BARPLOTS
+               process = subprocess.Popen(
+                       " ".join(["Rscript --vanilla ", 
join(os.path.dirname(__file__), "barplot.R"), join(arguments["output"], 
"QCResults/Report/src", "summary.json"),
+                                 join(arguments["output"], 
"QCResults/Report/src/img")]), stderr=subprocess.PIPE, stdout=subprocess.PIPE, 
shell=True)
+               process.communicate()
+       except:
+               print("Couldnt plot summary")
+
+
+def writeSummaryTex(batch_json):
+       try:
+               summary_latex_name = join(arguments["output"], "QCResults", 
"Report", "summary.tex")
+               with open(summary_latex_name, "w") as summary_latex:
+                       
summary_latex.write("\\rowcolors{2}{gray!10}{}\n\\begin{table}[H]\\begin{tabular}{lcc}\n")
+                       summary_latex.write(
+                               "\\rowcolor{tableBlue} \\textbf{Sample} &  
\\textbf{Reads after trimming [\#]} & \\textbf{Reads after trimming [\%]} 
&\\textbf{Map to reference [\#]} & \\textbf{Map to reference[\%]} & 
\\textbf{Classified (Kraken)} & \\textbf{Shorter fragments [\%]} \\ \n")
+
+                       for i in range(0, len(batch_json["summary"])):
+                               summary_latex.write(
+                                       
str(batch_json["summary"][i]["setname"]) + " & " +
+                                       str(batch_json["summary"][i]["Reads 
after trimming [#]"]) + " & " +
+                                       str(batch_json["summary"][i]["Reads 
after trimming [%]"]) + " & " +
+                                       str(batch_json["summary"][i]["Map to 
reference [#]"]) + " & " +
+                                       str(batch_json["summary"][i]["Map to 
reference [%]"]) + " & " +
+                                       
str(batch_json["summary"][i]["Classified (Kraken)"]) + " & " +
+                                       str(batch_json["summary"][i]["Shorter 
fragments"]) + "\\\\\n"
+                               )
+                       summary_latex.write("\\end{tabular}\n")
+                       summary_latex.write("\\caption{Statistics on reads 
after trimming. }")
+                       summary_latex.close()
+       except:
+
+               print("Couldnt write summary.tex file.")
 
 def main(arguments):
 
@@ -204,7 +352,12 @@ def main(arguments):
        makedirs(join(arguments["output"], "QCResults", "Report", "src"), 
exist_ok=True)
        makedirs(join(arguments["output"], "QCResults", "Report", "src", 
"img"), exist_ok=True)
 
+       if not os.path.exists(join(arguments["output"], "QCResults", "Report", 
"lib" )):
+               
shutil.copytree(join(os.path.dirname(__file__),"lib"),join(arguments["output"], 
"QCResults", "Report", "lib" ))
+
+       ####
        # rename sample names
+       ####
        if arguments["rename"]:
                rename_dict = {}
                with open(arguments["rename"], "r") as csvfile:
@@ -212,159 +365,58 @@ def main(arguments):
                                row = line.split("\t")
                                rename_dict[row[0]] = row[1].strip()
                arguments["rename"] = rename_dict
+
+       ###
        # create new boxplot csv files
+       ###
        for csv in ["Per_sequence_quality_scores.csv", 
"Per_sequence_GC_content.csv", "Sequence_Length_Distribution.csv"]:
                new_csv = open(join(arguments["output"], "QCResults", "Report", 
"src", csv), "w")
                new_csv.write("Sample,Read,Lane,Trimmed,Value,Count\n")
                new_csv.close()
-       ##
-       # SAV
+
+       ###
+       # Write SAV
+       ###
        try:
                if arguments["sav"]:
                        write_SAV(arguments["sav"], join(arguments["output"], 
"QCResults", "Report", "src"))
        except:
                print("Couldnt write SAV.")
-       ##
 
-       if not os.path.exists(join(arguments["output"], "QCResults", "Report", 
"lib" )):
-               
shutil.copytree(join(os.path.dirname(__file__),"lib"),join(arguments["output"], 
"QCResults", "Report", "lib" ))
-       if arguments["technology"].startswith("Illumina"):
-               technology = "Illumina"
-       else:
-               technology = "IonTorrent"
 
-       if arguments["forAssembly"]:
-               if not arguments["trimBetter_threshold"]:
-                       arguments["trimBetter_threshold"] = 
parameter["forAssembly." + technology]["trimBetter_threshold"]
-               if not arguments["trimOption"]:
-                       arguments["trimOption"] = parameter["forAssembly." + 
technology]["trimOption"]
-
-       elif arguments["forMapping"]:
-               if not arguments["trimBetter_threshold"]:
-                       arguments["trimBetter_threshold"] = 
parameter["forMapping." + technology]["trimBetter_threshold"]
-               if not arguments["trimOption"]:
-                       arguments["trimOption"] = parameter["forMapping." + 
technology]["trimOption"]
-
-       if not arguments["trimOption"]:
-               arguments["trimOption"] = parameter["Trimmomatic"]["trimOption"]
-       if not arguments["trimBetter_threshold"]:
-               arguments["trimBetter_threshold"] = 
parameter["Trimmomatic"]["trimBetter_threshold"]
-
-       # check input
-       pattern = re.match("(?P<option>\w+):(?P<value1>\d+):(?P<value2>\d+)", 
arguments["trimOption"])
-       if(arguments["trimOption"]!=""):
-               if not ((pattern.group("option") =="SLIDINGWINDOW") or 
(pattern.group("option") =="MAXINFO")):
-                       sys.exit("Check trimOption. Only 'SLIDINGWINDOW' or 
'MAXINFO' allowed")
-               try:
-                       int(pattern.group("value1"))
-                       int(pattern.group("value2"))
-               except:
-                       sys.exit("Check trimOptions.")
-
-       if arguments['technology'] == "Illumina":
-               if not arguments['adapter']:
-                       sys.exit("Illumina projects requires an adapter file")
-       if arguments["nomapping"]:
-               arguments["reference"] = ""
-       else:
-               if not arguments["reference"]:
-                       sys.exit("Mapping needs a reference.")
-               else:
-                       if not os.path.exists(arguments["reference"]):
-                               sys.exit("Reference does not exist.")
-
-       if not arguments["nokraken"]:
-               if not os.path.exists(arguments["db"]):
-                       sys.exit(arguments["db"] + " does not exist. Enter a 
valid database for kraken")
-               else:
-                       if "database.kdb" not in os.listdir(arguments["db"]):
-                               sys.exit("database "+arguments["db"] +" does 
not contain necessary file database.kdb")
-       if not arguments["input"]:
-               if arguments["r1"]:
-                       
print(os.path.abspath(arguments["r1"]),os.path.exists(arguments["r1"]))
-                       if not os.path.exists(arguments["r1"]):
-                               sys.exit(arguments["r1"] + " does not exist.")
-               else:
-                       sys.exit("Input file required. Use option -input or -1 
/ -2")
-
-               if arguments["r2"]:
-                       if not os.path.exists(arguments["r2"]):
-                               sys.exit(arguments["r2"] + " does not exist.")
+       #
+       # Check input validity
+       check_input_validity()
 
        if arguments["index"]:
                mapping_index_path = arguments["index"]
        else:
                mapping_index_path = join(tmp, "bowtie2_index")
                os.makedirs(mapping_index_path, exist_ok=True)
-       # RUN
-       try:
-               batch_json = json.dump({"commandline":arguments, 
"summary":[],"kraken":{}, "versions":Pipeline().__dict__}, 
open(join(arguments["output"], "QCResults/Report/src", "summary.json"),"w"))
-               i = 1
-               project = []
-               tmp_overview = open(join(tmp, "overview.csv"), "w")
-               tmp_overview.write("Sample,Trimmed,Mapped,Classified\n")
-               myFiles = []
-
-               #filter samples and put it into for loop
-               # Get folder and filter by last three pattern splitted by "_"
-               if arguments["input"]:
-                       input = [ join(arguments["input"], x) for x in 
sorted(os.listdir(arguments["input"]))]
-                       #### check input format ####
-                       # Case 1: Folder contains fastq files
-                       if len([x for x in input if isfile(x) and 
any([x.endswith(ext) for ext in file_extensions]) ])>0:
-                               input= [x for x in input if isfile(x) and 
any([x.endswith(ext) for ext in file_extensions]) ]
-                               if len(input)!=0:
-                                       if 
arguments["technology"].startswith("Illumina"):
-                                               for setname, files in 
groupby(input, key=lambda x: "_".join(x.split("_")[:-3])):
-                                                       #for setname, files in 
groupby(files, key=lambda x: "_".join(x.split("_")[:-2])):
-                                                       temp_file =  
sorted(list(files))
-                                                       r1_reads = [x for x in  
temp_file if r1_pattern in x]
-                                                       r2_reads = [x for x in 
temp_file if r2_pattern in x]
-                                                       if len(r1_reads) != 0:
-                                                               if 
len(r2_reads) != 0:
-                                                                       
myFiles.append([r1_reads, r2_reads])
-                                                               else:
-                                                                       
myFiles.append(r1_reads)
-                                       else:
-                                               # IonTorrent
-                                               myFiles = input
-                       # Case 2: Folder contains subfolder for each sample
-                       else:
-                               input = [x for x in input if os.path.isdir(x)]
-
-                               if len(input) > 0:
-                                       for sample in input:
-                                               files = sorted([join(sample,x) 
for x in listdir(sample) if any([x.endswith(ext) for ext in file_extensions])])
-                                               if 
arguments["technology"].startswith("Illumina"):# and len(files) > 1:
-                                                       for setname, files in 
groupby(files, key=lambda x: "_".join(x.split("_")[:-3])):
-                                                               temp_file = 
sorted(list(files))
-                                                               r1_reads = [x 
for x in temp_file if r1_pattern in x]
-                                                               r2_reads = [x 
for x in temp_file if r2_pattern in x]
-                                                               if 
len(r1_reads)!=0:
-                                                                       if 
len(r2_reads)!=0:
-                                                                               
myFiles.append([r1_reads, r2_reads])
-                                                                       else:
-                                                                               
myFiles.append(r1_reads)
-                                               else:
-                                                       
myFiles.append(sorted(list(files)))
-                                       files = None
-                       if len(myFiles) == 0:
-                                       sys.exit("Enter a valid folder, which 
contain sample folders")
-               else:
-                       if arguments["r1"]:
-                               if not os.path.exists(arguments["r1"]):
-                                       sys.exit(arguments["r1"] + " does not 
exist.")
 
-                       if arguments["r2"]:
-                               myFiles.append([arguments["r1"], 
arguments["r2"]])
-                       else:
-                               myFiles.append([arguments["r1"]])
+       myFiles = get_input()
+       print("Found " + str(len(myFiles)) + " sample(s).")
 
-               print("Found " + str(len(myFiles)) + " sample(s).")
+       #
+       # Get parameter defaults
+       get_defaults()
 
+       batch_json = json.dump({
+                       "commandline": arguments,
+                       "summary": [],
+                       "kraken": {},
+                       "versions": Pipeline().__dict__},
+           open(join(arguments["output"], "QCResults/Report/src", 
"summary.json"), "w"))
+
+       output = join(arguments["output"], "QCResults")  # 
getDir([arguments["output"], "QCResults"], True)
+
+       i = 1
+       #######
+       # Run for each sample
+       #######
+       try:
                for subset in myFiles:
                        print("Analyse " + str(i) + "/" + str(len(myFiles)))
-
                        sample_tmp = join(tmp, "Sample_" + str(i))
                        os.makedirs(sample_tmp, exist_ok=True)
 
@@ -375,11 +427,17 @@ def main(arguments):
                                if len(subset)>1:
                                        arguments["r2"] = subset[1]
 
-                       sample = fillClasses( mapping_index_path, sample_tmp )
+                       ####
+                       # Run Analyses
+                       ####
+                       sample = runAnalyses( mapping_index_path, sample_tmp )
+
+                       ####
+                       # Write output
+                       ####
                        if sample is not None:
-                               project.append(sample)
                                try:
-                                       writeReport(sample, arguments)
+                                       writeReport(sample)
                                except:
                                        print("Couldnt write pdf.")
                                try:
@@ -396,88 +454,43 @@ def main(arguments):
                                        else:
                                                kraken = 
sample.krakenRes.pClassified
                                                
batch_json["kraken"][sample.name] = 
json.loads(str_to_json(sample.krakenRes.report))
-                                       
tmp_overview.write(",".join([sample.name, str(sample.nTrimmed()), 
str(nMapping), str(kraken)]) +"\n")
+                                       
#tmp_overview.write(",".join([sample.name, str(sample.nTrimmed()), 
str(nMapping), str(kraken)]) +"\n")
+                                       try:
+                                               trim_param = 
sample.readSets[0].trimRes.print_param(arguments["palindrome"], 
arguments["minlen"],
+                                                                               
                                           arguments["trimOption"])
+                                       except:
+                                               trim_param="None"
                                        batch_json["summary"].append(
                                                        
OrderedDict([("setname", sample.name),
-                                                                    ("Total 
reads [#]", sample.total_reads()),
-                                                                    ("Reads 
after trimming [#]", sample.nTrimmed()),
-                                                                    ("Reads 
after trimming [%]", sample.pTrimmed()),
+                                                                               
 ("Total reads [#]", sample.total_reads()),
+                                                                               
 ("Reads after trimming [#]", sample.nTrimmed()),
+                                                                               
 ("Reads after trimming [%]", sample.pTrimmed()),
                                                                                
 ("Map to reference [#]", nMapping),
                                                                                
 ("Map to reference [%]", pMapping),
                                                                                
 ("Classified (Kraken)", kraken),
                                                                                
 ("Shorter fragments", sample.get_mean_trimRes()),
-                                                                               
 ("Trim Parameter", 
sample.readSets[0].trimRes.print_param(arguments["palindrome"],arguments["minlen"],arguments["trimOption"])),
+                                                                               
 ("Trim Parameter",trim_param),
                                                                                
 ("images", [x.get_all_qc_dict() for x in sample.readSets])]))
 
                                        json.dump(batch_json, 
open(join(arguments["output"], "QCResults/Report/src", "summary.json"), "w"))
                                except:
                                        print("Couldnt produce HTML report.")
+
                        sample = None
+                       arguments["r1"]=None
+                       arguments["r2"]=None
                        i+=1
+               # plot boxplots and barplots
+               plot()
+               writeSummaryTex(batch_json)
+               shutil.copy(join(os.path.dirname(__file__), 
"batch_report.html"), join(arguments["output"], "QCResults", "Report", 
"batch_report.html"))
 
-               try:
-                       # plot boxplots
-                       boxplots = [{"file":"Per_sequence_quality_scores.csv",
-                                                "output": 
join(arguments["output"], "QCResults/Report/src/img", 
"Per_sequence_quality_scores.png"),
-                                                "title": "Per sequence quality 
scores",
-                                                "ylab": "Mean Sequence Quality 
(Phred Score)",
-                                               "xlab": "Sample" },
-                                               {"file": 
"Sequence_Length_Distribution.csv",
-                                                "output": 
join(arguments["output"], "QCResults/Report/src/img", 
"Sequence_Length_Distribution.png"),
-                                                "title": "Sequence Length 
Distribution",
-                                                "ylab": "Sequence Length (bp)",
-                                                "xlab": "Sample"},
-                                               {"file": 
"Per_sequence_GC_content.csv",
-                                                "output": 
join(arguments["output"], "QCResults/Report/src/img", 
"Per_sequence_GC_content.png"),
-                                                "title": "Per sequence GC 
content",
-                                                "ylab": "Mean GC content (%)",
-                                                "xlab": "Sample"} ]
-                       for plot in boxplots:
-                               process = subprocess.Popen(" ".join(["Rscript 
--vanilla ",join(os.path.dirname(__file__) ,"boxplot.R"),
-                                               
join(arguments["output"],"QCResults", "Report", "src", plot["file"]), 
plot["output"], '"'+ plot["title"]+'"', '"'+plot["xlab"]+'"','"'+ 
plot["ylab"]+'"']),
-                                               stderr=subprocess.PIPE, stdout= 
subprocess.PIPE, shell =True)
-                               for line in iter(process.stderr.readline, b''):
-                                       print(line)
-                               process.communicate()
-                       #plot barplots
-                       tmp_overview.close()
-                       process = subprocess.Popen(" ".join(["Rscript --vanilla 
", join(os.path.dirname(__file__), "barplot.R"),join(tmp, "overview.csv"),
-                                                                               
                 join(arguments["output"], 
"QCResults/Report/src/img")]),stderr=subprocess.PIPE, stdout=subprocess.PIPE, 
shell=True)
-                       for line in iter(process.stderr.readline, b''):
-                               print(line)
-                       process.communicate()
-                       shutil.copy(join(os.path.dirname(__file__), 
"batch_report.html"), join(arguments["output"], "QCResults", "Report", 
"batch_report.html"))
-               except:
-                       print("Couldnt plot boxplots.")
-               try:
-                       summary_latex_name = join(arguments["output"], 
"QCResults", "Report", "summary.tex")
-                       with open(summary_latex_name, "w") as summary_latex:
-                               
summary_latex.write("\\rowcolors{2}{gray!10}{}\n\\begin{table}[H]\\begin{tabular}{lcc}\n")
-                               summary_latex.write(
-                                       "\\rowcolor{tableBlue} \\textbf{Sample} 
& \\textbf{Map to reference [\#]} & \\textbf{Map to reference[\%]} & 
\\textbf{Reads after trimming [\#]} & \\textbf{Reads after trimming [\%]} & 
\\textbf{Classified (Kraken)} & \\textbf{Shorter fragments [\%]} \\ \n")
-
-                               for i in range(0, len(batch_json["summary"])):
-                                       summary_latex.write(
-                                               
str(batch_json["summary"][i]["setname"]) + " & " +
-                                               
str(batch_json["summary"][i]["Map to reference [#]"]) + " & " +
-                                               
str(batch_json["summary"][i]["Map to reference [%]"]) + " & " +
-                                               
str(batch_json["summary"][i]["Reads after trimming [#]"]) + " & " +
-                                               
str(batch_json["summary"][i]["Reads after trimming [%]"]) + " & " +
-                                               
str(batch_json["summary"][i]["Classified (Kraken)"]) + " & " +
-                                               
str(batch_json["summary"][i]["Shorter fragments"]) + "\\\\\n"
-                                       )
-                               summary_latex.write("\\end{tabular}\n")
-                               summary_latex.write("\\caption{Statistics on 
reads after trimming. }")
-                               summary_latex.close()
-               except:
-
-                       print("Couldnt write summary.tex file.")
        finally:
 
                shutil.rmtree(tmp)
                delete_files(join(arguments["output"],"QCResults"), "FastQC", 
"_fastqc.html")
                delete_files(join(arguments["output"],"QCResults"), 
"Trimmed/FastQC", "_fastqc.html")
-
+ 
 if __name__ == "__main__":
        config = configparser.ConfigParser()
        config.read(join(dirname(__file__), "config.txt"))
@@ -492,7 +505,7 @@ if __name__ == "__main__":
        parser.add_argument( '-2', dest='r2', help = "input file", 
required=False)
 
        parser.add_argument('-output', dest='output', default="")
-       parser.add_argument('-technology', 
dest='technology',choices=['Illumina',"IonTorrent"] ,required = True)
+       parser.add_argument('-technology', 
dest='technology',choices=['Illumina',"IonTorrent"] ,required=False , help = 
"If not set, automatically determine technology and search for fastq and bam 
files. Set technology to IonTorrent if all files are bam-files, else set 
technology to Illumina.")
        parser.add_argument('-threads', dest='threads', default = 4, type = 
int, help= "Number of threads. Default:  %(default)s")
        parser.add_argument('-adapter', dest = 'adapter', choices = 
['TruSeq2-PE', 'TruSeq2-SE' , 'TruSeq3-PE' , 'TruSeq3-SE' , 'TruSeq3-PE-2', 
'NexteraPE-PE' ])
        parser.add_argument('-reference', dest='reference', required = False, 
help = "Map reads against reference")
@@ -503,16 +516,25 @@ if __name__ == "__main__":
        parser.add_argument('-db', dest='db', help='Kraken database', required 
= False, default= kraken_db)
        parser.add_argument('-palindrome', dest='palindrome', default=30, help= 
'Use palindrome parameter 30 or 1000 for further analysis. Default: 
%(default)s', type = int)
        parser.add_argument('-minlen', default=50,dest='minlen',help='Minlen 
parameter for Trimmomatic. Default: %(default)s',  type=int)
-       parser.add_argument('-trimOption', dest="trimOption", help='Use maxinfo 
or slidinginfo for Trimmomatic.MAXINFO:<target length>:<strictness> | 
SLIDINGWINDOW:<window size>:<required quality>. default: SLIDINGWINDOW:4:15', 
type= str)
+       parser.add_argument('-trimOption', dest="trimOption", help='Use maxinfo 
or slidinginfo for Trimmomatic.MAXINFO:<target length>:<strictness> | 
SLIDINGWINDOW:<window size>:<required quality>.', type= str)
        parser.add_argument('-version', action='version', version='%(prog)s v' 
+ __version__)
        parser.add_argument('-nokraken', action = "store_true")
        parser.add_argument('-nomapping', action = "store_true")
+       parser.add_argument('-notrimming', action = "store_true")
        parser.add_argument('-gz', action = "store_true")
        parser.add_argument('-trimBetter', action = "store_true", help= 
"Optimize trimming parameter using 'Per sequence base content' from fastqc. Not 
recommended for amplicons.")
        parser.add_argument('-trimBetter_threshold', 
dest='trimBetter_threshold', help = "Set -trimBetter to use this option.Default 
setting for Illumina: 0.15 and for IonTorrent: 0.25.", required=False, 
type=float)
        parser.add_argument('-forAssembly', action = "store_true", help = "Set 
-trimBetter to use this option.Trim parameter are optimized for assemblies 
(trim more aggressive).")
        parser.add_argument('-forMapping', action = "store_true", help = "Set 
-trimBetter to use this option.Trim parameter are optimized for mapping (allow 
more errors).")
        parser.add_argument('-rename', dest="rename", required=False, help="TSV 
File with two columns: <old sample name> <new sample name>")
+       parser.add_argument('-parameters', dest = 'parameters', default = 
join(dirname(__file__), "parameter.txt"))
 
        arguments = vars(parser.parse_args())
+       parameter = configparser.ConfigParser(strict=False, interpolation=None)
+       parameter.read(arguments['parameters'])
+
+       r1_pattern = parameter["Pattern"]["R1"]
+       r2_pattern = parameter["Pattern"]["R2"]
+       sep_pattern = parameter["Pattern"]["sep"]
+       lane_pattern = parameter["Pattern"]["lane"]
        main(arguments)
diff --git a/barplot.R b/barplot.R
index 350342c..93dfd20 100755
--- a/barplot.R
+++ b/barplot.R
@@ -1,29 +1,45 @@
 options(warn=-1)
-library(ggplot2)
+require(jsonlite)
+require(ggplot2)
+
 args = commandArgs(trailingOnly=TRUE)
 
-summary<- as.data.frame(read.csv(args[1]))
+convert2filename<- function(string, ext=".png"){
+  string<- gsub("\\[%\\]", "percentage", string)
+  string<- gsub("\\[#\\]", "number", string)
+  string<- gsub(" ", "_", string)
+  return(paste(string, ext, sep=""))
+}
 
-ggplot(summary, aes(x=Sample, y = Trimmed )) +
-  geom_bar(stat = "identity",  fill="#67a9cf") +
-  theme(axis.text.x=element_text(angle=90, hjust=1, vjust = 0.5)) +
-  ggtitle("Number of reads after trimming") +
-  xlab("Sample")+
-  ylab("Reads [#]")
-ggsave(paste(args[2], "/trimming.png", sep=""))
+summary_json<- jsonlite::fromJSON(args[1])$summary
+tablenames<- names(summary_json)[!names(summary_json) %in% c("images", "Trim 
Parameter")]
 
-ggplot(summary, aes(x=Sample, y = Mapped )) +
-  geom_bar(stat = "identity",  fill="#67a9cf") +
-  theme(axis.text.x=element_text(angle=90, hjust=1, vjust = 0.5)) +
-  ggtitle("Number of reads mapped to reference") +
+summary_json<- summary_json[tablenames]
+#summary<- as.data.frame(read.csv(args[1]))
+for( i in tablenames[2:length(tablenames)]){
+
+ggplot(summary_json, aes(x=summary_json[,"setname"], y = summary_json[,i]), 
environment = environment()) +
+  geom_bar(stat = "identity",   fill="#67a9cf") +
+  theme(axis.text.x=element_text(angle=90, hjust=1, vjust = 0.5), 
legend.position = "none") +
+  ggtitle(i) +
   xlab("Sample")+
-  ylab("Reads [#]")
-ggsave(paste(args[2], "/mapping.png", sep=""))
+  ylab(i)
+  ggsave(paste(args[2], convert2filename(i), sep="/"))
 
-ggplot(summary, aes(x=Sample, y = Classified )) +
-  geom_bar(stat = "identity",  fill="#67a9cf") +
+}
+temp_json <- data.frame(rbind(cbind( setname = summary_json$setname, type = 
"Total reads [#]", value= summary_json$`Total reads [#]`),
+                              cbind( setname = summary_json$setname, type = 
"Reads after trimming [#]", value= summary_json$`Reads after trimming [#]`)
+                   ))
+temp_json$value <- as.numeric(as.character(temp_json$value))
+
+ggplot(temp_json, aes(x= ))
+
+ggplot(temp_json, aes(x=setname, y = value, by=type, fill=type))+
+  geom_bar(stat="identity",position = "identity", alpha=  0.9) +
   theme(axis.text.x=element_text(angle=90, hjust=1, vjust = 0.5)) +
-  ggtitle("Precentage of reads classified by Kraken") +
+  ggtitle("Number of Reads") +
   xlab("Sample")+
-  ylab("Reads [%]")
-ggsave(paste(args[2], "/kraken.png", sep=""))
+  ylab("Number of Reads")
+ggsave(paste(args[2], "number_of_reads.png", sep="/"))
+
+
diff --git a/batch_report.html b/batch_report.html
index 23fc0f4..104a42b 100755
--- a/batch_report.html
+++ b/batch_report.html
@@ -573,18 +573,18 @@
                         </br>
                                                <div class="col-md-4">
 
-                        <a ng-click="selected.title='Number of reads remained 
after trimming'; selected.caption=''; selected.img='src/img/trimming.png'"
+                        <a ng-click="selected.title='Number of reads remained 
after trimming'; selected.caption=''; 
selected.img='src/img/Reads_after_trimming_number.png'"
                                                           data-image-id="" 
data-toggle="modal"
                                                           
data-target="#image-gallery">
-                                                               <img 
height="500px" ng-src="src/img/trimming.png">
+                                                               <img 
height="500px" ng-src="src/img/Reads_after_trimming_number.png">
                         </a>
                                                </div>
                         <div class="col-md-4">
 
-                        <a ng-click="selected.title='Number of reads mapped to 
reference'; selected.caption=''; selected.img='src/img/mapping.png'"
+                        <a ng-click="selected.title='Number of reads mapped to 
reference'; selected.caption=''; 
selected.img='src/img/Map_to_reference_number.png'"
                                                           data-image-id="" 
data-toggle="modal"
                                                           
data-target="#image-gallery">
-                                                               <img 
height="500px" ng-src="src/img/mapping.png">
+                                                               <img 
height="500px" ng-src="src/img/Map_to_reference_number.png">
                         </a>
                                                </div>
 
@@ -593,7 +593,7 @@
                         <a ng-click="selected.title='Percentage of reads 
classified by Kraken'; selected.caption=''; selected.img='src/img/kraken.png'"
                                                           data-image-id="" 
data-toggle="modal"
                                                           
data-target="#image-gallery">
-                                                               <img 
height="500px" ng-src="src/img/kraken.png">
+                                                               <img 
height="500px" ng-src="src/img/Classified_(Kraken).png">
                         </a>
                                                </div>
 
diff --git a/classes.py b/classes.py
index bbfb42f..3066dde 100755
--- a/classes.py
+++ b/classes.py
@@ -79,18 +79,17 @@ class Sample(object):
        def get_all_qc_plots(self):
                qc_dict = OrderedDict([("setname", self.name),("raw",[]), 
("trimmed",[])])
                for rs in self.readSets:
-                       qc_dict["raw"].append(rs.r1.qcRes.get_plots())
-                       
qc_dict["trimmed"].append(rs.trimRes.readset.r1.qcRes.get_plots())
+                       qc_dict["raw"].append(rs.r1.qcRes.plots)
+                       
qc_dict["trimmed"].append(rs.trimRes.readset.r1.qcRes.plots)
                        if (rs.r2 is not None):
-                               qc_dict["raw"].append(rs.r2.qcRes.get_plots())
-                               
qc_dict["trimmed"].append(rs.trimRes.readset.r2.qcRes.get_plots())
+                               qc_dict["raw"].append(rs.r2.qcRes.plots)
+                               
qc_dict["trimmed"].append(rs.trimRes.readset.r2.qcRes.plots)
                return qc_dict
 
        def total_reads(self):
                total = 0
                for rs in self.readSets:
-                       if rs.trimRes:
-                               total += rs.trimRes.total
+                       total += rs.numberOfReads
                return total
 
        def pTrimmed(self):
@@ -112,15 +111,15 @@ class Sample(object):
                self.readSets.append(readSet)
                return self
 
-       def run_Kraken(self, db):
-               r1, r2 = self.get_all_reads()
+       def run_Kraken(self, db, trimmed):
+               r1, r2 = self.get_all_reads(trimmed)
                self.krakenRes = KrakenResult(r1,r2,db, self.name)
                return self
 
-       def run_Bowtie2(self, bowtie_index, save_mapping ):
-               r1, r2 = self.get_all_reads()
+       def run_Bowtie2(self, bowtie_index, save_mapping, trimmed ):
+               r1, r2 = self.get_all_reads(trimmed)
                self.mappingRes = MappingResult(self.reference, r1, r2, 
bowtie_index)
-               self.mappingRes = self.mappingRes.run_bowtie(self.threads, 
save_mapping)
+               self.mappingRes = self.mappingRes.run_bowtie(save_mapping)
                return self.mappingRes
 
        def get_all_reads(self, trimmed = True):
@@ -133,7 +132,10 @@ class Sample(object):
                return r1,r2
 
        def get_mean_trimRes(self):
-               return round(np.mean([x.trimRes.shorter_fragments_percentage 
for x in self.readSets]),2)
+               try:
+                       return 
round(np.mean([x.trimRes.shorter_fragments_percentage for x in 
self.readSets]),2)
+               except:
+                       return None
 
 class ReadSet:
        ''' Readset contains of r1 and r2'''
@@ -151,7 +153,6 @@ class ReadSet:
                self.numberOfReads = 0
                self.numberofTrimmedReads = 0
                self.trimRes = None
-               #self.outputDir = outputDir
                self.paired = True
                if len( basename(self.r1.filename).split("_")[:-3]) > 4:
                        self.setname = 
"_".join(basename(self.r1.filename).split("_")[:-3])
@@ -162,8 +163,10 @@ class ReadSet:
 
        def run_FastQC(self, output):
                self.r1 = self.r1.run_FastQC(output)
+               self.numberOfReads += self.r1.qcRes.total_reads
                if(self.r2 is not None):
                        self.r2 = self.r2.run_FastQC(output)
+                       self.numberOfReads += self.r1.qcRes.total_reads
                return self
 
        def run_Trimmomatic(self, adapter, palindrome, minlen, trimOption, 
betterTrimming,gz):
@@ -177,19 +180,14 @@ class ReadSet:
                return qc_results
 
        def get_all_qc_dict(self):
-               '''qc_dict =OrderedDict( {"raw":[],"trimmed":[]} )
-               qc_dict["raw"] = [[x.__dict__ for x in 
self.r1.qcRes.results_to_base64()]]
-               qc_dict["trimmed"] = [[x.__dict__ for x in 
self.trimRes.readset.r1.qcRes.results_to_base64()]]
-               if (self.r2 is not None):
-                       qc_dict["raw"].append([x.__dict__ for x in 
self.r2.qcRes.results_to_base64()])
-                       qc_dict["trimmed"].append([x.__dict__ for x in 
self.trimRes.readset.r2.qcRes.results_to_base64()])
-               return qc_dict'''
                qc_dict = OrderedDict({"raw": [], "trimmed": []})
                qc_dict["raw"] = [[x.__dict__ for x in 
self.r1.qcRes.results_for_report()]]
-               qc_dict["trimmed"] = [[x.__dict__ for x in 
self.trimRes.readset.r1.qcRes.results_for_report()]]
+               if self.trimRes:
+                       qc_dict["trimmed"] = [[x.__dict__ for x in 
self.trimRes.readset.r1.qcRes.results_for_report()]]
                if (self.r2 is not None):
                        qc_dict["raw"].append([x.__dict__ for x in 
self.r2.qcRes.results_for_report()])
-                       qc_dict["trimmed"].append([x.__dict__ for x in 
self.trimRes.readset.r2.qcRes.results_for_report()])
+                       if self.trimRes:
+                               qc_dict["trimmed"].append([x.__dict__ for x in 
self.trimRes.readset.r2.qcRes.results_for_report()])
                return qc_dict
 
 
@@ -202,7 +200,6 @@ class FastQFile:
                self.concat_files = None
 
        def run_FastQC(self, outputDir):
-               print("Run fastqc in fastqfile ", " 
".join([join(get_tool_path('fastqc'), "fastqc") , self.filename , "-o" , 
outputDir, "-t", str(num_threads), "--extract"]))
                process = subprocess.Popen([join(get_tool_path('fastqc'), 
"fastqc") , self.filename , "-o" , outputDir, "-t", str(num_threads), 
"--extract"], stdout = subprocess.PIPE, stderr = subprocess.PIPE)
                for line in iter(process.stderr.readline, b''):
                        line = line.decode("utf-8")
@@ -217,7 +214,6 @@ class FastQFile:
                        pattern = re.match("Quality encoding detected as 
(?P<phred>\w+\d{2})", line)
                        if pattern:
                                self.phred = pattern.group("phred")
-               print("")
                process.communicate()
                self.qcRes = QCResult(join(outputDir, 
getFilenameWithoutExtension(basename(self.filename)) + "_fastqc"), 
join(mainResultsPath, "Report", "src"))
                return self
@@ -231,8 +227,8 @@ class QCResult:
        def __init__(self, resultsPath, summary_output):
                self.path = resultsPath
                self.results = self.get_results()
-               #write summary csv for boxplots
-               write_fastqc_summary(resultsPath, summary_output)
+               #write summary csv for boxplots and assign total number of reads
+               self.total_reads = write_fastqc_summary(resultsPath, 
summary_output)
 
        def get_results(self):
                summary_list = []
@@ -254,36 +250,6 @@ class QCResult:
                                                
summary_list.append(Plot("NotExisting", row[1], "NotExisting"))
                return summary_list
 
-       def get_plots(self):
-               print("extracting data ", self.path)
-               store = False
-               data = {"name": basename(self.path).replace("_fastc",""),
-                               "Sequence_Length_Distribution": [],
-                               "Per_sequence_GC_content": [],
-                               "Per_base_sequence_quality": []}
-
-               with open(join(self.path, "fastqc_data.txt"), "r") as 
fastqcdatafile:
-                       box_values = []
-                       for line in iter(fastqcdatafile):
-                               if line.startswith("#"):
-                                       pass
-                               elif line.startswith(">>END_MODULE"):
-                                       if store==True:
-                                               data[key] = 
get_distribution(box_values, key)
-                                       store = False
-                                       key = None
-                                       box_values = []
-                               elif line.startswith(">>"):
-                                       key = line.split("\t")[0]
-                                       key=key.replace(">>","")
-                                       key = key.replace(" ","_")
-                                       if key in data.keys():
-                                               store = True
-                               else:
-                                       if store:
-                                               box_values.append(line)
-               return data
-
        def results_to_base64(self):
                converted_plot = []
                for plot in self.results:
@@ -357,6 +323,7 @@ class TrimResult:
                self.logs = ""
 
                #Results
+               self.total=0
                self.shorter_fragments_percentage = 0
                self.input_number = 0
                self.nTrimmed = 0
@@ -454,6 +421,8 @@ class TrimResult:
                                process = subprocess.Popen(" ".join(call), 
stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
                                survived1, total, nTrimmedReads, pTrimmedReads, 
log = trimmomatic_report(process, call[0], True)
                                process.communicate()
+               if survived1==0:
+                       sys.exit("No reads remained after trimming. Check 
fastqc results before trimming in QCResults/Fastqc. Please rerun without 
-trimBetter or check -trimOptions and minlen again.")
 
                self.total = total
                self.nTrimmed = nTrimmedReads
@@ -521,17 +490,18 @@ class MappingResult:
                index_name = join(index, 
getFilenameWithoutExtension(basename(reference)) +"_bt2")  # reference name 
"bowtie2 build index_name"
                if not isdir(index):
                        return index
-               process = 
subprocess.Popen([join(get_tool_path("bowtie2"),"bowtie2-build"),reference,  
index_name ], stderr=subprocess.PIPE, stdout = subprocess.PIPE)
+               if not exists(index_name + ".1.bt2"):
+                       process = 
subprocess.Popen([join(get_tool_path("bowtie2"),"bowtie2-build"),reference,  
index_name ], stderr=subprocess.PIPE, stdout = subprocess.PIPE)
 
-               for line in iter(process.stderr.readline, b''):
-                       line = line.decode("utf-8")
-                       print(line)
-               process.communicate()
-               print("Bowtie2-build. Done.")
+                       for line in iter(process.stderr.readline, b''):
+                               line = line.decode("utf-8")
+                               print(line)
+                       process.communicate()
+                       print("Bowtie2-build. Done.")
                return index_name
 
 
-       def run_bowtie(self, threads, save_mapping):
+       def run_bowtie(self, save_mapping):
                print("Run Bowtie2")
                call = [join(get_tool_path("bowtie2"), "bowtie2"), "-x", 
self.index, "--no-unal --threads ", str(num_threads)]
 
diff --git a/config.txt b/config.txt
index 124f887..258a35a 100755
--- a/config.txt
+++ b/config.txt
@@ -1,10 +1,10 @@
 [DEFAULT]
-kraken_db =/opt/common/pipelines/databases/minikraken_20141208/
+kraken_db = /opt/common/pipelines/databases/minikraken_20141208/
 
 [PATH]
 kraken =
-adapter = /data/GS/tools/Trimmomatic-0.32/adapters/
+adapter = /opt/common/pipelines/tools/Trimmomatic-0.36/adapters/
 fastqc = /opt/common/pipelines/tools/FastQC_0.11.5/
-trimmomatic =
+trimmomatic = /opt/common/pipelines/tools/Trimmomatic-0.36/
 bowtie2 =
 
diff --git a/helper.py b/helper.py
index d7fb524..c849e5a 100755
--- a/helper.py
+++ b/helper.py
@@ -151,7 +151,7 @@ def optimize_trimming(fastqc_folder,perc=0.1):
                        column[nucl].mask[-(i + 5):(min(len(mytable[nucl]), 
len(mytable[nucl]) - i))] = True
                        if (perc_slope(numpy.mean(mytable[nucl][-(i + 6): 
(min(len(mytable[nucl]) - 1, len(mytable[nucl]) - 1 - 
i))]),numpy.mean(column[nucl]), perc=perc)) & (tailcrop > len(mytable[nucl]) - 
(i + 5)):
                                # if perc_slope(numpy.var(mytable[nucl][-(i+6): 
(min(len(mytable["A"])-1, len(mytable[nucl])-1-i))]), 
numpy.mean(numpy.var(column)), perc=perc):
-                               print("TAILCROP", tailcrop)
+                               #print("TAILCROP", tailcrop)
                                tailcrop = len(mytable[nucl]) - (i + 5)
                                trim_bool = True
                        else:
@@ -215,9 +215,9 @@ def delete_files(path, extended="", pattern="_fastqc"):
                        else:
                                shutil.rmtree(filename)
 
-
+'''
 def get_distribution(lines, key):
-       '''creates CSV used for boxplotting'''
+       #creates CSV used for boxplotting
        output = []
        for line in lines:
                line = line.strip()
@@ -235,7 +235,7 @@ def get_distribution(lines, key):
                        v = fields[0]  # quality or gc [%]
                        output.extend(int(v) * [int(float(fields[1]))])
        return output
-
+'''
 def createCSV(dataset, lines, key, path, trimmed):
        '''creates CSV used for boxplotting'''
        #output = []
@@ -280,6 +280,8 @@ def write_fastqc_summary(fastqcdir, output):
                if line.startswith("#"):
                        pass  # print("comment")
                # continue
+               elif line.startswith("Total Sequences"):
+                       total_seq = int(line.split("\t")[-1])
                elif line.startswith('>>END_MODULE'):
                        if len(data) > 0:
                                name1 = basename(fastqcdir)
@@ -300,6 +302,7 @@ def write_fastqc_summary(fastqcdir, output):
                if store:
                        data.extend([line])
        fastqcdatafile.close()
+       return total_seq
 
 def write_SAV(folder, output):
        print("Plot SAV images...")
diff --git a/parameter.txt b/parameter.txt
index 14aeca2..ea2902e 100755
--- a/parameter.txt
+++ b/parameter.txt
@@ -35,13 +35,12 @@ trimOption = SLIDINGWINDOW:4:15
 trimBetter_threshold = 0.25
 trimOption = SLIDINGWINDOW:4:15
 
-[Adapter]
-Illumina Universal Adapter = TruSeq2
-
 [Fileextension]
-fastq = .fastq.gz, .fastq, .fq, .fq.gz, .bam
+fastq = .fastq.gz, .fastq, .fq, .fq.gz
+bam = .bam
 
 [Pattern]
-R1 = _R1_
-R2 = _R2_
-lane = _L\d{3}_
\ No newline at end of file
+sep=_
+R1 = R1
+R2 = R2
+lane = L\d{3}
\ No newline at end of file
diff --git a/report.tex b/report.tex
index e687aa0..65558cc 100755
--- a/report.tex
+++ b/report.tex
@@ -67,9 +67,11 @@ Processed reads: \\
 %-------------------- Summary -------------------%
 \begin{tcolorbox}
 {\large{Summary} } \\
-{{sample.nTrimmed()}} of {{sample.total_reads()}} ({{sample.pTrimmed()}}\%) 
reads remained after trimming \\
+{% if sample.nTrimmed() != 0 %} {{sample.nTrimmed()}} of 
{{sample.total_reads()}} ({{sample.pTrimmed()}}\%) reads remained after 
trimming \\
 {{sample.get_mean_trimRes()}} \% of fragments were shorter than read length\\
-
+{% else %}
+No trimming was performed \\
+{% endif %}
 {%if sample.mappingRes !=None%}{{sample.mappingRes.numberOfAlignedReads}} 
({{sample.mappingRes.percentAlignedReads}}\%) of reads aligned to \path{ 
{{sample.reference}} }\\ {%endif%}
 {%if sample.krakenRes !=None%}{{sample.krakenRes.nClassified}} sequences 
classified ({{sample.krakenRes.pClassified}}\%) {%endif%}
 
@@ -117,12 +119,13 @@ Trimming Log: \\
            
{\includegraphics[width=\textwidth]{/{{read.r1.qcRes.results[i].file}}} }
         \end{subfigure}
         \end{adjustbox}\qquad
-        \begin{adjustbox}{minipage=0.4\textwidth-2\fboxrule, cframe = 
{{read.trimRes.readset.r1.qcRes.results[i].color}} 2}%
-        \begin{subfigure}%
+        {% if sample.nTrimmed() != 0 %} 
\begin{adjustbox}{minipage=0.4\textwidth-2\fboxrule, cframe = 
{{read.trimRes.readset.r1.qcRes.results[i].color}} 2}%
+            \begin{subfigure}%
            
{\includegraphics[width=\textwidth]{/{{read.trimRes.readset.r1.qcRes.results[i].file}}}
 }
         \end{subfigure}
         \end{adjustbox}
         \caption{ {{read.trimRes.readset.r1.qcRes.results[i].name}}}
+        {% endif %}
     \end{figure}
 {% endfor %}
 
@@ -145,12 +148,13 @@ Concatenated files: \\
            
{\includegraphics[width=\textwidth]{/{{read.r2.qcRes.results[i].file}}} }
         \end{subfigure}
         \end{adjustbox}\qquad
-        \begin{adjustbox}{minipage=0.4\textwidth -2\fboxrule, cframe = 
{{read.trimRes.readset.r2.qcRes.results[i].color}} 2}%
+        {% if sample.nTrimmed() != 0 %} 
\begin{adjustbox}{minipage=0.4\textwidth -2\fboxrule, cframe = 
{{read.trimRes.readset.r2.qcRes.results[i].color}} 2}%
         \begin{subfigure}%
             
{\includegraphics[width=\textwidth]{/{{read.trimRes.readset.r2.qcRes.results[i].file}}}
 }
         \end{subfigure}
         \end{adjustbox}
         \caption{ {{read.trimRes.readset.r2.qcRes.results[i].name}}}
+        {% endif %}
     \end{figure}
 {% endfor %}
 {% endif %}

-- 
Alioth's /usr/local/bin/git-commit-notice on 
/srv/git.debian.org/git/debian-med/qcumber.git

_______________________________________________
debian-med-commit mailing list
[email protected]
http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit

[med-svn] [qcumber] 02/06: New upstream version 1.0.9+dfsg

Reply via email to