This is an automated email from the git hooks/post-receive script. tille pushed a commit to branch master in repository qcumber.
commit f256679c229082b397698f7aa771aa9adff17379 Author: Andreas Tille <[email protected]> Date: Thu Apr 20 09:00:39 2017 +0200 New upstream version 1.0.14+dfsg --- QCumber.py | 64 ++++++++++++++++++++++++------------------------ classes.py | 2 +- readme.md | 82 +++++++++++++++++++++++++++++++++----------------------------- report.tex | 21 +++++----------- 4 files changed, 83 insertions(+), 86 deletions(-) diff --git a/QCumber.py b/QCumber.py index ab279cb..9aa1319 100755 --- a/QCumber.py +++ b/QCumber.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 __author__ = 'LieuV' -__version__ = "1.0.12" +__version__ = "1.0.14" from classes import * from helper import * @@ -37,14 +37,14 @@ def get_illumina_reads(tmp): if not all([re.search(lane_pattern, x) for x in arguments["r1"]]): readname = re.sub(r1_pattern + ".*", "", os.path.basename(arguments["r1"][0])) if len(arguments["r1"]) != 1: - r1 = FastQFile(join_reads(arguments["r1"], tmp, readname + "_R1"), - [toLatex(os.path.basename(x)) for x in arguments["r1"]]) + r1 = FastQFile(join_reads(arguments["r1"], tmp, readname + "R1"), + [toLatex(os.path.basename(x)) for x in arguments["r1"]]) else: r1 = FastQFile(arguments["r1"][0]) if arguments["r2"]: if len(arguments["r2"]) != 1: - r2 = FastQFile(join_reads(arguments["r2"], tmp, readname + "_R2"), - [toLatex(os.path.basename(x)) for x in arguments["r2"]]) + r2 = FastQFile(join_reads(arguments["r2"], tmp, readname + "R2"), + [toLatex(os.path.basename(x)) for x in arguments["r2"]]) else: r2 = FastQFile(arguments["r2"][0]) readsets.append(ReadSet(r1, r2)) @@ -58,14 +58,14 @@ def get_illumina_reads(tmp): r1_reads = [x for x in arguments["r1"] if lane in x] readname = re.sub(r1_pattern + ".*", "", os.path.basename(r1_reads[0])) if len(arguments["r1"]) != 1: - r1 = FastQFile(join_reads(r1_reads, tmp, readname + "_R1"), [toLatex(os.path.basename(x)) for x in r1_reads] ) + r1 = FastQFile(join_reads(r1_reads, tmp, readname + "R1"), [toLatex(os.path.basename(x)) for x in r1_reads] ) else: r1 = FastQFile(r1_reads[0]) if arguments["r2"]: r2_reads = [x for x in arguments["r2"] if lane in x] if len(r2_reads) != 1: - r2 = FastQFile(join_reads(r2_reads, tmp, readname + "_R2"),[toLatex(os.path.basename(x)) for x in r2_reads] ) + r2 = FastQFile(join_reads(r2_reads, tmp, readname + "R2"),[toLatex(os.path.basename(x)) for x in r2_reads] ) else: r2 = FastQFile(r2_reads[0]) readsets.append(ReadSet(r1,r2)) @@ -187,7 +187,7 @@ def runAnalyses(temp_bowtie_path, tmp): sample = sample.add_readSet(rs) if not arguments["nomapping"]: if arguments["save_mapping"]: - sample.mappingRes = sample.run_Bowtie2(temp_bowtie_path, join(arguments["output"], "QCResults", sample.name +".bam"), not arguments["notrimming"]) + sample.mappingRes = sample.run_Bowtie2(temp_bowtie_path, join(arguments["output"], "QCResults", sample.name +".sam"), not arguments["notrimming"]) else: sample.mappingRes = sample.run_Bowtie2(temp_bowtie_path, "/dev/null", not arguments["notrimming"]) if not arguments["nokraken"]: @@ -217,11 +217,11 @@ def writeReport(sample): latex.write(pdf_latex) latex.close() - process = subprocess.Popen(["pdflatex", "-interaction=nonstopmode", "-output-directory=" + join(sample.mainResultsPath, "Report"), report_name + ".tex"], stdout = subprocess.PIPE, stderr = subprocess.PIPE) + process = subprocess.Popen(" ".join(["pdflatex", "-interaction=nonstopmode", "-output-directory=" + join(sample.mainResultsPath, "Report"), report_name + ".tex"]),shell=True, stdout = subprocess.DEVNULL, stderr = subprocess.PIPE) for line in iter(process.stderr.readline, b''): print(line) - process.communicate() + #os.system(" ".join(["pdflatex", "-interaction=nonstopmode", "-output-directory=" + join(sample.mainResultsPath, "Report"), report_name + ".tex > " + join(sample.mainResultsPath, "Report", "latex.log 2&")])) for ext in (".tex",".aux", ".log", ".toc", ".lof", ".lot", ".synctex.gz"): try: @@ -318,26 +318,26 @@ def plot(): # # Plot BOXPLOTS boxplots = [{"file": "Per_sequence_quality_scores.csv", - "output": join(arguments["output"], "QCResults/Report/src/img", "Per_sequence_quality_scores.png"), - "title": "Per sequence quality scores", - "ylab": "Mean Sequence Quality (Phred Score)", - "xlab": "Sample"}, - {"file": "Sequence_Length_Distribution.csv", - "output": join(arguments["output"], "QCResults/Report/src/img", "Sequence_Length_Distribution.png"), - "title": "Sequence Length Distribution", - "ylab": "Sequence Length (bp)", - "xlab": "Sample"}, - {"file": "Per_sequence_GC_content.csv", - "output": join(arguments["output"], "QCResults/Report/src/img", "Per_sequence_GC_content.png"), - "title": "Per sequence GC content", - "ylab": "Mean GC content (%)", - "xlab": "Sample"}] + "output": join(arguments["output"], "QCResults/Report/src/img", "Per_sequence_quality_scores.png"), + "title": "Per sequence quality scores", + "ylab": "Mean Sequence Quality (Phred Score)", + "xlab": "Sample"}, + {"file": "Sequence_Length_Distribution.csv", + "output": join(arguments["output"], "QCResults/Report/src/img", "Sequence_Length_Distribution.png"), + "title": "Sequence Length Distribution", + "ylab": "Sequence Length (bp)", + "xlab": "Sample"}, + {"file": "Per_sequence_GC_content.csv", + "output": join(arguments["output"], "QCResults/Report/src/img", "Per_sequence_GC_content.png"), + "title": "Per sequence GC content", + "ylab": "Mean GC content (%)", + "xlab": "Sample"}] for plot in boxplots: process = subprocess.Popen(" ".join(["Rscript --vanilla ", join(os.path.dirname(__file__), "boxplot.R"), - join(arguments["output"], "QCResults", "Report", "src", plot["file"]), - plot["output"], '"' + plot["title"] + '"', '"' + plot["xlab"] + '"', - '"' + plot["ylab"] + '"']), - stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True) + join(arguments["output"], "QCResults", "Report", "src", plot["file"]), + plot["output"], '"' + plot["title"] + '"', '"' + plot["xlab"] + '"', + '"' + plot["ylab"] + '"']), + stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True) #for line in iter(process.stderr.readline, b''): # print(line) process.communicate() @@ -346,7 +346,7 @@ def plot(): # Plot BARPLOTS process = subprocess.Popen( " ".join(["Rscript --vanilla ", join(os.path.dirname(__file__), "barplot.R"), join(arguments["output"], "QCResults/Report/src", "summary.json"), - join(arguments["output"], "QCResults/Report/src/img")]), stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True) + join(arguments["output"], "QCResults/Report/src/img")]), stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True) process.communicate() except: print("Couldnt plot summary") @@ -443,7 +443,7 @@ def main(arguments): "summary": [], "kraken": {}, "versions": Pipeline().__dict__}, - open(join(arguments["output"], "QCResults/Report/src", "summary.json"), "w")) + open(join(arguments["output"], "QCResults/Report/src", "summary.json"), "w")) output = join(arguments["output"], "QCResults") # getDir([arguments["output"], "QCResults"], True) @@ -537,8 +537,8 @@ if __name__ == "__main__": kraken_db = config["DEFAULT"]["kraken_db"] parser = argparse.ArgumentParser() - parser.add_argument( '-input', dest='input', help = "input sample folder. Illumina filenames have to end with _<lane>_<R1|R2>_number, e.g. Sample_12_345_R1_001.fastq", required=False) - parser.add_argument('-1' , dest='r1', help = "input file. Illumina filename must not match <project>_<lane>_<R1|R2>_<number> name pattern", required=False) + parser.add_argument( '-input', dest='input', help = "input sample folder. Illumina filenames should end with _<lane>_<R1|R2>_number, e.g. Sample_12_345_R1_001.fastq, to find the right paired set.", required=False) + parser.add_argument('-1' , dest='r1', help = "input file", required=False) parser.add_argument( '-2', dest='r2', help = "input file", required=False) parser.add_argument('-output', dest='output', default="") diff --git a/classes.py b/classes.py index 3066dde..2cc296c 100755 --- a/classes.py +++ b/classes.py @@ -195,7 +195,7 @@ class FastQFile: def __init__(self, absFilename, concat_files = None): self.filename = absFilename self.qcRes = None - self.log = "" + #self.log = "" self.phred="phred33" self.concat_files = None diff --git a/readme.md b/readme.md index 59bb665..da25aab 100755 --- a/readme.md +++ b/readme.md @@ -2,15 +2,15 @@ Introduction ------------ -QCumber is a tool for quality control and exploration of NGS data. The workflow is as follows: +QCumber is a tool for quality control and exploration of NGS data. All steps can be skipped if required. The workflow is as follows: -* optional: extract information from Sequence Analysis Viewer +* extract information from Sequence Analysis Viewer * Quality control with FastQC -* Trim Reads with Trimmomatic -* optional: run FastQC and retrim if necessary +* Trim Reads with Trimmomatic +* run FastQC and retrim if necessary * Quality control of trimmed reads with FastQC -* optional: Map reads against reference using bowtie2 -* optional: Classify reads with Kraken +* Map reads against reference using bowtie2 +* Classify reads with Kraken ------------ Dependencies @@ -31,7 +31,7 @@ Packages via pip3 install: R packages: * ggplot2 * savR - +* jsonlite To change tool or adapter path, change config.txt. @@ -46,47 +46,52 @@ Input parameter: -i, -input sample folder/file. If Illumina folder, files has to match pattern <Sample name>_<lane>_<R1/R2>_<number>. Eg. Sample_12_345_R1_001.fastq. Otherwise use -1,-2 -1 , -2 alternatively to -i: filename. Must not match Illumina names. - -technology sequencing technology (Illumina/IonTorrent) + -adapter adapter sequence (TruSeq2-PE, TruSeq2-SE, TruSeq3-PE, TruSeq3-SE, TruSeq3-PE-2, NexteraPE-PE). Required for Illumina. Options: - + -technology sequencing technology (Illumina/IonTorrent). Use Illumina if files are fastq -output output folder, default: input folder -reference reference file - -adapter adapter sequence (TruSeq2-PE, TruSeq2-SE, TruSeq3-PE, TruSeq3-SE, TruSeq3-PE-2, NexteraPE-PE). Required for Illumina. + -threads number of threads + -sav Sequence Analysis Viewer folder. Requires Interop folder, RunInfo.xml and RunParameter.xml - -threads threads. Default:4 - -palindrome palindrome parameter used in Trimmomatic (use 30 or 1000 for further analysis). Default: 30 - -db Kraken database - -trimOption Override standard trimming option. E.g. MAXINFO:<target length>:<strictness> | SLIDINGWINDOW:<window size>:<required quality>. + -rename Rename sample names in report. TSV File with two columns: <old sample name> <new sample name> + -parameters Use own standard parameter. + -trimOption Override standard trimming option. E.g. MAXINFO:<target length>:<strictness> | SLIDINGWINDOW:<window size>:<required quality>. default: SLIDINGWINDOW:4:15 -trimBetter Optimize trimming parameter using 'Per sequence base content' from fastqc -trimBetter_threshold Threshold for 'Per sequence base content' fluctuation. Default:0.15 -forAssembly Trim parameter are optimized for assemblies (trim more aggressive). -forMapping Trim parameter are optimized for mapping(allow more errors). -minlen Minlen parameter for Trimmomatic. Default:50 + -palindrome palindrome parameter used in Trimmomatic (use 30 or 1000 for further analysis). Default: 30 + -gz Output trimmed files as .gz + + -db Kraken database + -nokraken skip Kraken -index Bowtie2 index if available -save_mapping Save sam files - -nokraken skip Kraken -nomapping skip mapping - - -version Get version + -notrimming skip trimming + + -version Get version Output: <Sample/Output Folder> -|-- QCResult - |-- Report - |-- PDF report per sample - |-- HTML report for entire project - |-- src - |-- img - |-- Summary images - |-- FastQC - |-- <output folder(s) from FastQC> - |-- Trimmed - |-- <trimmed reads> - |-- FastQC - |-- <output folder(s) from FastQC> +* QCResult + * Report + - PDF report per sample + - HTML report for entire project + * src + * img + - Summary images + * FastQC + - <output folder(s) from FastQC> + * Trimmed + - <trimmed reads> + * FastQC + - <output folder(s) from FastQC> ------------------- Program Description @@ -94,14 +99,15 @@ Program Description This project consists of 6 files: -QCumber.py main script for running complete pipeline -classes.py script containing classes -helper.py small helper functions -report.tex Template for sample reports -config.txt configuration for Kraken and Trimmomatic -boxplot.R boxplots of fastqc output for batch report -paramter.txt default parameter -config.txt tool location +* QCumber.py main script for running complete pipeline +* classes.py script containing classes +* helper.py small helper functions +* report.tex Template for sample reports +* batch_report.html Template for batch report +* config.txt path to tools and adapter file +* boxplot.R boxplots of fastqc output for batch report +* barplot.R barplots of read statistics +* parameter.txt default parameter for trimming, set pattern for Illumina names,.. ------- diff --git a/report.tex b/report.tex index 65558cc..c0059e4 100755 --- a/report.tex +++ b/report.tex @@ -30,7 +30,7 @@ \begin{document} {\bf {\LARGE{ {{pipeline.name}} } Version {{pipeline.version}} } }\\ -\line(1,0){ \textwidth } +\line(1,0){\textwidth} \begin{tabular}{p{0.25\textwidth} p{0.75\textwidth}} @@ -55,7 +55,7 @@ Trimmomatic: & {{pipeline.trimmo_version}}\\ \end{tabular}\\ %----------------- Workflow -------------------% -\line(1,0){ \textwidth } \\ +\line(1,0){\textwidth} \\ Processed reads: \\ {%for read in sample.readSets %} {{read.r1.get_filename()}} @@ -77,7 +77,7 @@ No trimming was performed \\ \end{tcolorbox} -\line(1,0){\textwidth} \\ +\line(1,0){\textwidth} \\ \vspace{5mm} %-------------------- FASTQC Results -------------------% @@ -97,20 +97,11 @@ Concatenated files:\\ {%endfor%} \end{itemize} {%endif%} -{{read.r1.log}} \\ Trimming Log: \\ \textcolor{gray}{Using parameter: {{trim_param}} }\\ {{read.trimRes.logs}} \\ -{% if read.trimRes.blobplot != "" %} - \begin{figure}[H] - \centering - {\includegraphics[width=0.8 \textwidth]{/{{read.trimRes.blobplot}}} } - \caption{Blobplot} - \end{figure} -{% endif %} -% {% for i in range(read.r1.qcRes.results|length) %} \begin{figure}[H] \centering @@ -139,7 +130,7 @@ Concatenated files: \\ {%endfor%} \end{itemize} {%endif%} -{{read.r2.log}} + {% for i in range(read.r2.qcRes.results|length) %} \begin{figure}[H] \centering @@ -163,7 +154,7 @@ Concatenated files: \\ %-------------------- Bowtie Results -------------------% {%if sample.mappingRes != None%} -\line(1,0){\textwidth} +\line(1,0){\textwidth} \vspace{5mm} {\Large{Bowtie2} } - Map against \path{ {{sample.reference}} } \\ @@ -171,7 +162,7 @@ Concatenated files: \\ {%endif%} %-------------------- Kraken Results -------------------% {%if sample.krakenRes != None%} -\line(1,0){\textwidth} \\ +\line(1,0){\textwidth} \vspace{5mm} {\Large{Kraken} } \\ -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/qcumber.git _______________________________________________ debian-med-commit mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit
