[med-svn] [qcumber] 01/05: New upstream version 1.0.14+dfsg

Andreas Tille Thu, 20 Apr 2017 01:12:52 -0700

This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository qcumber.


commit f256679c229082b397698f7aa771aa9adff17379
Author: Andreas Tille <[email protected]>
Date:   Thu Apr 20 09:00:39 2017 +0200

    New upstream version 1.0.14+dfsg
---
 QCumber.py | 64 ++++++++++++++++++++++++------------------------
 classes.py |  2 +-
 readme.md  | 82 +++++++++++++++++++++++++++++++++-----------------------------
 report.tex | 21 +++++-----------
 4 files changed, 83 insertions(+), 86 deletions(-)

diff --git a/QCumber.py b/QCumber.py
index ab279cb..9aa1319 100755
--- a/QCumber.py
+++ b/QCumber.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 __author__ = 'LieuV'
-__version__ = "1.0.12"
+__version__ = "1.0.14"
 
 from classes import *
 from helper import *
@@ -37,14 +37,14 @@ def get_illumina_reads(tmp):
        if not all([re.search(lane_pattern, x) for x in arguments["r1"]]):
                readname = re.sub(r1_pattern + ".*", "", 
os.path.basename(arguments["r1"][0]))
                if len(arguments["r1"]) != 1:
-                       r1 = FastQFile(join_reads(arguments["r1"], tmp, 
readname + "_R1"),
-                                      [toLatex(os.path.basename(x)) for x in 
arguments["r1"]])
+                       r1 = FastQFile(join_reads(arguments["r1"], tmp, 
readname + "R1"),
+                                                  
[toLatex(os.path.basename(x)) for x in arguments["r1"]])
                else:
                        r1 = FastQFile(arguments["r1"][0])
                if arguments["r2"]:
                        if len(arguments["r2"]) != 1:
-                               r2 = FastQFile(join_reads(arguments["r2"], tmp, 
readname + "_R2"),
-                                              [toLatex(os.path.basename(x)) 
for x in arguments["r2"]])
+                               r2 = FastQFile(join_reads(arguments["r2"], tmp, 
readname + "R2"),
+                                                          
[toLatex(os.path.basename(x)) for x in arguments["r2"]])
                        else:
                                r2 = FastQFile(arguments["r2"][0])
                        readsets.append(ReadSet(r1, r2))
@@ -58,14 +58,14 @@ def get_illumina_reads(tmp):
                        r1_reads = [x for x in arguments["r1"] if lane in x]
                        readname = re.sub(r1_pattern + ".*", "", 
os.path.basename(r1_reads[0]))
                        if len(arguments["r1"]) != 1:
-                               r1 = FastQFile(join_reads(r1_reads, tmp, 
readname + "_R1"), [toLatex(os.path.basename(x)) for x in r1_reads]  )
+                               r1 = FastQFile(join_reads(r1_reads, tmp, 
readname + "R1"), [toLatex(os.path.basename(x)) for x in r1_reads]  )
                        else:
                                r1 = FastQFile(r1_reads[0])
                        if arguments["r2"]:
                                r2_reads = [x for x in arguments["r2"] if lane 
in x]
 
                                if len(r2_reads) != 1:
-                                       r2 = FastQFile(join_reads(r2_reads, 
tmp, readname + "_R2"),[toLatex(os.path.basename(x)) for x in r2_reads] )
+                                       r2 = FastQFile(join_reads(r2_reads, 
tmp, readname + "R2"),[toLatex(os.path.basename(x)) for x in r2_reads] )
                                else:
                                        r2 = FastQFile(r2_reads[0])
                                readsets.append(ReadSet(r1,r2))
@@ -187,7 +187,7 @@ def runAnalyses(temp_bowtie_path, tmp):
                        sample = sample.add_readSet(rs)
                if not arguments["nomapping"]:
                        if arguments["save_mapping"]:
-                               sample.mappingRes = 
sample.run_Bowtie2(temp_bowtie_path, join(arguments["output"], "QCResults", 
sample.name +".bam"), not arguments["notrimming"])
+                               sample.mappingRes = 
sample.run_Bowtie2(temp_bowtie_path, join(arguments["output"], "QCResults", 
sample.name +".sam"), not arguments["notrimming"])
                        else:
                                sample.mappingRes = 
sample.run_Bowtie2(temp_bowtie_path, "/dev/null", not arguments["notrimming"])
                if not arguments["nokraken"]:
@@ -217,11 +217,11 @@ def writeReport(sample):
        latex.write(pdf_latex)
        latex.close()
 
-       process = subprocess.Popen(["pdflatex", "-interaction=nonstopmode", 
"-output-directory=" + join(sample.mainResultsPath, "Report"), report_name + 
".tex"], stdout = subprocess.PIPE, stderr = subprocess.PIPE)
+       process = subprocess.Popen(" ".join(["pdflatex", 
"-interaction=nonstopmode", "-output-directory=" + join(sample.mainResultsPath, 
"Report"), report_name + ".tex"]),shell=True, stdout = subprocess.DEVNULL, 
stderr = subprocess.PIPE)
        for line in iter(process.stderr.readline, b''):
                print(line)
-
        process.communicate()
+       #os.system(" ".join(["pdflatex", "-interaction=nonstopmode", 
"-output-directory=" + join(sample.mainResultsPath, "Report"), report_name + 
".tex > " + join(sample.mainResultsPath, "Report", "latex.log 2&")]))
 
        for ext in (".tex",".aux", ".log", ".toc", ".lof", ".lot", 
".synctex.gz"):
                try:
@@ -318,26 +318,26 @@ def plot():
                #
                # Plot BOXPLOTS
                boxplots = [{"file": "Per_sequence_quality_scores.csv",
-                            "output": join(arguments["output"], 
"QCResults/Report/src/img", "Per_sequence_quality_scores.png"),
-                            "title": "Per sequence quality scores",
-                            "ylab": "Mean Sequence Quality (Phred Score)",
-                            "xlab": "Sample"},
-                           {"file": "Sequence_Length_Distribution.csv",
-                            "output": join(arguments["output"], 
"QCResults/Report/src/img", "Sequence_Length_Distribution.png"),
-                            "title": "Sequence Length Distribution",
-                            "ylab": "Sequence Length (bp)",
-                            "xlab": "Sample"},
-                           {"file": "Per_sequence_GC_content.csv",
-                            "output": join(arguments["output"], 
"QCResults/Report/src/img", "Per_sequence_GC_content.png"),
-                            "title": "Per sequence GC content",
-                            "ylab": "Mean GC content (%)",
-                            "xlab": "Sample"}]
+                                        "output": join(arguments["output"], 
"QCResults/Report/src/img", "Per_sequence_quality_scores.png"),
+                                        "title": "Per sequence quality scores",
+                                        "ylab": "Mean Sequence Quality (Phred 
Score)",
+                                        "xlab": "Sample"},
+                                       {"file": 
"Sequence_Length_Distribution.csv",
+                                        "output": join(arguments["output"], 
"QCResults/Report/src/img", "Sequence_Length_Distribution.png"),
+                                        "title": "Sequence Length 
Distribution",
+                                        "ylab": "Sequence Length (bp)",
+                                        "xlab": "Sample"},
+                                       {"file": "Per_sequence_GC_content.csv",
+                                        "output": join(arguments["output"], 
"QCResults/Report/src/img", "Per_sequence_GC_content.png"),
+                                        "title": "Per sequence GC content",
+                                        "ylab": "Mean GC content (%)",
+                                        "xlab": "Sample"}]
                for plot in boxplots:
                        process = subprocess.Popen(" ".join(["Rscript --vanilla 
", join(os.path.dirname(__file__), "boxplot.R"),
-                                                            
join(arguments["output"], "QCResults", "Report", "src", plot["file"]),
-                                                            plot["output"], 
'"' + plot["title"] + '"', '"' + plot["xlab"] + '"',
-                                                            '"' + plot["ylab"] 
+ '"']),
-                                                  stderr=subprocess.PIPE, 
stdout=subprocess.PIPE, shell=True)
+                                                                               
                 join(arguments["output"], "QCResults", "Report", "src", 
plot["file"]),
+                                                                               
                 plot["output"], '"' + plot["title"] + '"', '"' + plot["xlab"] 
+ '"',
+                                                                               
                 '"' + plot["ylab"] + '"']),
+                                                                          
stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
                        #for line in iter(process.stderr.readline, b''):
                        #       print(line)
                        process.communicate()
@@ -346,7 +346,7 @@ def plot():
                # Plot BARPLOTS
                process = subprocess.Popen(
                        " ".join(["Rscript --vanilla ", 
join(os.path.dirname(__file__), "barplot.R"), join(arguments["output"], 
"QCResults/Report/src", "summary.json"),
-                                 join(arguments["output"], 
"QCResults/Report/src/img")]), stderr=subprocess.PIPE, stdout=subprocess.PIPE, 
shell=True)
+                                         join(arguments["output"], 
"QCResults/Report/src/img")]), stderr=subprocess.PIPE, stdout=subprocess.PIPE, 
shell=True)
                process.communicate()
        except:
                print("Couldnt plot summary")
@@ -443,7 +443,7 @@ def main(arguments):
                        "summary": [],
                        "kraken": {},
                        "versions": Pipeline().__dict__},
-           open(join(arguments["output"], "QCResults/Report/src", 
"summary.json"), "w"))
+               open(join(arguments["output"], "QCResults/Report/src", 
"summary.json"), "w"))
 
        output = join(arguments["output"], "QCResults")  # 
getDir([arguments["output"], "QCResults"], True)
 
@@ -537,8 +537,8 @@ if __name__ == "__main__":
                        kraken_db = config["DEFAULT"]["kraken_db"]
 
        parser = argparse.ArgumentParser()
-       parser.add_argument( '-input', dest='input', help = "input sample 
folder. Illumina filenames have to end with _<lane>_<R1|R2>_number, e.g. 
Sample_12_345_R1_001.fastq", required=False)
-       parser.add_argument('-1' , dest='r1', help = "input file. Illumina 
filename must not match <project>_<lane>_<R1|R2>_<number> name pattern", 
required=False)
+       parser.add_argument( '-input', dest='input', help = "input sample 
folder. Illumina filenames should end with _<lane>_<R1|R2>_number, e.g. 
Sample_12_345_R1_001.fastq, to find the right paired set.", required=False)
+       parser.add_argument('-1' , dest='r1', help = "input file", 
required=False)
        parser.add_argument( '-2', dest='r2', help = "input file", 
required=False)
 
        parser.add_argument('-output', dest='output', default="")
diff --git a/classes.py b/classes.py
index 3066dde..2cc296c 100755
--- a/classes.py
+++ b/classes.py
@@ -195,7 +195,7 @@ class FastQFile:
        def __init__(self, absFilename, concat_files = None):
                self.filename = absFilename
                self.qcRes = None
-               self.log = ""
+               #self.log = ""
                self.phred="phred33"
                self.concat_files = None
 
diff --git a/readme.md b/readme.md
index 59bb665..da25aab 100755
--- a/readme.md
+++ b/readme.md
@@ -2,15 +2,15 @@
 Introduction 
 ------------
 
-QCumber is a tool for quality control and exploration of NGS data. The workflow is as follows:
+QCumber is a tool for quality control and exploration of NGS data. All steps can be skipped if required. The workflow is as follows:
 
-* optional: extract information from Sequence Analysis Viewer
+* extract information from Sequence Analysis Viewer
 * Quality control with FastQC
-* Trim Reads with Trimmomatic 
-* optional: run FastQC and retrim if necessary
+* Trim Reads with Trimmomatic
+* run FastQC and retrim if necessary
 * Quality control of trimmed reads with FastQC
-* optional: Map reads against reference using bowtie2
-* optional: Classify reads with Kraken
+* Map reads against reference using bowtie2
+* Classify reads with Kraken
 
 ------------
 Dependencies
@@ -31,7 +31,7 @@ Packages via pip3 install:
 R packages:
 * ggplot2
 * savR
-
+* jsonlite
 
 To change tool or adapter path, change config.txt.
 
@@ -46,47 +46,52 @@ Input parameter:
        -i, -input              sample folder/file. If Illumina folder, files 
has to match pattern <Sample name>_<lane>_<R1/R2>_<number>. 
                                        Eg. Sample_12_345_R1_001.fastq. 
Otherwise use -1,-2
        -1 , -2         alternatively to -i: filename. Must not match Illumina 
names.
-       -technology             sequencing technology (Illumina/IonTorrent)
+    -adapter        adapter sequence (TruSeq2-PE, TruSeq2-SE, TruSeq3-PE, 
TruSeq3-SE, TruSeq3-PE-2, NexteraPE-PE). Required for Illumina.
 
 Options:
-
+    -technology                sequencing technology (Illumina/IonTorrent). 
Use Illumina if files are fastq
        -output                     output folder, default: input folder
        -reference              reference file
-       -adapter                adapter sequence (TruSeq2-PE, TruSeq2-SE, 
TruSeq3-PE, TruSeq3-SE, TruSeq3-PE-2, NexteraPE-PE). Required for Illumina.
+       -threads                number of threads
+
        -sav                                    Sequence Analysis Viewer 
folder. Requires Interop folder, RunInfo.xml and RunParameter.xml
-       -threads                threads. Default:4
-       -palindrome                             palindrome parameter used in 
Trimmomatic (use 30 or 1000 for further analysis). Default: 30
-       -db                     Kraken database
-       -trimOption             Override standard trimming option. E.g. 
MAXINFO:<target length>:<strictness> | SLIDINGWINDOW:<window size>:<required 
quality>. 
+       -rename                 Rename sample names in report. TSV File with 
two columns: <old sample name> <new sample name>
+       -parameters             Use own standard parameter.
+       -trimOption             Override standard trimming option. E.g. 
MAXINFO:<target length>:<strictness> | SLIDINGWINDOW:<window size>:<required 
quality>.
                             default: SLIDINGWINDOW:4:15
        -trimBetter                             Optimize trimming parameter 
using 'Per sequence base content' from fastqc
        -trimBetter_threshold   Threshold for 'Per sequence base content' 
fluctuation. Default:0.15
        -forAssembly                    Trim parameter are optimized for 
assemblies (trim more aggressive).
        -forMapping                             Trim parameter are optimized 
for mapping(allow more errors).
        -minlen                 Minlen parameter for Trimmomatic. Default:50
+       -palindrome                             palindrome parameter used in 
Trimmomatic (use 30 or 1000 for further analysis). Default: 30
+    -gz                     Output trimmed files as .gz
+
+       -db                     Kraken database
+       -nokraken                               skip Kraken
        -index                                  Bowtie2 index if available
        -save_mapping           Save sam files
-       -nokraken                               skip Kraken
        -nomapping                              skip mapping
-       
-       -version                Get version
+    -notrimming             skip trimming
+
+    -version                Get version
 
 Output:
 
 <Sample/Output Folder>
-|-- QCResult
-  |-- Report
-         |-- PDF report per sample
-         |-- HTML report for entire project
-         |-- src
-             |-- img
-                  |-- Summary images
-  |-- FastQC
-      |-- <output folder(s) from FastQC>
-  |-- Trimmed
-      |-- <trimmed reads>
-      |-- FastQC
-          |-- <output folder(s) from FastQC>
+* QCResult
+    * Report
+        - PDF report per sample
+        - HTML report for entire project
+        * src
+            * img
+                - Summary images
+    * FastQC
+        - <output folder(s) from FastQC>
+    * Trimmed
+        - <trimmed reads>
+        * FastQC
+            - <output folder(s) from FastQC>
 
 -------------------
 Program Description
@@ -94,14 +99,15 @@ Program Description
 
 This project consists of 6 files:
 
-QCumber.py             main script for running complete pipeline
-classes.py             script containing classes
-helper.py              small helper functions
-report.tex             Template for sample reports
-config.txt      configuration for Kraken and Trimmomatic
-boxplot.R              boxplots of fastqc output for batch report 
-paramter.txt   default parameter
-config.txt             tool location
+* QCumber.py           main script for running complete pipeline
+* classes.py           script containing classes
+* helper.py                small helper functions
+* report.tex           Template for sample reports
+* batch_report.html Template for batch report
+* config.txt        path to tools and adapter file
+* boxplot.R                boxplots of fastqc output for batch report
+* barplot.R         barplots of read statistics
+* parameter.txt            default parameter for trimming, set pattern for 
Illumina names,..
 
 
 -------
diff --git a/report.tex b/report.tex
index 65558cc..c0059e4 100755
--- a/report.tex
+++ b/report.tex
@@ -30,7 +30,7 @@
 \begin{document}
 
 {\bf {\LARGE{ {{pipeline.name}} } Version {{pipeline.version}}   } }\\
-\line(1,0){ \textwidth }
+\line(1,0){\textwidth} 
 
 \begin{tabular}{p{0.25\textwidth} p{0.75\textwidth}}
 
@@ -55,7 +55,7 @@ Trimmomatic: & {{pipeline.trimmo_version}}\\
 \end{tabular}\\
 
 %----------------- Workflow -------------------%
-\line(1,0){ \textwidth } \\
+\line(1,0){\textwidth} \\
 Processed reads: \\
 {%for read in sample.readSets %}
 {{read.r1.get_filename()}}
@@ -77,7 +77,7 @@ No trimming was performed \\
 
 \end{tcolorbox}
 
-\line(1,0){\textwidth} \\
+\line(1,0){\textwidth}  \\
 \vspace{5mm}
 
 %-------------------- FASTQC Results -------------------%
@@ -97,20 +97,11 @@ Concatenated files:\\
 {%endfor%}
 \end{itemize}
 {%endif%}
-{{read.r1.log}} \\
 
 Trimming Log: \\
 \textcolor{gray}{Using parameter:  {{trim_param}} }\\
 {{read.trimRes.logs}} \\
 
-{% if read.trimRes.blobplot != "" %}
-     \begin{figure}[H]
-     \centering
-    {\includegraphics[width=0.8 \textwidth]{/{{read.trimRes.blobplot}}} }
-    \caption{Blobplot}
-     \end{figure}
-{% endif %}
-%
 {% for i in range(read.r1.qcRes.results|length) %}
     \begin{figure}[H]
         \centering
@@ -139,7 +130,7 @@ Concatenated files: \\
 {%endfor%}
 \end{itemize}
 {%endif%}
-{{read.r2.log}}
+
 {% for i in range(read.r2.qcRes.results|length) %}
     \begin{figure}[H]
         \centering
@@ -163,7 +154,7 @@ Concatenated files: \\
 
 %-------------------- Bowtie Results -------------------%
 {%if sample.mappingRes != None%}
-\line(1,0){\textwidth}
+\line(1,0){\textwidth} 
 \vspace{5mm}
 
 {\Large{Bowtie2} } - Map against \path{ {{sample.reference}} } \\
@@ -171,7 +162,7 @@ Concatenated files: \\
 {%endif%}
 %-------------------- Kraken Results -------------------%
 {%if sample.krakenRes != None%}
-\line(1,0){\textwidth} \\
+\line(1,0){\textwidth} 
 \vspace{5mm}
 
 {\Large{Kraken} } \\

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/qcumber.git

_______________________________________________
debian-med-commit mailing list
debian-med-commit@lists.alioth.debian.org
http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit

[med-svn] [qcumber] 01/05: New upstream version 1.0.14+dfsg

Reply via email to