This is an automated email from the git hooks/post-receive script. tille pushed a commit to branch master in repository metabit.
commit 028b2292853c3d69629a5966d59266ab5e4a3666 Author: Andreas Tille <[email protected]> Date: Tue Feb 14 09:27:20 2017 +0100 New upstream version 0.0+20160923 --- nodes/samtools.py | 16 ---------- nodes/tools/get_stats.py | 14 +++++++-- nodes/tools/statax_Rmodule/doBarplot.R | 29 +++++++++++++++--- nodes/tools/statax_Rmodule/doClust.R | 54 +++++++++++++++++++++------------- nodes/tools/statax_Rmodule/doDiv.R | 44 ++++++++++++++++++++++----- nodes/tools/statax_Rmodule/doHeatmap.R | 23 +++++++++++---- nodes/tools/statax_Rmodule/doPcoa.R | 33 +++++++++++++-------- 7 files changed, 144 insertions(+), 69 deletions(-) diff --git a/nodes/samtools.py b/nodes/samtools.py index df7c1ef..3e54c22 100644 --- a/nodes/samtools.py +++ b/nodes/samtools.py @@ -13,15 +13,6 @@ from pypeline.atomiccmd.builder import \ import pypeline.common.versions as versions -# _VERSION_REGEX = r"Version: (\d+)\.(\d+)(?:\.(\d+))?" -# SAMTOOLS_VERSION = versions.Requirement(call=("samtools",), -# search=_VERSION_REGEX, -# checks=(0, 1, 18)) - -## v0.2.0 was the pre-release version of v1.0, and lacks required features -#_COMMON_CHECK = versions.And(versions.GE(0, 1, 18), -# versions.LT(0, 2, 0)) - _COMMON_CHECK = versions.GE(1, 1, 0) SAMTOOLS_VERSION = versions.Requirement( @@ -39,13 +30,6 @@ class SortSamNode(CommandNode): IN_SAM = input_file, OUT_STDOUT = AtomicCmd.PIPE, CHECK_VERSION = SAMTOOLS_VERSION) - # old samtoolsx0.1 - # cmd_sortbam = AtomicCmd(["samtools", "sort", "-f", "-", "%(OUT_BAM)s"], - # IN_STDIN = cmd_sam2bam, - # OUT_BAM = output_file) - # new samtoolsx1 - ## samtools view -buS reads.singles.bowtie2out.sam | samtools sort -T temp_root -o here.bam -O bam - - cmd_sortbam = AtomicCmd(["samtools", "sort", "-T", "%(TEMP_OUT_PREFIX)s", "-o", "%(OUT_BAM)s", diff --git a/nodes/tools/get_stats.py b/nodes/tools/get_stats.py index bfe6934..52f7125 100755 --- a/nodes/tools/get_stats.py +++ b/nodes/tools/get_stats.py @@ -13,7 +13,8 @@ import re import sys import pysam import os.path - +import shlex +from subprocess import check_output from glob import glob @@ -96,7 +97,16 @@ def get_bowtie2stats(file_): return tot, mapped -def get_readcount(bamfile): + +def get_readcount(bamfile, cmd="samtools view -c {}".format): + if bamfile: + readcount = check_output(shlex.split(cmd(bamfile))) + return(int(readcount.rstrip("\n")), ) + else: + return (None, ) + + +def _get_readcount(bamfile): if bamfile: readcount = pysam.view("-c", bamfile)[0] return (int(readcount),) diff --git a/nodes/tools/statax_Rmodule/doBarplot.R b/nodes/tools/statax_Rmodule/doBarplot.R index ffbab72..32f5441 100755 --- a/nodes/tools/statax_Rmodule/doBarplot.R +++ b/nodes/tools/statax_Rmodule/doBarplot.R @@ -20,18 +20,28 @@ doBarplot <- function(d, out.pdf=NULL, if ( sample.order=="none" || ncol(d)<2 ) { sample.order <- seq_len(ncol(d)) + in.bp <- d } else if (sample.order=="alpha") { sample.order <- order(colnames(d)) + in.bp <- d[,sample.order] } else { sample.order <- hclust(dist(t(d), method=sample.order))$order + in.bp <- d[,sample.order] } - in.bp <- d[,sample.order] + color.gradient <- gglike_colors(nrow(d)) taxa.names <- rownames(in.bp) legend.order <- seq_along(taxa.names) if (!is.null(uncl_col)) { - in.bp <- rbind(in.bp, unclassified=100-colSums(in.bp)) + if(ncol(in.bp) < 2){ + in.bp <- rbind(in.bp, "unclassified"=100-sum(in.bp[,1])) + } else { + in.bp <- rbind(in.bp, "unclassified"=100-colSums(in.bp)) + } + + # #in.bp <- rbind(in.bp, unclassified=100-colSums(in.bp)) + color.gradient <- c(color.gradient, uncl_col) taxa.names <- c(sort(taxa.names), "unclassified") legend.order <- match(taxa.names, sort(taxa.names)) @@ -114,10 +124,21 @@ if( !interactive() ) { d <- read.table(cl$args[1], row.names=1, sep='\t', as.is=TRUE) if( row.names(d)[1] == "ID" ) { names(d) <- d[1,] - d <- d[-1,] + if(ncol(d) < 2){ + d <- subset(d, row(d)>=2) + } else { + d <- d[-1,] + } } taxa.names <- rownames(d) - d <- apply(d, 2, as.numeric) + + if(ncol(d) < 2){ + d[,1] <- as.numeric(d[,1]) + } else { + d <- apply(d, 2, as.numeric) + } + + rownames(d) <- taxa.names doBarplot(d, out.pdf=cl$args[2], diff --git a/nodes/tools/statax_Rmodule/doClust.R b/nodes/tools/statax_Rmodule/doClust.R index f88cd81..ff77485 100755 --- a/nodes/tools/statax_Rmodule/doClust.R +++ b/nodes/tools/statax_Rmodule/doClust.R @@ -8,13 +8,24 @@ source(paste0(statax_dir, "/pvegclust-internal.R")) source(paste0(statax_dir, "/pvegclust.R")) read.abundance.table <- function(filename) { - d <- read.table(cl$args[1], row.names=1, sep='\t', as.is=TRUE) + d <- read.table(filename, row.names=1, sep='\t', as.is=TRUE) if( row.names(d)[1] == "ID" ) { names(d) <- d[1,] - d <- d[-1,] + if(ncol(d) < 2){ + d <- subset(d, row(d)>=2) + } else { + d <- d[-1,] + } } taxa.names <- rownames(d) - d <- apply(d, 2, as.numeric) + + if(ncol(d) < 2){ + d[,1] <- as.numeric(d[,1]) + } else { + d <- apply(d, 2, as.numeric) + } + + rownames(d) <- taxa.names return(d) } @@ -33,24 +44,6 @@ doClust <- function(d, out.clust=NULL, out.clust.pdf=NULL, out.clust.tsv=NULL, paste0(out.clust,".RData"), out.clust.RData) } - if (ncol(d) < 3) - msg <- c("Error: hierarchical clustering cannot be done with less than 3 samples", - paste0("(Input file: ", cl$args[1], ")")) - if (nrow(d) < 3) - msg <- c("Error: hierarchical clustering cannot be done with less than 3 objects (taxa)", - paste0("(Input file: ", cl$args[1], ")")) - if (exists("msg")) { - write(msg, file=stderr()) - if( !interactive() ) { - pdf(out.clust.pdf) - plot.new() - text(0, c(1, .95), msg, adj=0, col='red') - invisible(dev.off()) - write(msg, file=out.clust.tsv) - save(msg, file=out.clust.RData) - quit(save="no", status=0) - } - } if (ncores>1) { pvclust.func <- function(...) { @@ -125,6 +118,25 @@ if( !interactive() ) { d <- read.abundance.table(cl$args[1]) + if (ncol(d) < 3) + msg <- c("Error: hierarchical clustering cannot be done with less than 3 samples", + paste0("(Input file: ", cl$args[1], ")")) + if (nrow(d) < 3) + msg <- c("Error: hierarchical clustering cannot be done with less than 3 objects (taxa)", + paste0("(Input file: ", cl$args[1], ")")) + if (exists("msg")) { + write(msg, file=stderr()) + if( !interactive() ) { + pdf(cl$options$pdf) + plot.new() + text(0, c(1, .95), msg, adj=0, col='red') + invisible(dev.off()) + write(msg, file=cl$options$tsv) + save(msg, file=cl$options$RData) + quit(save="no", status=0) + } + } + doClust(d, out.clust=cl$options$basename, out.clust.pdf=cl$options$pdf, out.clust.tsv=cl$options$tsv, diff --git a/nodes/tools/statax_Rmodule/doDiv.R b/nodes/tools/statax_Rmodule/doDiv.R index 4d25e77..3ad347f 100755 --- a/nodes/tools/statax_Rmodule/doDiv.R +++ b/nodes/tools/statax_Rmodule/doDiv.R @@ -4,10 +4,19 @@ read.abundance.table <- function(filename) { d <- read.table(filename, row.names=1, sep='\t', as.is=TRUE) if( row.names(d)[1] == "ID" ) { names(d) <- d[1,] - d <- d[-1,] + if(ncol(d) < 2){ + d <- subset(d, row(d)>=2) + } else { + d <- d[-1,] + } } taxa.names <- rownames(d) - d <- apply(d, 2, as.numeric) + + if(ncol(d) < 2){ + d[,1] <- as.numeric(d[,1]) + } else { + d <- apply(d, 2, as.numeric) + } rownames(d) <- taxa.names return(d) } @@ -20,15 +29,21 @@ my.write.table <- function(Table, file, sep="\t", quote=F) { split.table <- function(Table, rm.strains=TRUE, default.taxlevels=c("k", "p", "c", "o", "f", "g", "s")) { - if( rm.strains ) Table <- Table[!grepl('\\|t__', row.names(Table)),] + if( rm.strains ) { + Table <- subset(Table, !grepl('\\|t__', row.names(Table))) + # Table <- Table[!grepl('\\|t__', row.names(Table)),] + } sample_names <- names(Table) taxa_names <- strsplit(row.names(Table), '|', fixed=T) taxlevel <- sapply(taxa_names, function(e) default.taxlevels[length(e)]) lasttaxon <- sapply(taxa_names, tail, 1) + ## Data1 <- sapply(default.taxlevels, function(x) Table[taxlevel==x,], + ## simplify=F, USE.NAMES=T) - Data <- sapply(default.taxlevels, function(x) Table[taxlevel==x,], + Data <- sapply(default.taxlevels, function(x) subset(Table, taxlevel==x), simplify=F, USE.NAMES=T) + Data #warning: you should check that the row.names do not contain duplicated taxa. } @@ -40,6 +55,7 @@ filter.table <- function(Data) { colnames(Data$k)[unclassified]), file=stderr()) Data <- sapply(Data, `[`, TRUE, !unclassified) + Data } else{ write("No samples are 100% unclassified", file=stderr()) Data @@ -48,7 +64,7 @@ filter.table <- function(Data) { doDiv <- function(Data, index="shannon") { suppressPackageStartupMessages(require(vegan, quietly=T)) - diversities <- sapply(Data, diversity, index=cl$options$index, MARGIN=2, + diversities <- sapply(Data, diversity, index=index, MARGIN=2, USE.NAMES=T) return(diversities) } @@ -79,10 +95,24 @@ if( !interactive() ) { default.taxlevels <- unlist(strsplit(cl$options$taxlevels, "")) Data <- split.table(Table, rm.strains=cl$options$rmstrains, default.taxlevels=default.taxlevels) - Data <- filter.table(Data) + Data <- filter.table(Data) + # diversity cannot calculate the distance when only one row, so setting to zero and add a dummyrow + for (d in names(Data)){ + if(dim(Data[[d]])[1]<2){ + Data[[d]] = Data[[d]] * 0 + Data[[d]] <-rbind(Data[[d]], 0) + } + } diversities <- doDiv(Data, index=cl$options$index) - + + if(is.null(dim(diversities))){ + diversities = t(as.data.frame(diversities)) + rownames(diversities) = colnames(Data[[1]])[1] # Get sample name + } else{ + + } + out.div <- stdout() if( length(cl$args) == 2 ) out.div <- cl$args[2] my.write.table(diversities, file=out.div) diff --git a/nodes/tools/statax_Rmodule/doHeatmap.R b/nodes/tools/statax_Rmodule/doHeatmap.R index 43e5dec..317f4c5 100755 --- a/nodes/tools/statax_Rmodule/doHeatmap.R +++ b/nodes/tools/statax_Rmodule/doHeatmap.R @@ -1,13 +1,23 @@ #!/usr/bin/env Rscript read.abundance.table <- function(filename) { - d <- read.table(cl$args[1], row.names=1, sep='\t', as.is=TRUE) + d <- read.table(filename, row.names=1, sep='\t', as.is=TRUE) if( row.names(d)[1] == "ID" ) { names(d) <- d[1,] - d <- d[-1,] + if(ncol(d) < 2){ + d <- subset(d, row(d)>=2) + } else { + d <- d[-1,] + } } taxa.names <- rownames(d) - d <- apply(d, 2, as.numeric) + + if(ncol(d) < 2){ + d[,1] <- as.numeric(d[,1]) + } else { + d <- apply(d, 2, as.numeric) + } + rownames(d) <- taxa.names return(d) } @@ -22,11 +32,12 @@ doHeatmap <- function(d, out.pdf=NULL, graph.title=NULL, taxon.title="Taxon", taxa.order <- hclust(dist(d))$order if( ncol(d)>2 ) { samples.order <- hclust(dist(t(d)))$order + heatmap.data <- d[taxa.order, samples.order] } else { - samples.order <- seq_len(ncol(d)) + # samples.order <- seq_len(ncol(d)) + heatmap.data <- d[taxa.order,,drop=FALSE] # drop=FALSE to keep data.frame } - - heatmap.data <- d[taxa.order, samples.order] + heatmap.data <- melt(t(heatmap.data), varnames=c("sample", taxon.title), value.name="abundance") diff --git a/nodes/tools/statax_Rmodule/doPcoa.R b/nodes/tools/statax_Rmodule/doPcoa.R index 4d8b7d0..cb493b2 100755 --- a/nodes/tools/statax_Rmodule/doPcoa.R +++ b/nodes/tools/statax_Rmodule/doPcoa.R @@ -40,19 +40,6 @@ doPCoA <- function(d, # column names will be used for the distance paste0(out.pcoa,".tsv"), out.pcoa.tsv) - if (nrow(d) <= 2) { - msg <- c("Error: PCoA cannot be done with less than 3 columns", - paste0("(Input file: ", cl$args[1], ")")) - write(msg, file=stderr()) - if( !interactive() ) { - pdf(out.pcoa.pdf) - plot.new() - text(0, c(1, .95), msg, adj=0, col='red') - invisible(dev.off()) - write(msg, file=out.pcoa.tsv) - quit(save="no", status=0) - } - } require(ape, quietly=T) suppressPackageStartupMessages(require(vegan, quietly=T)) @@ -121,6 +108,10 @@ doPCoA <- function(d, # column names will be used for the distance legend(x = par("usr")[2], y = par("usr")[4], legend = legend.text, pch=legend.pch, col=legend.out, pt.bg=legend.in, text.col=text.col, bty='n', xpd=TRUE, xjust=0, cex=.8, pt.cex=1.5, ncol=legend.cols) + +# number_of_samples = 37 ## uncomment and replace XX with number of the samples or libs analyzed. +# point.labels <- row.names(pcoa.result$vectors)[1:number_of_samples] +# text(x=x.pcoa.r[1:number_of_samples], y=y.pcoa.r[1:number_of_samples], labels=point.labels, font=1, cex=0.2, pos=3, col="black") if (!is.interactive) invisible(dev.off()) } @@ -170,8 +161,24 @@ if( !interactive() ) { option_list=option_list) cl <- parse_args(Parser, positional_arguments=1) + if (cl$args == "-") cl$args <- "stdin" d <- read.table(cl$args, row.names=1, sep='\t', as.is=TRUE) + + if (ncol(d) <= 2 ) { + msg <- c("Error: PCoA cannot be done with less than 3 samples", + paste0("(Input file: ", cl$args[1], ")")) + write(msg, file=stderr()) + if( !interactive() ) { + pdf(cl$options$pdf) + plot.new() + text(0, c(1, .95), msg, adj=0, col='red') + invisible(dev.off()) + write(msg, file=cl$options$tsv) + quit(save="no", status=0) + } + } + if( row.names(d)[1] == "ID" ) { names(d) <- d[1,] d <- d[-1,] -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/metabit.git _______________________________________________ debian-med-commit mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit
