Commits (2)
#!/cm/shared/apps/R/intel/3.2.1/bin/Rscript
library(edgeR)
library(DESeq2)
library("RColorBrewer")
library("gplots")
library(qusage)
rowMax <- function(x) apply(x,1,max)
col.grp <- function(n,b) {
colorvec <- vector(mode="character", length=length(n))
plotcols <- rainbow(length(b))
for (i in 1:length(n)) {
for (j in 1:length(b)) {
if ( n[i] == b[j] ) {
colorvec[i] = plotcols[j]
}
}
}
c(colorvec)
}
#################### Read in Data ################################
genenames <- read.table(file="genenames.txt",header=TRUE,sep='\t')
tbl <- read.table('countTable.txt',header=TRUE,sep="\t")
tbl2 <- read.table('countTable.logCPM.txt',header=TRUE,sep="\t")
ct <- tbl[,4:length(tbl)]
row.names(ct) <- tbl$ENSEMBL
samples<- names(ct)
dtbl <- read.table('design.txt',header=TRUE,sep="\t")
samtbl <- merge(as.data.frame(samples),dtbl,by.x="samples",by.y="SampleID",all.x=TRUE,sort=FALSE)
grpnames <- levels(factor(as.character(samtbl$SampleGroup)))
samtbl$SampleGroup <- factor(samtbl$SampleGroup, levels=grpnames)
colData <- samtbl[c('SampleGroup','SubjectID')]
row.names(colData) <- samtbl$samples
dds <- DESeqDataSetFromMatrix(countData=ct,colData= colData,design= ~ SampleGroup)
dds <- dds[ rowMax(counts(dds)) > 30, ]
dds <- dds[ colSums(counts(dds)) > 1000000]
if (nrow(counts(dds)) < 1) {
print(paste("Samples are filtered if there is < 1M reads. There are less than no remaining sample(s) after this filter",sep=' '))
q()
}
countTable <- counts(dds)
#grps <- as.character(levels(colData(dds)$SampleGroup))
grps <- as.character(colData(dds)$SampleGroup)
col.blocks <-col.grp(grps,levels(factor(grps)))
libSizes <- as.vector(colSums(countTable))
logcpm <- tbl2[,4:length(tbl2)]
row.names(logcpm) <- tbl2$SYMBOL
tmp.tab <- aggregate(t(logcpm),by=list(as.character(samtbl$SampleGroup)),FUN=mean)
row.names(tmp.tab) <- tmp.tab$Group.1
mean.by.group <- round(log2(t(tmp.tab[,c(2:ncol(tmp.tab))])+1), digits = 2)
#################### Run DESEQ2 ################################
dds <- DESeq(dds)
rld <- rlogTransformation(dds, blind=TRUE)
sampleDists <- dist(t(assay(rld)))
png(file="samples_heatmap.png",bg ="transparent",height=768,width=1024)
heatmap.2(as.matrix(sampleDists), col = bluered(100),RowSideColors = col.blocks,srtRow=45,srtCol=45,trace="none", margins=c(5, 5))
dev.off()
#Compare Samples using PCA
png(file="pca.png",bg ="transparent",height=768,width=1024)
print(plotPCA(rld, intgroup="SampleGroup"),col.hab=col.blocks)
dev.off()
#Do all pairwise comparisons
contrast <- resultsNames(dds)
cond<- levels(colData(dds)$SampleGroup)
a <- length(cond)-1
for (i in 1:a) {
for (j in 2:length(cond)) {
if (i == j) {
next
} else {
res <- as.data.frame(results(dds,contrast=c("SampleGroup",cond[i],cond[j])))
res2 <- merge(genenames,res,by.x='ensembl',by.y='row.names',all.y=TRUE,all.x=FALSE)
output <- merge(res2,mean.by.group,by.y="row.names",by.x='symbol')
output$rawP <- output$pvalue
output$logFC <- output$log2FoldChange
output$fdr <- output$padj
output$bonf <- p.adjust(output$rawP, method ='bonferroni')
write.table(output,file=paste(cond[i],'_',cond[j],'.deseq2.txt',sep=""),quote=FALSE,row.names=FALSE,sep='\t')
filt.out <- na.omit(output[output$fdr < 0.05,])
if (nrow(filt.out) > 2) {
subset <- logcpm[row.names(logcpm) %in% filt.out$symbol,]
gnames <- filt.out[c('ensembl','symbol')]
s <- merge(gnames,subset,by.x="ensembl",by.y="row.names",all.x=FALSE,all.y=TRUE,sort=FALSE)
STREE <- hclust(dist(t(subset)))
zscores <- scale(t(subset))
ngenes <- length(colnames(zscores))
textscale <- (1/(ngenes/30))
if (textscale > 1) {
textscale <-1
}
if (textscale < 0.1) {
textscale <- 0.1
}
png(file=paste(cond[i],'_',cond[j],'.heatmap.deseq2.png',sep=""),height=768,width=1024)
heatmap.2(zscores, col = bluered(100),Rowv = as.dendrogram(STREE), RowSideColors = col.blocks,dendrogram='row', cexCol=textscale,labCol=s$symbol,srtRow=45,srtCol=45,trace="none", margins=c(5, 5))
legend("topright",legend=grpnames,col=rainbow(length(grpnames)),pch=20,cex=0.5)
dev.off()
}
}
}
}
###################### Run EdgeR ########################
###################### Run QuSage ######################
MSIG.geneSets <- read.gmt('geneset.gmt')
design <- model.matrix(~grps)
d <- DGEList(counts=countTable,group=grps,lib.size=libSizes)
d <- calcNormFactors(d)
d <- estimateCommonDisp(d)
png(file="mds.png",bg ="transparent",height=768,width=1024)
plotMDS(d, labels=grps,col=col.blocks)
legend("topleft",legend=grpnames,col=rainbow(length(grpnames)),pch=20)
dev.off()
cond <-levels(d$samples$group)
colnames(design) <- levels(d$samples$group)
a <- length(cond)-1
for (i in 1:a) {
for (j in 2:length(cond)) {
if (i == j) {
next
} else {
c <- exactTest(d, pair=c(cond[j],cond[i]))
res <- c$table
res2 <- merge(genenames,res,by.x='ensembl',by.y='row.names',all.y=TRUE,all.x=FALSE)
output <- merge(res2,mean.by.group,by.y="row.names",by.x='symbol')
output$rawP <- output$PValue
output$logFC <- output$logFC
output$fdr <- p.adjust(output$rawP, method ='fdr')
output$bonf <- p.adjust(output$rawP, method ='bonferroni')
write.table(output,file=paste(cond[i],'_',cond[j],'.edgeR.txt',sep=""),quote=FALSE,row.names=FALSE,sep='\t')
filt.out <- na.omit(output[output$fdr < 0.05,])
if (nrow(filt.out) > 2) {
subset <- logcpm[row.names(logcpm) %in% filt.out$symbol,]
gnames <- filt.out[c('ensembl','symbol')]
s <- merge(gnames,subset,by.x="ensembl",by.y="row.names",all.x=FALSE,all.y=TRUE,sort=FALSE)
STREE <- hclust(dist(t(subset)))
zscores <- scale(t(subset))
ngenes <- length(colnames(zscores))
textscale <- (1/(ngenes/30))
if (textscale > 1) {
textscale <-1
}
if (textscale < 0.1) {
textscale <- 0.1
}
png(file=paste(cond[i],'_',cond[j],'.heatmap.edgeR.png',sep=""),height=768,width=1024)
heatmap.2(zscores, col = bluered(100),Rowv = as.dendrogram(STREE), RowSideColors = col.blocks,dendrogram='row', cexCol=textscale,labCol=s$symbol,srtRow=45,srtCol=45,trace="none", margins=c(5, 5))
legend("topright",legend=grpnames,col=rainbow(length(grpnames)),pch=20,cex=0.5)
dev.off()
gcont <- paste(cond[j],cond[i],sep='-')
qs.results = qusage(logcpm, grps,gcont,MSIG.geneSets)
save(qs.results,file=paste(cond[i],'_',cond[j],'.qusage.rda',sep=""))
}
}
}
}
###################### Run Limma VOOM ########################
# design <- model.matrix(~0+grps)
# colnames(design) <- grpnames
# a <- length(cond)-1
# design.pairs <- c()
# k <- 0
# for (i in 1:a) {
# for (j in 2:length(cond)) {
# if (i == j) {
# next
# } else {
# k <- k+1
# design.pairs[k] <- paste(cond[i],'-',cond[j],sep='')
# }
# }
# }
#contrast.matrix <- makeContrasts(design.pairs,levels=design)
#d <- DGEList(counts=countTable,group=grps,lib.size=libSizes)
#d <- calcNormFactors(d)
#d <- estimateCommonDisp(d)
# v <- voom(d,design,plot=TRUE)
# fit <- lmFit(v,design)
# fit2 <- contrasts.fit(fit, contrast.matrix)
# fit2 <- eBayes(fit2)
# comps <- colnames(fit2$coefficients)
# for (i in 1:length(comps)) {
# res <- topTable(fit2,coef=i,number=Inf,sort.by="P")
# res2 <- merge(genenames,res,by.x='ensembl',by.y='row.names',all.y=TRUE,all.x=FALSE)
# output <- merge(res2,mean.by.group,by.y="row.names",by.x='symbol')
# output$rawP <- output$P.Value
# output$logFC <- output$adj.P.Val
# output$fdr <- p.adjust(output$rawP, method ='fdr')
# output$bonf <- p.adjust(output$rawP, method ='bonferroni')
# write.table(output,file=paste(cond[i],'_',cond[j],'.voom.txt',sep=""),quote=FALSE,row.names=FALSE,sep='\t')
# filt.out <- na.omit(output[output$fdr < 0.05,])
# if (nrow(filt.out) > 2) {
# subset <- logcpm[row.names(logcpm) %in% filt.out$ensembl,]
# gnames <- filt.out[c('ensembl','symbol')]
# s <- merge(gnames,subset,by.x="ensembl",by.y="row.names",all.x=FALSE,all.y=TRUE,sort=FALSE)
# STREE <- hclust(dist(t(subset)))
# zscores <- scale(t(subset))
# ngenes <- length(colnames(zscores))
# textscale <- (1/(ngenes/30))
# if (textscale > 1) {
# textscale <-1
# }
# if (textscale < 0.1) {
# textscale <- 0.1
# }
# png(file=paste(cond[i],'_',cond[j],'.heatmap.voom.png',sep=""),height=768,width=1024)
# heatmap.2(zscores, col = bluered(100),Rowv = as.dendrogram(STREE), RowSideColors = col.blocks,dendrogram='row', cexCol=textscale,labCol=s$symbol,srtRow=45,srtCol=45,trace="none", margins=c(5, 5))
# legend("topright",legend=grpnames,col=rainbow(length(grpnames)),pch=20,cex=0.5)
# dev.off()
# }
# }
../genect_rnaseq/DEA_htseq.R
\ No newline at end of file
library(ballgown)
args<-commandArgs(TRUE)
sample_dirs <- args
samples <- gsub('_stringtie','',sample_dirs)
design <- "design.txt"
samtbl <- read.table(file=design,header=TRUE,sep='\t')
bg <- ballgown(samples=sample_dirs, meas='all')
samples <- gsub('_stringtie','',sampleNames(bg))
mergetbl <- merge(as.data.frame(samples),samtbl,by.x="samples",by.y="SampleID",all.x=TRUE,sort=FALSE)
pData(bg) = data.frame(id=samples, group=as.character(mergetbl$SampleGroup))
save(bg,file="bg.rda")
../genect_rnaseq/build_ballgown.R
\ No newline at end of file
#!/usr/bin/perl -w
#concat_edgeR.pl
#symbol chrom start end ensembl type logFC logCPM PValue monocytes neutrophils rawP fdr bonf
my @files = @ARGV;
open OUT, ">edgeR.results.txt" or die $!;
print OUT join("\t",'G1','G2','ENSEMBL','SYMBOL','TYPE','logFC','logCPM',
'pval','fdr','G1.Mean','G2.Mean'),"\n";
foreach $f (@files) {
open IN, "<$f" or die $!;
my ($fname,$ext) = split(/\.edgeR\./,$f);
$fname =~ s/edgeR\///;
my ($g1,$g2) = split(/_/,$fname);
my $head = <IN>;
chomp($head);
my @colnames = split(/\t/,$head);
while (my $line = <IN>) {
chomp($line);
my @row = split(/\t/,$line);
my %hash;
foreach my $i (0..$#row) {
$hash{$colnames[$i]} = $row[$i];
}
print OUT join("\t",$g1,$g2,$hash{ensembl},$hash{symbol},$hash{type},
sprintf("%.2f",$hash{logFC}),
sprintf("%.2f",$hash{logCPM}),
sprintf("%.1e",$hash{rawP}),sprintf("%.1e",$hash{fdr}),
sprintf("%.0f",$hash{$g1}),sprintf("%.0f",$hash{$g2})),"\n";
}
}
../genect_rnaseq/concat_edgeR.pl
\ No newline at end of file
#!/cm/shared/apps/R/intel/3.2.1/bin/Rscript
library(edgeR)
library(DESeq2)
library("RColorBrewer")
library("gplots")
library(qusage)
rowMax <- function(x) apply(x,1,max)
col.grp <- function(n,b) {
colorvec <- vector(mode="character", length=length(n))
plotcols <- rainbow(length(b))
for (i in 1:length(n)) {
for (j in 1:length(b)) {
if ( n[i] == b[j] ) {
colorvec[i] = plotcols[j]
}
}
}
c(colorvec)
}
#################### Read in Data ################################
genenames <- read.table(file="genenames.txt",header=TRUE,sep='\t')
tbl <- read.table('countTable.txt',header=TRUE,sep="\t")
tbl2 <- read.table('countTable.logCPM.txt',header=TRUE,sep="\t")
ct <- tbl[,4:length(tbl)]
row.names(ct) <- tbl$ENSEMBL
samples<- names(ct)
dtbl <- read.table('design.txt',header=TRUE,sep="\t")
samtbl <- merge(as.data.frame(samples),dtbl,by.x="samples",by.y="SampleID",all.x=TRUE,sort=FALSE)
grpnames <- levels(factor(as.character(samtbl$SampleGroup)))
samtbl$SampleGroup <- factor(samtbl$SampleGroup, levels=grpnames)
colData <- samtbl[c('SampleGroup','SubjectID')]
row.names(colData) <- samtbl$samples
dds <- DESeqDataSetFromMatrix(countData=ct,colData= colData,design= ~ SampleGroup)
dds <- dds[ rowMax(counts(dds)) > 30, ]
dds <- dds[ colSums(counts(dds)) > 1000000]
if (nrow(counts(dds)) < 1) {
print(paste("Samples are filtered if there is < 1M reads. There are less than no remaining sample(s) after this filter",sep=' '))
q()
}
countTable <- counts(dds)
#grps <- as.character(levels(colData(dds)$SampleGroup))
grps <- as.character(colData(dds)$SampleGroup)
col.blocks <-col.grp(grps,levels(factor(grps)))
libSizes <- as.vector(colSums(countTable))
logcpm <- tbl2[,4:length(tbl2)]
row.names(logcpm) <- tbl2$SYMBOL
tmp.tab <- aggregate(t(logcpm),by=list(as.character(samtbl$SampleGroup)),FUN=mean)
row.names(tmp.tab) <- tmp.tab$Group.1
mean.by.group <- round(log2(t(tmp.tab[,c(2:ncol(tmp.tab))])+1), digits = 2)
#################### Run DESEQ2 ################################
dds <- DESeq(dds)
rld <- rlogTransformation(dds, blind=TRUE)
sampleDists <- dist(t(assay(rld)))
png(file="samples_heatmap.png",bg ="transparent",height=768,width=1024)
heatmap.2(as.matrix(sampleDists), col = bluered(100),RowSideColors = col.blocks,srtRow=45,srtCol=45,trace="none", margins=c(5, 5))
dev.off()
#Compare Samples using PCA
png(file="pca.png",bg ="transparent",height=768,width=1024)
print(plotPCA(rld, intgroup="SampleGroup"),col.hab=col.blocks)
dev.off()
#Do all pairwise comparisons
contrast <- resultsNames(dds)
cond<- levels(colData(dds)$SampleGroup)
a <- length(cond)-1
for (i in 1:a) {
for (j in 2:length(cond)) {
if (i == j) {
next
} else {
res <- as.data.frame(results(dds,contrast=c("SampleGroup",cond[i],cond[j])))
res2 <- merge(genenames,res,by.x='ensembl',by.y='row.names',all.y=TRUE,all.x=FALSE)
output <- merge(res2,mean.by.group,by.y="row.names",by.x='symbol')
output$rawP <- output$pvalue
output$logFC <- output$log2FoldChange
output$fdr <- output$padj
output$bonf <- p.adjust(output$rawP, method ='bonferroni')
write.table(output,file=paste(cond[i],'_',cond[j],'.deseq2.txt',sep=""),quote=FALSE,row.names=FALSE,sep='\t')
filt.out <- na.omit(output[output$fdr < 0.05,])
if (nrow(filt.out) > 2) {
subset <- logcpm[row.names(logcpm) %in% filt.out$symbol,]
gnames <- filt.out[c('ensembl','symbol')]
s <- merge(gnames,subset,by.x="ensembl",by.y="row.names",all.x=FALSE,all.y=TRUE,sort=FALSE)
STREE <- hclust(dist(t(subset)))
zscores <- scale(t(subset))
ngenes <- length(colnames(zscores))
textscale <- (1/(ngenes/30))
if (textscale > 1) {
textscale <-1
}
if (textscale < 0.1) {
textscale <- 0.1
}
png(file=paste(cond[i],'_',cond[j],'.heatmap.deseq2.png',sep=""),height=768,width=1024)
heatmap.2(zscores, col = bluered(100),Rowv = as.dendrogram(STREE), RowSideColors = col.blocks,dendrogram='row', cexCol=textscale,labCol=s$symbol,srtRow=45,srtCol=45,trace="none", margins=c(5, 5))
legend("topright",legend=grpnames,col=rainbow(length(grpnames)),pch=20,cex=0.5)
dev.off()
}
}
}
}
###################### Run EdgeR ########################
###################### Run QuSage ######################
MSIG.geneSets <- read.gmt('geneset.gmt')
design <- model.matrix(~grps)
d <- DGEList(counts=countTable,group=grps,lib.size=libSizes)
d <- calcNormFactors(d)
d <- estimateCommonDisp(d)
png(file="mds.png",bg ="transparent",height=768,width=1024)
plotMDS(d, labels=grps,col=col.blocks)
legend("topleft",legend=grpnames,col=rainbow(length(grpnames)),pch=20)
dev.off()
cond <-levels(d$samples$group)
colnames(design) <- levels(d$samples$group)
a <- length(cond)-1
for (i in 1:a) {
for (j in 2:length(cond)) {
if (i == j) {
next
} else {
c <- exactTest(d, pair=c(cond[j],cond[i]))
res <- c$table
res2 <- merge(genenames,res,by.x='ensembl',by.y='row.names',all.y=TRUE,all.x=FALSE)
output <- merge(res2,mean.by.group,by.y="row.names",by.x='symbol')
output$rawP <- output$PValue
output$logFC <- output$logFC
output$fdr <- p.adjust(output$rawP, method ='fdr')
output$bonf <- p.adjust(output$rawP, method ='bonferroni')
write.table(output,file=paste(cond[i],'_',cond[j],'.edgeR.txt',sep=""),quote=FALSE,row.names=FALSE,sep='\t')
filt.out <- na.omit(output[output$fdr < 0.05,])
if (nrow(filt.out) > 2) {
subset <- logcpm[row.names(logcpm) %in% filt.out$symbol,]
gnames <- filt.out[c('ensembl','symbol')]
s <- merge(gnames,subset,by.x="ensembl",by.y="row.names",all.x=FALSE,all.y=TRUE,sort=FALSE)
STREE <- hclust(dist(t(subset)))
zscores <- scale(t(subset))
ngenes <- length(colnames(zscores))
textscale <- (1/(ngenes/30))
if (textscale > 1) {
textscale <-1
}
if (textscale < 0.1) {
textscale <- 0.1
}
png(file=paste(cond[i],'_',cond[j],'.heatmap.edgeR.png',sep=""),height=768,width=1024)
heatmap.2(zscores, col = bluered(100),Rowv = as.dendrogram(STREE), RowSideColors = col.blocks,dendrogram='row', cexCol=textscale,labCol=s$symbol,srtRow=45,srtCol=45,trace="none", margins=c(5, 5))
legend("topright",legend=grpnames,col=rainbow(length(grpnames)),pch=20,cex=0.5)
dev.off()
gcont <- paste(cond[j],cond[i],sep='-')
qs.results = qusage(logcpm, grps,gcont,MSIG.geneSets)
save(qs.results,file=paste(cond[i],'_',cond[j],'.qusage.rda',sep=""))
}
}
}
}
###################### Run Limma VOOM ########################
# design <- model.matrix(~0+grps)
# colnames(design) <- grpnames
# a <- length(cond)-1
# design.pairs <- c()
# k <- 0
# for (i in 1:a) {
# for (j in 2:length(cond)) {
# if (i == j) {
# next
# } else {
# k <- k+1
# design.pairs[k] <- paste(cond[i],'-',cond[j],sep='')
# }
# }
# }
#contrast.matrix <- makeContrasts(design.pairs,levels=design)
#d <- DGEList(counts=countTable,group=grps,lib.size=libSizes)
#d <- calcNormFactors(d)
#d <- estimateCommonDisp(d)
# v <- voom(d,design,plot=TRUE)
# fit <- lmFit(v,design)
# fit2 <- contrasts.fit(fit, contrast.matrix)
# fit2 <- eBayes(fit2)
# comps <- colnames(fit2$coefficients)
# for (i in 1:length(comps)) {
# res <- topTable(fit2,coef=i,number=Inf,sort.by="P")
# res2 <- merge(genenames,res,by.x='ensembl',by.y='row.names',all.y=TRUE,all.x=FALSE)
# output <- merge(res2,mean.by.group,by.y="row.names",by.x='symbol')
# output$rawP <- output$P.Value
# output$logFC <- output$adj.P.Val
# output$fdr <- p.adjust(output$rawP, method ='fdr')
# output$bonf <- p.adjust(output$rawP, method ='bonferroni')
# write.table(output,file=paste(cond[i],'_',cond[j],'.voom.txt',sep=""),quote=FALSE,row.names=FALSE,sep='\t')
# filt.out <- na.omit(output[output$fdr < 0.05,])
# if (nrow(filt.out) > 2) {
# subset <- logcpm[row.names(logcpm) %in% filt.out$ensembl,]
# gnames <- filt.out[c('ensembl','symbol')]
# s <- merge(gnames,subset,by.x="ensembl",by.y="row.names",all.x=FALSE,all.y=TRUE,sort=FALSE)
# STREE <- hclust(dist(t(subset)))
# zscores <- scale(t(subset))
# ngenes <- length(colnames(zscores))
# textscale <- (1/(ngenes/30))
# if (textscale > 1) {
# textscale <-1
# }
# if (textscale < 0.1) {
# textscale <- 0.1
# }
# png(file=paste(cond[i],'_',cond[j],'.heatmap.voom.png',sep=""),height=768,width=1024)
# heatmap.2(zscores, col = bluered(100),Rowv = as.dendrogram(STREE), RowSideColors = col.blocks,dendrogram='row', cexCol=textscale,labCol=s$symbol,srtRow=45,srtCol=45,trace="none", margins=c(5, 5))
# legend("topright",legend=grpnames,col=rainbow(length(grpnames)),pch=20,cex=0.5)
# dev.off()
# }
# }
../genect_rnaseq/dea.R
\ No newline at end of file
#!/usr/bin/perl -w
#parse_gencode.pl
open OUT, ">genenames.txt" or die $!;
print OUT join("\t",'chrom','start','end','ensembl','symbol','type'),"\n";
my $gtf = shift @ARGV;
open GCODE, "<$gtf" or die $!;
while (my $line = <GCODE>) {
chomp($line);
next if ($line =~ m/^#/);
my ($chrom,$source,$feature,$start,$end,$filter,$phase,$frame,$info) =
split(/\t/,$line);
next unless ($feature eq 'gene');
$info =~ s/\"//g;
my %hash;
foreach $a (split(/;\s*/,$info)) {
my ($key,$val) = split(/ /,$a);
$hash{$key} = $val;
}
$hash{gene_id} =~ s/\.\d+//;
print OUT join("\t",$chrom,$start,$end,$hash{gene_id},$hash{gene_name},$hash{gene_type}),"\n";
}
../genect_rnaseq/gencode_genename.pl
\ No newline at end of file
#!/bin/bash
#rnaseqalign.sh
usage() {
echo "-h Help documentation for hisat.sh"
echo "-b --BAM File"
echo "-g --GTF File"
echo "-s --stranded"
echo "-p --Prefix for output file name"
echo "Example: bash geneabudance.sh genect_rnaseq/geneabundance.sh -s stranded -g gtf_file -p pair_id -b bam"
exit 1
}
OPTIND=1 # Reset OPTIND
while getopts :b:g:p:s:h opt
do
case $opt in
b) sbam=$OPTARG;;
g) gtf=$OPTARG;;
p) pair_id=$OPTARG;;
s) stranded=$OPTARG;;
h) usage;;
esac
done
shift $(($OPTIND -1))
# Check for mandatory options
if [[ -z $pair_id ]] || [[ -z $sbam ]]
then
usage
fi
if [[ -z $SLURM_CPUS_ON_NODE ]]
then
SLURM_CPUS_ON_NODE=1
fi
if [[ $SLURM_CPUS_ON_NODE > 64 ]]
then
SLURM_CPUS_ON_NODE=64
fi
source /etc/profile.d/modules.sh
module load subread/1.6.1 stringtie/1.3.2d-intel
featureCounts -s $stranded -M --fraction -J --ignoreDup -T $SLURM_CPUS_ON_NODE -p -g gene_name -a ${gtf} -o ${pair_id}.cts ${sbam}
mkdir ${pair_id}_stringtie
cd ${pair_id}_stringtie
stringtie ../${sbam} -p $SLURM_CPUS_ON_NODE -G ../${gtf} -B -e -o denovo.gtf -A ../${pair_id}.fpkm.txt
../genect_rnaseq/geneabundance.sh
\ No newline at end of file
#!/cm/shared/apps/R/intel/3.2.1/bin/Rscript
library(edgeR)
library(DESeq2)
library("RColorBrewer")
library("gplots")
library(qusage)
rowMax <- function(x) apply(x,1,max)
col.grp <- function(n,b) {
colorvec <- vector(mode="character", length=length(n))
plotcols <- rainbow(length(b))
for (i in 1:length(n)) {
for (j in 1:length(b)) {
if ( n[i] == b[j] ) {
colorvec[i] = plotcols[j]
}
}
}
c(colorvec)
}
#################### Read in Data ################################
genenames <- read.table(file="genenames.txt",header=TRUE,sep='\t')
tbl <- read.table('countTable.txt',header=TRUE,sep="\t")
tbl2 <- read.table('countTable.logCPM.txt',header=TRUE,sep="\t")
ct <- tbl[,4:length(tbl)]
row.names(ct) <- tbl$ENSEMBL
samples<- names(ct)
dtbl <- read.table('design.txt',header=TRUE,sep="\t")
samtbl <- merge(as.data.frame(samples),dtbl,by.x="samples",by.y="SampleID",all.x=TRUE,sort=FALSE)
grpnames <- levels(factor(as.character(samtbl$SampleGroup)))
samtbl$SampleGroup <- factor(samtbl$SampleGroup, levels=grpnames)
colData <- samtbl[c('SampleGroup','SubjectID')]
row.names(colData) <- samtbl$samples
dds <- DESeqDataSetFromMatrix(countData=ct,colData= colData,design= ~ SampleGroup)
dds <- dds[ rowMax(counts(dds)) > 30, ]
dds <- dds[ colSums(counts(dds)) > 1000000]
if (nrow(counts(dds)) < 1) {
print(paste("Samples are filtered if there is < 1M reads. There are less than no remaining sample(s) after this filter",sep=' '))
q()
}
countTable <- counts(dds)
#grps <- as.character(levels(colData(dds)$SampleGroup))
grps <- as.character(colData(dds)$SampleGroup)
col.blocks <-col.grp(grps,levels(factor(grps)))
libSizes <- as.vector(colSums(countTable))
logcpm <- tbl2[,4:length(tbl2)]
row.names(logcpm) <- tbl2$SYMBOL
tmp.tab <- aggregate(t(logcpm),by=list(as.character(samtbl$SampleGroup)),FUN=mean)
row.names(tmp.tab) <- tmp.tab$Group.1
mean.by.group <- round(log2(t(tmp.tab[,c(2:ncol(tmp.tab))])+1), digits = 2)
#################### Run DESEQ2 ################################
dds <- DESeq(dds)
rld <- rlogTransformation(dds, blind=TRUE)
sampleDists <- dist(t(assay(rld)))
png(file="samples_heatmap.png",bg ="transparent",height=768,width=1024)
heatmap.2(as.matrix(sampleDists), col = bluered(100),RowSideColors = col.blocks,srtRow=45,srtCol=45,trace="none", margins=c(5, 5))
dev.off()
#Compare Samples using PCA
png(file="pca.png",bg ="transparent",height=768,width=1024)
print(plotPCA(rld, intgroup="SampleGroup"),col.hab=col.blocks)
dev.off()
#Do all pairwise comparisons
contrast <- resultsNames(dds)
cond<- levels(colData(dds)$SampleGroup)
a <- length(cond)-1
for (i in 1:a) {
for (j in 2:length(cond)) {
if (i == j) {
next
} else {
res <- as.data.frame(results(dds,contrast=c("SampleGroup",cond[i],cond[j])))
res2 <- merge(genenames,res,by.x='ensembl',by.y='row.names',all.y=TRUE,all.x=FALSE)
output <- merge(res2,mean.by.group,by.y="row.names",by.x='symbol')
output$rawP <- output$pvalue
output$logFC <- output$log2FoldChange
output$fdr <- output$padj
output$bonf <- p.adjust(output$rawP, method ='bonferroni')
write.table(output,file=paste(cond[i],'_',cond[j],'.deseq2.txt',sep=""),quote=FALSE,row.names=FALSE,sep='\t')
filt.out <- na.omit(output[output$fdr < 0.05,])
if (nrow(filt.out) > 2) {
subset <- logcpm[row.names(logcpm) %in% filt.out$symbol,]
gnames <- filt.out[c('ensembl','symbol')]
s <- merge(gnames,subset,by.x="ensembl",by.y="row.names",all.x=FALSE,all.y=TRUE,sort=FALSE)
STREE <- hclust(dist(t(subset)))
zscores <- scale(t(subset))
ngenes <- length(colnames(zscores))
textscale <- (1/(ngenes/30))
if (textscale > 1) {
textscale <-1
}
if (textscale < 0.1) {
textscale <- 0.1
}
png(file=paste(cond[i],'_',cond[j],'.heatmap.deseq2.png',sep=""),height=768,width=1024)
heatmap.2(zscores, col = bluered(100),Rowv = as.dendrogram(STREE), RowSideColors = col.blocks,dendrogram='row', cexCol=textscale,labCol=s$symbol,srtRow=45,srtCol=45,trace="none", margins=c(5, 5))
legend("topright",legend=grpnames,col=rainbow(length(grpnames)),pch=20,cex=0.5)
dev.off()
}
}
}
}
###################### Run EdgeR ########################
###################### Run QuSage ######################
MSIG.geneSets <- read.gmt('geneset.gmt')
design <- model.matrix(~grps)
d <- DGEList(counts=countTable,group=grps,lib.size=libSizes)
d <- calcNormFactors(d)
d <- estimateCommonDisp(d)
png(file="mds.png",bg ="transparent",height=768,width=1024)
plotMDS(d, labels=grps,col=col.blocks)
legend("topleft",legend=grpnames,col=rainbow(length(grpnames)),pch=20)
dev.off()
cond <-levels(d$samples$group)
colnames(design) <- levels(d$samples$group)
a <- length(cond)-1
for (i in 1:a) {
for (j in 2:length(cond)) {
if (i == j) {
next
} else {
c <- exactTest(d, pair=c(cond[j],cond[i]))
res <- c$table
res2 <- merge(genenames,res,by.x='ensembl',by.y='row.names',all.y=TRUE,all.x=FALSE)
output <- merge(res2,mean.by.group,by.y="row.names",by.x='symbol')
output$rawP <- output$PValue
output$logFC <- output$logFC
output$fdr <- p.adjust(output$rawP, method ='fdr')
output$bonf <- p.adjust(output$rawP, method ='bonferroni')
write.table(output,file=paste(cond[i],'_',cond[j],'.edgeR.txt',sep=""),quote=FALSE,row.names=FALSE,sep='\t')
filt.out <- na.omit(output[output$fdr < 0.05,])
if (nrow(filt.out) > 2) {
subset <- logcpm[row.names(logcpm) %in% filt.out$symbol,]
gnames <- filt.out[c('ensembl','symbol')]
s <- merge(gnames,subset,by.x="ensembl",by.y="row.names",all.x=FALSE,all.y=TRUE,sort=FALSE)
STREE <- hclust(dist(t(subset)))
zscores <- scale(t(subset))
ngenes <- length(colnames(zscores))
textscale <- (1/(ngenes/30))
if (textscale > 1) {
textscale <-1
}
if (textscale < 0.1) {
textscale <- 0.1
}
png(file=paste(cond[i],'_',cond[j],'.heatmap.edgeR.png',sep=""),height=768,width=1024)
heatmap.2(zscores, col = bluered(100),Rowv = as.dendrogram(STREE), RowSideColors = col.blocks,dendrogram='row', cexCol=textscale,labCol=s$symbol,srtRow=45,srtCol=45,trace="none", margins=c(5, 5))
legend("topright",legend=grpnames,col=rainbow(length(grpnames)),pch=20,cex=0.5)
dev.off()
gcont <- paste(cond[j],cond[i],sep='-')
qs.results = qusage(logcpm, grps,gcont,MSIG.geneSets)
save(qs.results,file=paste(cond[i],'_',cond[j],'.qusage.rda',sep=""))
}
}
}
}
###################### Run Limma VOOM ########################
# design <- model.matrix(~0+grps)
# colnames(design) <- grpnames
# a <- length(cond)-1
# design.pairs <- c()
# k <- 0
# for (i in 1:a) {
# for (j in 2:length(cond)) {
# if (i == j) {
# next
# } else {
# k <- k+1
# design.pairs[k] <- paste(cond[i],'-',cond[j],sep='')
# }
# }
# }
#contrast.matrix <- makeContrasts(design.pairs,levels=design)
#d <- DGEList(counts=countTable,group=grps,lib.size=libSizes)
#d <- calcNormFactors(d)
#d <- estimateCommonDisp(d)
# v <- voom(d,design,plot=TRUE)
# fit <- lmFit(v,design)
# fit2 <- contrasts.fit(fit, contrast.matrix)
# fit2 <- eBayes(fit2)
# comps <- colnames(fit2$coefficients)
# for (i in 1:length(comps)) {
# res <- topTable(fit2,coef=i,number=Inf,sort.by="P")
# res2 <- merge(genenames,res,by.x='ensembl',by.y='row.names',all.y=TRUE,all.x=FALSE)
# output <- merge(res2,mean.by.group,by.y="row.names",by.x='symbol')
# output$rawP <- output$P.Value
# output$logFC <- output$adj.P.Val
# output$fdr <- p.adjust(output$rawP, method ='fdr')
# output$bonf <- p.adjust(output$rawP, method ='bonferroni')
# write.table(output,file=paste(cond[i],'_',cond[j],'.voom.txt',sep=""),quote=FALSE,row.names=FALSE,sep='\t')
# filt.out <- na.omit(output[output$fdr < 0.05,])
# if (nrow(filt.out) > 2) {
# subset <- logcpm[row.names(logcpm) %in% filt.out$ensembl,]
# gnames <- filt.out[c('ensembl','symbol')]
# s <- merge(gnames,subset,by.x="ensembl",by.y="row.names",all.x=FALSE,all.y=TRUE,sort=FALSE)
# STREE <- hclust(dist(t(subset)))
# zscores <- scale(t(subset))
# ngenes <- length(colnames(zscores))
# textscale <- (1/(ngenes/30))
# if (textscale > 1) {
# textscale <-1
# }
# if (textscale < 0.1) {
# textscale <- 0.1
# }
# png(file=paste(cond[i],'_',cond[j],'.heatmap.voom.png',sep=""),height=768,width=1024)
# heatmap.2(zscores, col = bluered(100),Rowv = as.dendrogram(STREE), RowSideColors = col.blocks,dendrogram='row', cexCol=textscale,labCol=s$symbol,srtRow=45,srtCol=45,trace="none", margins=c(5, 5))
# legend("topright",legend=grpnames,col=rainbow(length(grpnames)),pch=20,cex=0.5)
# dev.off()
# }
# }
library(ballgown)
args<-commandArgs(TRUE)
sample_dirs <- args
samples <- gsub('_stringtie','',sample_dirs)
design <- "design.txt"
samtbl <- read.table(file=design,header=TRUE,sep='\t')
bg <- ballgown(samples=sample_dirs, meas='all')
samples <- gsub('_stringtie','',sampleNames(bg))
mergetbl <- merge(as.data.frame(samples),samtbl,by.x="samples",by.y="SampleID",all.x=TRUE,sort=FALSE)
pData(bg) = data.frame(id=samples, group=as.character(mergetbl$SampleGroup))
save(bg,file="bg.rda")
#!/usr/bin/perl -w
#merge_tables.pl
use Getopt::Long qw(:config no_ignore_case no_auto_abbrev);
use File::Basename;
my %opt = ();
my $results = GetOptions (\%opt,'outdir|o=s','help|h');
$opt{outdir} = './' unless ($opt{outdir});
my @files = @ARGV;
foreach $file (@files) {
chomp($file);
open IN, "<$file" or die $!;
$fname = basename($file);
my $sample = (split(/\./,$fname))[0];
$samps{$sample} = 1;
my $head = <IN>;
chomp($head);
my @colnames = split(/\t/,$head);
while (my $line = <IN>) {
chomp($line);
my @row = split(/\t/,$line);
my $gene = $row[0];
my $ct = $row[-1];
$cts{$gene}{$sample} = $ct;
}
close IN;
}
open OUT, ">$opt{outdir}\/countFeature.txt" or die $!;
my @samples = sort {$a cmp $b} keys %samps;
print OUT join("\t",'FeatureCtType',@samples),"\n";
foreach $gene (keys %cts) {
my @line;
foreach $sample (@samples) {
unless ($cts{$gene}{$sample}) {
$cts{$gene}{$sample} = 0;
}
push @line, $cts{$gene}{$sample};
}
print OUT join("\t",$gene,@line),"\n";
}
close OUT;
#!/usr/bin/perl -w
#concat_edgeR.pl
#symbol chrom start end ensembl type logFC logCPM PValue monocytes neutrophils rawP fdr bonf
my @files = @ARGV;
open OUT, ">edgeR.results.txt" or die $!;
print OUT join("\t",'G1','G2','ENSEMBL','SYMBOL','TYPE','logFC','logCPM',
'pval','fdr','G1.Mean','G2.Mean'),"\n";
foreach $f (@files) {
open IN, "<$f" or die $!;
my ($fname,$ext) = split(/\.edgeR\./,$f);
$fname =~ s/edgeR\///;
my ($g1,$g2) = split(/_/,$fname);
my $head = <IN>;
chomp($head);
my @colnames = split(/\t/,$head);
while (my $line = <IN>) {
chomp($line);
my @row = split(/\t/,$line);
my %hash;
foreach my $i (0..$#row) {
$hash{$colnames[$i]} = $row[$i];
}
print OUT join("\t",$g1,$g2,$hash{ensembl},$hash{symbol},$hash{type},
sprintf("%.2f",$hash{logFC}),
sprintf("%.2f",$hash{logCPM}),
sprintf("%.1e",$hash{rawP}),sprintf("%.1e",$hash{fdr}),
sprintf("%.0f",$hash{$g1}),sprintf("%.0f",$hash{$g2})),"\n";
}
}
#!/cm/shared/apps/R/intel/3.2.1/bin/Rscript
library(edgeR)
library(DESeq2)
library("RColorBrewer")
library("gplots")
library(qusage)
rowMax <- function(x) apply(x,1,max)
col.grp <- function(n,b) {
colorvec <- vector(mode="character", length=length(n))
plotcols <- rainbow(length(b))
for (i in 1:length(n)) {
for (j in 1:length(b)) {
if ( n[i] == b[j] ) {
colorvec[i] = plotcols[j]
}
}
}
c(colorvec)
}
#################### Read in Data ################################
genenames <- read.table(file="genenames.txt",header=TRUE,sep='\t')
tbl <- read.table('countTable.txt',header=TRUE,sep="\t")
tbl2 <- read.table('countTable.logCPM.txt',header=TRUE,sep="\t")
ct <- tbl[,4:length(tbl)]
row.names(ct) <- tbl$ENSEMBL
samples<- names(ct)
dtbl <- read.table('design.txt',header=TRUE,sep="\t")
samtbl <- merge(as.data.frame(samples),dtbl,by.x="samples",by.y="SampleID",all.x=TRUE,sort=FALSE)
grpnames <- levels(factor(as.character(samtbl$SampleGroup)))
samtbl$SampleGroup <- factor(samtbl$SampleGroup, levels=grpnames)
colData <- samtbl[c('SampleGroup','SubjectID')]
row.names(colData) <- samtbl$samples
dds <- DESeqDataSetFromMatrix(countData=ct,colData= colData,design= ~ SampleGroup)
dds <- dds[ rowMax(counts(dds)) > 30, ]
dds <- dds[ colSums(counts(dds)) > 1000000]
if (nrow(counts(dds)) < 1) {
print(paste("Samples are filtered if there is < 1M reads. There are less than no remaining sample(s) after this filter",sep=' '))
q()
}
countTable <- counts(dds)
#grps <- as.character(levels(colData(dds)$SampleGroup))
grps <- as.character(colData(dds)$SampleGroup)
col.blocks <-col.grp(grps,levels(factor(grps)))
libSizes <- as.vector(colSums(countTable))
logcpm <- tbl2[,4:length(tbl2)]
row.names(logcpm) <- tbl2$SYMBOL
tmp.tab <- aggregate(t(logcpm),by=list(as.character(samtbl$SampleGroup)),FUN=mean)
row.names(tmp.tab) <- tmp.tab$Group.1
mean.by.group <- round(log2(t(tmp.tab[,c(2:ncol(tmp.tab))])+1), digits = 2)
#################### Run DESEQ2 ################################
dds <- DESeq(dds)
rld <- rlogTransformation(dds, blind=TRUE)
sampleDists <- dist(t(assay(rld)))
png(file="samples_heatmap.png",bg ="transparent",height=768,width=1024)
heatmap.2(as.matrix(sampleDists), col = bluered(100),RowSideColors = col.blocks,srtRow=45,srtCol=45,trace="none", margins=c(5, 5))
dev.off()
#Compare Samples using PCA
png(file="pca.png",bg ="transparent",height=768,width=1024)
print(plotPCA(rld, intgroup="SampleGroup"),col.hab=col.blocks)
dev.off()
#Do all pairwise comparisons
contrast <- resultsNames(dds)
cond<- levels(colData(dds)$SampleGroup)
a <- length(cond)-1
for (i in 1:a) {
for (j in 2:length(cond)) {
if (i == j) {
next
} else {
res <- as.data.frame(results(dds,contrast=c("SampleGroup",cond[i],cond[j])))
res2 <- merge(genenames,res,by.x='ensembl',by.y='row.names',all.y=TRUE,all.x=FALSE)
output <- merge(res2,mean.by.group,by.y="row.names",by.x='symbol')
output$rawP <- output$pvalue
output$logFC <- output$log2FoldChange
output$fdr <- output$padj
output$bonf <- p.adjust(output$rawP, method ='bonferroni')
write.table(output,file=paste(cond[i],'_',cond[j],'.deseq2.txt',sep=""),quote=FALSE,row.names=FALSE,sep='\t')
filt.out <- na.omit(output[output$fdr < 0.05,])
if (nrow(filt.out) > 2) {
subset <- logcpm[row.names(logcpm) %in% filt.out$symbol,]
gnames <- filt.out[c('ensembl','symbol')]
s <- merge(gnames,subset,by.x="ensembl",by.y="row.names",all.x=FALSE,all.y=TRUE,sort=FALSE)
STREE <- hclust(dist(t(subset)))
zscores <- scale(t(subset))
ngenes <- length(colnames(zscores))
textscale <- (1/(ngenes/30))
if (textscale > 1) {
textscale <-1
}
if (textscale < 0.1) {
textscale <- 0.1
}
png(file=paste(cond[i],'_',cond[j],'.heatmap.deseq2.png',sep=""),height=768,width=1024)
heatmap.2(zscores, col = bluered(100),Rowv = as.dendrogram(STREE), RowSideColors = col.blocks,dendrogram='row', cexCol=textscale,labCol=s$symbol,srtRow=45,srtCol=45,trace="none", margins=c(5, 5))
legend("topright",legend=grpnames,col=rainbow(length(grpnames)),pch=20,cex=0.5)
dev.off()
}
}
}
}
###################### Run EdgeR ########################
###################### Run QuSage ######################
MSIG.geneSets <- read.gmt('geneset.gmt')
design <- model.matrix(~grps)
d <- DGEList(counts=countTable,group=grps,lib.size=libSizes)
d <- calcNormFactors(d)
d <- estimateCommonDisp(d)
png(file="mds.png",bg ="transparent",height=768,width=1024)
plotMDS(d, labels=grps,col=col.blocks)
legend("topleft",legend=grpnames,col=rainbow(length(grpnames)),pch=20)
dev.off()
cond <-levels(d$samples$group)
colnames(design) <- levels(d$samples$group)
a <- length(cond)-1
for (i in 1:a) {
for (j in 2:length(cond)) {
if (i == j) {
next
} else {
c <- exactTest(d, pair=c(cond[j],cond[i]))
res <- c$table
res2 <- merge(genenames,res,by.x='ensembl',by.y='row.names',all.y=TRUE,all.x=FALSE)
output <- merge(res2,mean.by.group,by.y="row.names",by.x='symbol')
output$rawP <- output$PValue
output$logFC <- output$logFC
output$fdr <- p.adjust(output$rawP, method ='fdr')
output$bonf <- p.adjust(output$rawP, method ='bonferroni')
write.table(output,file=paste(cond[i],'_',cond[j],'.edgeR.txt',sep=""),quote=FALSE,row.names=FALSE,sep='\t')
filt.out <- na.omit(output[output$fdr < 0.05,])
if (nrow(filt.out) > 2) {
subset <- logcpm[row.names(logcpm) %in% filt.out$symbol,]
gnames <- filt.out[c('ensembl','symbol')]
s <- merge(gnames,subset,by.x="ensembl",by.y="row.names",all.x=FALSE,all.y=TRUE,sort=FALSE)
STREE <- hclust(dist(t(subset)))
zscores <- scale(t(subset))
ngenes <- length(colnames(zscores))
textscale <- (1/(ngenes/30))
if (textscale > 1) {
textscale <-1
}
if (textscale < 0.1) {
textscale <- 0.1
}
png(file=paste(cond[i],'_',cond[j],'.heatmap.edgeR.png',sep=""),height=768,width=1024)
heatmap.2(zscores, col = bluered(100),Rowv = as.dendrogram(STREE), RowSideColors = col.blocks,dendrogram='row', cexCol=textscale,labCol=s$symbol,srtRow=45,srtCol=45,trace="none", margins=c(5, 5))
legend("topright",legend=grpnames,col=rainbow(length(grpnames)),pch=20,cex=0.5)
dev.off()
gcont <- paste(cond[j],cond[i],sep='-')
qs.results = qusage(logcpm, grps,gcont,MSIG.geneSets)
save(qs.results,file=paste(cond[i],'_',cond[j],'.qusage.rda',sep=""))
}
}
}
}
###################### Run Limma VOOM ########################
# design <- model.matrix(~0+grps)
# colnames(design) <- grpnames
# a <- length(cond)-1
# design.pairs <- c()
# k <- 0
# for (i in 1:a) {
# for (j in 2:length(cond)) {
# if (i == j) {
# next
# } else {
# k <- k+1
# design.pairs[k] <- paste(cond[i],'-',cond[j],sep='')
# }
# }
# }
#contrast.matrix <- makeContrasts(design.pairs,levels=design)
#d <- DGEList(counts=countTable,group=grps,lib.size=libSizes)
#d <- calcNormFactors(d)
#d <- estimateCommonDisp(d)
# v <- voom(d,design,plot=TRUE)
# fit <- lmFit(v,design)
# fit2 <- contrasts.fit(fit, contrast.matrix)
# fit2 <- eBayes(fit2)
# comps <- colnames(fit2$coefficients)
# for (i in 1:length(comps)) {
# res <- topTable(fit2,coef=i,number=Inf,sort.by="P")
# res2 <- merge(genenames,res,by.x='ensembl',by.y='row.names',all.y=TRUE,all.x=FALSE)
# output <- merge(res2,mean.by.group,by.y="row.names",by.x='symbol')
# output$rawP <- output$P.Value
# output$logFC <- output$adj.P.Val
# output$fdr <- p.adjust(output$rawP, method ='fdr')
# output$bonf <- p.adjust(output$rawP, method ='bonferroni')
# write.table(output,file=paste(cond[i],'_',cond[j],'.voom.txt',sep=""),quote=FALSE,row.names=FALSE,sep='\t')
# filt.out <- na.omit(output[output$fdr < 0.05,])
# if (nrow(filt.out) > 2) {
# subset <- logcpm[row.names(logcpm) %in% filt.out$ensembl,]
# gnames <- filt.out[c('ensembl','symbol')]
# s <- merge(gnames,subset,by.x="ensembl",by.y="row.names",all.x=FALSE,all.y=TRUE,sort=FALSE)
# STREE <- hclust(dist(t(subset)))
# zscores <- scale(t(subset))
# ngenes <- length(colnames(zscores))
# textscale <- (1/(ngenes/30))
# if (textscale > 1) {
# textscale <-1
# }
# if (textscale < 0.1) {
# textscale <- 0.1
# }
# png(file=paste(cond[i],'_',cond[j],'.heatmap.voom.png',sep=""),height=768,width=1024)
# heatmap.2(zscores, col = bluered(100),Rowv = as.dendrogram(STREE), RowSideColors = col.blocks,dendrogram='row', cexCol=textscale,labCol=s$symbol,srtRow=45,srtCol=45,trace="none", margins=c(5, 5))
# legend("topright",legend=grpnames,col=rainbow(length(grpnames)),pch=20,cex=0.5)
# dev.off()
# }
# }
#!/usr/bin/perl -w
#parse_gencode.pl
open OUT, ">genenames.txt" or die $!;
print OUT join("\t",'chrom','start','end','ensembl','symbol','type'),"\n";
my $gtf = shift @ARGV;
open GCODE, "<$gtf" or die $!;
while (my $line = <GCODE>) {
chomp($line);
next if ($line =~ m/^#/);
my ($chrom,$source,$feature,$start,$end,$filter,$phase,$frame,$info) =
split(/\t/,$line);
next unless ($feature eq 'gene');
$info =~ s/\"//g;
my %hash;
foreach $a (split(/;\s*/,$info)) {
my ($key,$val) = split(/ /,$a);
$hash{$key} = $val;
}
$hash{gene_id} =~ s/\.\d+//;
print OUT join("\t",$chrom,$start,$end,$hash{gene_id},$hash{gene_name},$hash{gene_type}),"\n";
}
#!/bin/bash
#rnaseqalign.sh
usage() {
echo "-h Help documentation for hisat.sh"
echo "-b --BAM File"
echo "-g --GTF File"
echo "-s --stranded"
echo "-p --Prefix for output file name"
echo "Example: bash geneabudance.sh genect_rnaseq/geneabundance.sh -s stranded -g gtf_file -p pair_id -b bam"
exit 1
}
OPTIND=1 # Reset OPTIND
while getopts :b:g:p:s:h opt
do
case $opt in
b) sbam=$OPTARG;;
g) gtf=$OPTARG;;
p) pair_id=$OPTARG;;
s) stranded=$OPTARG;;
h) usage;;
esac
done
shift $(($OPTIND -1))
# Check for mandatory options
if [[ -z $pair_id ]] || [[ -z $sbam ]]
then
usage
fi
if [[ -z $SLURM_CPUS_ON_NODE ]]
then
SLURM_CPUS_ON_NODE=1
fi
if [[ $SLURM_CPUS_ON_NODE > 64 ]]
then
SLURM_CPUS_ON_NODE=64
fi
source /etc/profile.d/modules.sh
module load subread/1.6.1 stringtie/1.3.2d-intel
featureCounts -s $stranded -M --fraction -J --ignoreDup -T $SLURM_CPUS_ON_NODE -p -g gene_name -a ${gtf} -o ${pair_id}.cts ${sbam}
mkdir ${pair_id}_stringtie
cd ${pair_id}_stringtie
stringtie ../${sbam} -p $SLURM_CPUS_ON_NODE -G ${gtf} -B -e -o denovo.gtf -A ../${pair_id}.fpkm.txt
#!/bin/bash
#rnaseqalign.sh
usage() {
echo "-h Help documentation for hisat.sh"
echo "-d --DEA"
echo "Example: bash statanal.sh"
exit 1
}
OPTIND=1 # Reset OPTIND
while getopts :d:h opt
do
case $opt in
d) dea=$OPTARG;;
h) usage;;
esac
done
shift $(($OPTIND -1))
baseDir="`dirname \"$0\"`"
# Check for mandatory options
if [[ -z $SLURM_CPUS_ON_NODE ]]
then
SLURM_CPUS_ON_NODE=1
fi
source /etc/profile.d/modules.sh
perl $baseDir/concat_cts.pl -o ./ *.cts
perl $baseDir/concat_fpkm.pl -o ./ *.fpkm.txt
perl $baseDir/concat_ctsum.pl -o ./ *.cts.summary
cp design.txt design.shiny.txt
cp geneset.gmt geneset.shiny.gmt
if [[ $dea == 'skip' ]]
then
touch empty.png
touch bg.rda
else
module load R/3.2.1-intel
Rscript $baseDir/dea.R
Rscript $baseDir/build_ballgown.R *_stringtie
perl $baseDir/concat_edgeR.pl *.edgeR.txt
fi