From 799e8345767c51de3fd73a759ad450d54c3767ef Mon Sep 17 00:00:00 2001 From: Zhiyu <zhiyu.zhao@utsouthwestern.edu> Date: Fri, 21 Jan 2022 10:47:52 -0600 Subject: [PATCH] 01212022 --- V2.0c/ODA/DESeq2_DEXSeq_Analysis.R | 282 ----------------------------- V2.0c/ODA/getData.R | 29 ++- V2.0c/ODA/getSettings.R | 20 +- V2.0c/ODA/odaObject.R | 2 - V2.0c/ODA/statTests.R | 2 +- V2.0c/ODA/workBook.R | 28 +-- 6 files changed, 50 insertions(+), 313 deletions(-) delete mode 100644 V2.0c/ODA/DESeq2_DEXSeq_Analysis.R diff --git a/V2.0c/ODA/DESeq2_DEXSeq_Analysis.R b/V2.0c/ODA/DESeq2_DEXSeq_Analysis.R deleted file mode 100644 index da66f22..0000000 --- a/V2.0c/ODA/DESeq2_DEXSeq_Analysis.R +++ /dev/null @@ -1,282 +0,0 @@ -#############################Begin Reading Arguments############################# -if (1) { #Set to 1 for actual files or 0 for debugging a test file in the R console. - args=commandArgs(trailingOnly=T) - if (length(args)<6) - stop('Please specify method(deseq2|dexseq), analysis(norm|salmon|de|size factor file(deseq2 only)), data file/folder(for Salmon samples), sample design file, gene annotation file/ID mapping file(for Salmon), output folder, subsetting option(no|genes|samples|both), gene file(na|file_name), sample name pattern(na|pattern), minCount(=3) in a sample, minSample in a group(=2) with at least minCount, full formula, andreduced formula.') - method=args[1] - if (method!='deseq2' && method!='dexseq') - stop('Method must be deseq2|dexseq.') - analysis=args[2] - if (analysis!='norm' && analysis!='de'&& analysis!='salmon') { - analysis='normalized_de' - sizeFactors=read.table(args[2],header=T,sep='\t',check.names=F) - } - dataFile=args[3] #A merged table of counts or a folder for Salmon samples' quant.sf files. - sampleFile=args[4] #A file with sample, condition, and other columns. - annotationFile=args[5] #A gene annotation file or ID mapping file for Salmon. - outputDir=args[6] - subsetting=args[7] #no, genes, samples, both. - if (is.na(subsetting)) - subsetting='no' - if (subsetting=='genes' || subsetting=='both') { - geneSubset=unlist(read.table(args[8],header=F)) #Gene subset file should be a one-column list with no header. If N/A, use "" for args[6]. - } - if (subsetting=='samples' || subsetting=='both') { - sampleSubset=args[9] #Sample subset should be a regex pattern for grep(). - } - minCount=as.numeric(args[10]) - if (is.na(minCount)) - minCount=3 - minSample=as.numeric(args[11]) - if (is.na(minSample)) - minSample=2 - fullFormula=args[12] - if (is.na(fullFormula) || fullFormula=='na' ) - fullFormula='' - reducedFormula=args[13] - if (is.na(reducedFormula) || reducedFormula=='na' ) - reducedFormula='' -} else { - method='dexseq' - analysis='de' - dataFile='./raw_counts.txt' - sampleFile='../../Design.csv' - annotationFile='/archive/CRI/shared/Reference_Genomes/mouse/Ensembl/100/Mus_musculus.GRCm38.100.sorted.gtf' - outputDir='./' - subsetting='both' - geneSubset=unlist(read.table('./genes.txt',header=F)) - sampleSubset=unlist(strsplit('HSC*',',')) - sampleSubset='HSC*' - fullFormula='' - reducedFormula='' -} -#figuresPath=file.path(outputDir,'Figures') -#if (dir.exists(figuresPath)&&!rlang::is_empty(dir(figuresPath))) { -# stop('Figures folder is not empty.') -#} else { -# dir.create(figuresPath,recursive=T) -#} -#############################End Reading Arguments############################# -myLibPath='~/R/4.0.2-gccmkl/' #Changed on 4/23/2021. -dir.create(myLibPath,recursive=T,showWarnings=F) -.libPaths(c(myLibPath,.libPaths())) -if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager") -if (!require(GenomicFeatures)) { - BiocManager::install("GenomicFeatures",lib=myLibPath) - library(GenomicFeatures) -} -if (!require(tximport)) { - BiocManager::install("tximport",lib=myLibPath) - library(tximport) -} -if (!require(apeglm)) { - BiocManager::install("apeglm",lib=myLibPath) - library(apeglm) -} -library(tools) -library(rtracklayer) #Installed with GenomicFeatures -library(DEXSeq) #It includes DESeq2. -message('###############################Analysis started.###############################') -if (analysis=='salmon' && method=='deseq2') { - message('Reading Salmon-quantified samples...') - files=dir(dataFile,recursive=T,pattern="quant.sf",full.names=T) - names(files)=dir(dataFile) - tx2gene=read.table(annotationFile,sep="\t",header=F) - txiTranscripts=tximport(files,type='salmon',txOut=T) - txiGenes=summarizeToGene(txiTranscripts,tx2gene) -} else { - message('Reading data table...') - if (file_ext(dataFile)=='csv') { - countData=read.csv(dataFile,sep=',',header=T,row=1,check.names=F) - } else { - countData=read.table(dataFile,sep='\t',header=T,row=1,check.names=F) - } - message(paste0('Total number of features: ',dim(countData)[1])) -} -message('Reading sample info...') -samples=read.csv(sampleFile,sep=',',stringsAsFactors=F) -if (method=='dexseq') { - tempStr=strsplit(rownames(countData),':') - groupID=sapply(tempStr,`[`,1) #Genes - featureID=sapply(tempStr,`[`,2) #Exons within each gene. -} -if (subsetting=='genes' || subsetting=='both') { - if (method=='deseq2') { - if (analysis!='salmon') { - countData=countData[rownames(countData) %in% geneSubset,] - message(paste0('Subset number of genes: ',dim(countData)[1])) - } - } - if (method=='dexseq') { - countData=countData[groupID %in% geneSubset,] - tempStr=strsplit(rownames(countData),':') - groupID=sapply(tempStr,`[`,1) #Genes - featureID=sapply(tempStr,`[`,2) #Exons within each gene. - message(paste0('Subset number of genes: ',length(unique(groupID)))) - message(paste0('Subset number of features: ',length(featureID))) - } -} -if (subsetting=='samples' || subsetting=='both') { - samples=samples[grep(sampleSubset,samples$sample),] -} -if (analysis!='salmon') { - countData=countData[,samples$sample] - temp=t(rowsum(t(ifelse(countData>=minCount,1,0)),group=samples$condition)) - countData=countData[rowSums(temp>=minSample)>0,] - print('Data size:') - print(dim(countData)) - message(paste0('Filtered number of features: ',dim(countData)[1])) - print('Sample names: ') - print(colnames(countData)) -} -groups=unique(samples$condition) -g=length(groups) -if (g<2) - stop('At least 2 groups should be selected.') -#if (method=='dexseq' && g>2) -# stop('DEXSeq comparisons with >2 groups is currently disabled.') -if (method=="deseq2") { - if (fullFormula=="") { - fullFormula=formula("~ condition") - } else { - fullFormula=formula(fullFormula) - } - if (reducedFormula=="") { - reducedFormula=formula("~ 1") - } else { - reducedFormula=formula(reducedFormula) - } - if (analysis=='salmon') { - message('Generating DESeq2 data object...') - dataSetT=DESeqDataSetFromTximport(txiTranscripts,samples,fullFormula) - if (subsetting=='genes' || subsetting=='both') - dataSetT=dataSetT[rownames(dataSetT) %in% geneSubset,] - if (subsetting=='samples' || subsetting=='both') - dataSetT=dataSetT[,colnames(dataSetT) %in% samples$sample] - dataSetG=DESeqDataSetFromTximport(txiGenes,samples,fullFormula) - if (subsetting=='genes' || subsetting=='both') - dataSetG=dataSetG[rownames(dataSetG) %in% geneSubset,] - if (subsetting=='samples' || subsetting=='both') - dataSetG=dataSetG[,colnames(dataSetG) %in% samples$sample] - } else { - # message('Reading gene annotations...') #Not necessary for DESeq2. - # tx=makeTxDbFromGFF(annotationFile,format="gtf") - # exonicParts=exonsBy(tx,by="gene") #Use cdsBy if reads are counted using cds regions. - message('Generating DESeq2 data object...') - dataSet=DESeqDataSetFromMatrix(countData,samples,fullFormula) - # rowRanges(dataSet)=exonicParts[names(exonicParts) %in% rownames(dataSet)] - } -} -if (method=="dexseq") { - if (fullFormula=="") { - fullFormula=formula("~ sample + exon + condition:exon") - } else { - fullFormula=formula(fullFormula) - } - if (reducedFormula=="") { - reducedFormula=formula("~ sample + exon") - } else { - reducedFormula=formula(reducedFormula) - } - message('Reading gene annotations...') #This is necessary for DEXSeq. - exonicParts=import.gff2(annotationFile) #GFF file generated by DEXSeq's python code. - names(exonicParts)=paste(exonicParts$gene_id,exonicParts$exonic_part_number,sep=':') - message('Generating DEXSeq data object...') - dataSet=DEXSeqDataSet(countData,samples,fullFormula,featureID,groupID,featureRanges=exonicParts[rownames(countData)]) -} -if (analysis=='salmon') { - outputFile1=file.path(outputDir,'gene_count_matrix.txt') - cat("ID\t",file=outputFile1) - write.table(as.data.frame(counts(dataSetG,normalized=F)),file=outputFile1,sep="\t",col.names=colnames(dataSetG),quote=F,append=T) - outputFile1=file.path(outputDir,'transcript_count_matrix.txt') - cat("ID\t",file=outputFile1) - write.table(as.data.frame(counts(dataSetT,normalized=F)),file=outputFile1,sep="\t",col.names=colnames(dataSetT),quote=F,append=T) -} else if (analysis=='norm' || analysis=='de') { - message('Estimating size factors...') - dataSet=estimateSizeFactors(dataSet) - if (analysis=='norm') { - message('Writing data files...') - outputFile1=file.path(outputDir,'normalized_counts.txt') - cat("ID\t",file=outputFile1) - # outputFile2=file.path(outputDir,'fpkms.txt') #TBD - # cat("ID\t",file=outputFile2) - outputFile3=file.path(outputDir,'size_factors.txt') - if (method=='deseq2') { - write.table(as.data.frame(counts(dataSet,normalized=T)),file=outputFile1,sep="\t",col.names=colnames(countData),quote=F,append=T) - # write.table(as.data.frame(fpkm(dataset,robust=T)),file=outputFile2,sep="\t",col.names=colnames(countData),quote=F,append=T) - write.table(as.data.frame(t(sizeFactors(dataSet))),file=outputFile3,col.names=colnames(countData),row.names=F,sep="\t",quote=F) - } - if (method=='dexseq') { - write.table(as.data.frame(featureCounts(dataSet,normalized=T)),file=outputFile1,sep="\t",col.names=colnames(countData),quote=F,append=T) - # write.table(as.data.frame(fpkm(dataset,robust=T)),file=outputFile2,sep="\t",col.names=colnames(countData),quote=F,append=T) - write.table(t(sizeFactors(dataSet)[1:dim(samples)[1]]),file=outputFile3,col.names=samples$sample,row.names=F,sep="\t",quote=F) - } - } - print('Size factors:') - print(sizeFactors) -} else if (analysis=='normalized_de') { #normalized_de - message('Importing size factors...') - sizeFactors=sizeFactors[samples$sample] - sizeFactors(dataSet)=as.numeric(sizeFactors) #deseq2 only. - print('Size factors:') - print(sizeFactors) -} -if (analysis=='de' || analysis=='normalized_de') { - print('Full formula:') - print(fullFormula) - print('Reduced formula:') - print(reducedFormula) - message('Estimating dispersions...') - if (method=='deseq2') - dataSet=estimateDispersions(dataSet) - if (method=='dexseq') - dataSet=estimateDispersions(dataSet,formula=fullFormula) - message(paste0('Testing for ',g,' groups...')) - if (method=='deseq2') { - if (g>2) { - dataSet=nbinomLRT(dataSet,full=fullFormula,reduced=reducedFormula) - res=results(dataSet,cooksCutoff=F) - } - if (g==2) { - dataSet=nbinomWaldTest(dataSet) - res=results(dataSet,cooksCutoff=F,contrast=c('condition',groups[1],groups[2])) - res=lfcShrink(dataSet, coef=paste('condition',groups[2],'vs',groups[1],sep='_'), type="apeglm") - res$log2FoldChange=-res$log2FoldChange #Changed so that log2FC=meanGroup1/meanGroup2 - } - colnames(res)=paste("DESeq2",colnames(res),sep="_") - for (i in 1:g) { - res=cbind(res,rowMeans(counts(dataSet,normalized=T)[,samples$sample[samples$condition==groups[i]]])) - colnames(res)[dim(res)[2]]=paste0('normalized_',groups[i]) - } - for (i in 1:g) { - res=cbind(res,counts(dataSet,normalized=T)[,samples$sample[samples$condition==groups[i]]]) - colnames(res)[(dim(res)[2]-sum(samples$condition==groups[i])+1):(dim(res)[2])]=paste0('normalized_',samples$sample[samples$condition==groups[i]]) - } - for (i in 1:g) { - res=cbind(res,counts(dataSet,normalized=F)[,samples$sample[samples$condition==groups[i]]]) - colnames(res)[(dim(res)[2]-sum(samples$condition==groups[i])+1):(dim(res)[2])]=samples$sample[samples$condition==groups[i]] - } - res=res[order(res$DESeq2_padj),] - res=cbind(rownames(res),res) - colnames(res)[1]='ID' - } - if (method=='dexseq') { - dataSet=testForDEU(dataSet,fullModel=fullFormula,reducedModel=reducedFormula) - dataSet=estimateExonFoldChanges(dataSet,fitExpToVar="condition") - if (g>2) - res=DEXSeqResults(dataSet)#[,c(1,2,6,7,8:(g+8))] - if (g==2) - res=DEXSeqResults(dataSet)#[,c(1,2,6:11)] - res=res[order(res$padj),] - colnames(res)=paste("DEXSeq",colnames(res),sep="_") - } - message('Writing test results...') - if (g>2) - outputFile=file.path(outputDir,paste0('DE_Result_of_',g,'_Groups.txt')) - if (g==2) - outputFile=file.path(outputDir,paste0('DE_Result_',groups[1],'_vs_',groups[2],'.txt')) - write.table(as.data.frame(res),file=outputFile,sep="\t",quote=F,col.names=T,row.names=F,append=F) -} -message('####################################Analysis completed.####################################') - diff --git a/V2.0c/ODA/getData.R b/V2.0c/ODA/getData.R index 3924b94..d3f1706 100644 --- a/V2.0c/ODA/getData.R +++ b/V2.0c/ODA/getData.R @@ -75,10 +75,17 @@ getData=function(inputFile,samples,features,parameters) { } rawDataList[,colnames(samples)]=samples[rawDataList$Sample,] + rawDataList$Feature=factor(rawDataList$Feature,levels=unique(rawDataList$Feature)) + rawDataList$BatchID=factor(rawDataList$BatchID,levels=unique(rawDataList$BatchID)) + rawDataList$GroupID=factor(rawDataList$GroupID,levels=unique(rawDataList$GroupID)) + rawDataList$SubjectID=factor(rawDataList$SubjectID,levels=unique(rawDataList$SubjectID)) rawDataList=aggregate(Value~Feature+BatchID+GroupID+SubjectID,FUN=mean,data=rawDataList) #Data are stored in a list. Technical replicates are averaged if present. rawDataList$Sample=paste(rawDataList$BatchID,rawDataList$GroupID,rawDataList$SubjectID,sep='_') rawDataTable=reshape2::dcast(rawDataList,Feature~BatchID+GroupID+SubjectID,value.var='Value') #Data are also stored in a table with features in rows and samples in columns. - samples=unique(samples) + if (sum(!is.na(samples$Size))) + samples=aggregate(Size~BatchID+GroupID+SubjectID,FUN=mean,data=samples) + else + samples=unique(samples) #Bug fix: added the if lines above on 12/14/2021, so that sizes are averaged for technical replicates. samples$ID=rownames(samples) rownames(samples)=paste(samples$BatchID,samples$GroupID,samples$SubjectID,sep='_') rownames(rawDataTable)=rawDataTable[,1] @@ -86,7 +93,6 @@ getData=function(inputFile,samples,features,parameters) { rawDataTable=as.matrix(rawDataTable) class(rawDataTable)='numeric' samples=samples[colnames(rawDataTable),] - if (!is.null(featureDescription)) { colnames(featureDescription)=c('ID','Description') rownames(featureDescription)=featureDescription[,1] @@ -140,7 +146,8 @@ getData=function(inputFile,samples,features,parameters) { presentSamples=(rawDataTable>parameters$valueFilter) presentSamples[presentSamples]=1 presentSamples[!presentSamples]=0 - sampleSummary=as.data.frame(colSums(presentSamples),drop=F) + presentSamples[is.na(presentSamples)]=0 #Bug fix: this line added on 12/14/2021. + sampleSummary=as.data.frame(colSums(presentSamples,na.rm=T),drop=F) #Bug fix: na.rm=T added on 12/14/2021. names(sampleSummary)='nFeatures' presentGroups=matrix(nrow=attributes$nFeatures,ncol=attributes$nGroups) colnames(presentGroups)=attributes$groups @@ -153,13 +160,19 @@ getData=function(inputFile,samples,features,parameters) { groupSummary[i,1]=sum(samples$GroupID==i) } temp=presentGroups>=parameters$groupCountFilter - groupSummary[,2]=colSums(temp) - presentGroups=cbind(rowSums(temp),presentGroups) + groupSummary[,2]=colSums(temp,na.rm=T) #Bug fix: na.rm=T added on 12/14/2021. + presentGroups=cbind(rowSums(temp,na.rm=T),presentGroups) #Bug fix: na.rm=T added on 12/14/2021. colnames(presentGroups)[1]='Present' - presentSamples=cbind(rowSums(presentSamples),presentSamples) + presentSamples=cbind(rowSums(presentSamples,na.rm=T),presentSamples) #Bug fix: na.rm=T added on 12/14/2021. colnames(presentSamples)[1]='Present' - attributes$nFeaturesPresentInAnySample=sum(presentSamples[,'Present']>0) - attributes$nFeaturesPresentInAnyGroup=sum(presentGroups[,'Present']>0) + attributes$nFeaturesPresentInAnySample=sum(presentSamples[,'Present']>0,na.rm=T) #Bug fix: na.rm=T added on 12/14/2021. + attributes$nFeaturesPresentInAnyGroup=sum(presentGroups[,'Present']>0,na.rm=T) #Bug fix: na.rm=T added on 12/14/2021. + if (!attributes$nFeaturesPresentInAnySample) + stop('No features are present in at least one sample! Consider decreasing the valueFilter threshold.') + if (!attributes$nFeaturesPresentInAnyGroup) + stop('No features are present in at least one group! Consider decreasing the groupCountFilter threshold.') + message(paste0(attributes$nFeaturesPresentInAnySample,' features are present in at least one sample. If the number is low, consider decreasing the valueFilter threshold.')) + message(paste0(attributes$nFeaturesPresentInAnyGroup,' features are present in at least one group. If the number is low, consider decreasing the groupCountFilter threshold.')) message('Finished reading data.') return(list(rawDataList=rawDataList,rawDataTable=rawDataTable,featureDescription=featureDescription,samples=samples,presentSamples=presentSamples,sampleSummary=sampleSummary,presentGroups=presentGroups,groupSummary=groupSummary,attributes=attributes,features=features)) diff --git a/V2.0c/ODA/getSettings.R b/V2.0c/ODA/getSettings.R index dcc8b31..be77982 100755 --- a/V2.0c/ODA/getSettings.R +++ b/V2.0c/ODA/getSettings.R @@ -116,14 +116,16 @@ getFeatures=function(inputFile) { # stop('Both joint metabolites and joint genes must be present.') if (!is.null(eaMetabolites)&&sum(is.na(eaMetabolites[,2]))>0&&sum(is.na(eaMetabolites[,2]))<dim(eaMetabolites)[1]) warning('Empty fold changes are replaced by zeros for the eaMetabolites.') - eaMetabolites[is.na(eaMetabolites[,2]),2]=0 + if (!is.null(eaMetabolites)) + eaMetabolites[is.na(eaMetabolites[,2]),2]=0 if (!is.null(eaMetabolites)) { rownames(eaMetabolites)=eaMetabolites[,1] eaMetabolites=eaMetabolites[,-1,drop=F] } if (!is.null(eaGenes)&&sum(is.na(eaGenes[,2]))>0&&sum(is.na(eaGenes[,2]))<dim(eaGenes)[1]) warning('Empty fold changes are replaced by zeros for the eaGenes.') - eaGenes[is.na(eaGenes[,2]),2]=0 + if (!is.null(eaGenes)) + eaGenes[is.na(eaGenes[,2]),2]=0 if (!is.null(eaGenes)) { rownames(eaGenes)=eaGenes[,1] eaGenes=eaGenes[,-1,drop=F] @@ -268,10 +270,16 @@ getComparisons=function(inputFile) { if (deTable[i,1]=='All Combinations' || deTable[i,1]=='Overall') deANOVAs[[paste0('c',i)]]=temp } - if (!is.null(dePairs)) - dePairs=unique(lapply(dePairs,sort)) - if (!is.null(deANOVAs)) - deANOVAs=unique(lapply(deANOVAs,sort)) + if (!is.null(dePairs)) { #Bug fix: order of groups is kept if no duplicated comparisons. 12/14/2021. + temp=unique(lapply(dePairs,sort)) + if (length(temp)<length(dePairs)) #Duplicated comparisons; only one copy is kept. + dePairs=temp + } + if (!is.null(deANOVAs)) { #Bug fix: order of groups is kept if no duplicated comparisons. 12/14/2021. + temp=unique(lapply(deANOVAs,sort)) + if (length(temp)<length(deANOVAs)) #Duplicated comparisons; only one copy is kept. + deANOVAs=temp + } comparisons$dePairs=dePairs comparisons$deANOVAs=deANOVAs } else diff --git a/V2.0c/ODA/odaObject.R b/V2.0c/ODA/odaObject.R index 747e480..1e655c6 100644 --- a/V2.0c/ODA/odaObject.R +++ b/V2.0c/ODA/odaObject.R @@ -17,12 +17,10 @@ createODAObject=function(inputFile,outputPath) { parameters=getParameters(inputFile) comparisons=getComparisons(inputFile) rawData=getData(inputFile,samples,features,parameters) - presentFeatures=rownames(rawData$presentGroups)[rawData$presentGroups[,'Present']>0] rawData$rawDataTable=rawData$rawDataTable[presentFeatures,] #Raw data is filtered before normalization/transformation. rawData$rawDataList=rawData$rawDataList[rawData$rawDataList[,'Feature']%in%presentFeatures,] #Raw data is filtered before normalization/transformation. # rawData$featureDescription=rawData$featureDescription[presentFeatures] - parameters$groupMeanLowerBound=min(rawData$rawDataTable[!is.na(rawData$rawDataTable)&rawData$rawDataTable>parameters$valueFilter])/2 #Added on 7/6/2021. samples=rawData$samples diff --git a/V2.0c/ODA/statTests.R b/V2.0c/ODA/statTests.R index f5299bf..bcb29be 100644 --- a/V2.0c/ODA/statTests.R +++ b/V2.0c/ODA/statTests.R @@ -193,7 +193,7 @@ anova_test=function(dataList,groups,groupMeanLowerBound,logTransform,normal=T,va glmPerFeature=function(featureData,formula,familyStr,testStr) { statMethod='No test' pValue=NA - if (sd(featureData$Value)==0) { + if (sd(featureData$Value,na.rm=T)==0) { #Bug fix: na.rm=T added on 12/14/2021. statMethod='No test (SD=0)' } else { if (testStr=='glm') diff --git a/V2.0c/ODA/workBook.R b/V2.0c/ODA/workBook.R index f8c12bf..62dff76 100644 --- a/V2.0c/ODA/workBook.R +++ b/V2.0c/ODA/workBook.R @@ -87,7 +87,7 @@ writeGeneralFigures=function(wbObj,sheetName,dataTable,dataList,samples,paramete if (!sheetName%in%names(wbObj$wb)) addWorksheet(wbObj$wb,sheetName) plotPosition=plotPosition-1 #For position 1, plotting starts at row offset 0. - psFile=file.path(figuresPath,paste0(plotType,'.ps')) + pdfFile=file.path(figuresPath,paste0(plotType,'.pdf')) pngFile=file.path(figuresPath,paste0(plotType,'.png')) options(warn=-1) p1=ggplot(dataList,aes(x=Sample,y=Value,color=GroupID,fill=GroupID))+geom_violin(color='black',alpha=0.5)+geom_jitter(position=position_jitter(0.2),size=0.1)+geom_boxplot(color='black',outlier.size=-1,width=0.1)+facet_grid(.~Sample,scales='free_x',space='free_x')+theme(strip.text=element_blank(),axis.text.x = element_text(angle = 90, hjust = 1),legend.position = "none")+ggtitle(paste0('Violin-Boxplot of ',plotType)) @@ -100,26 +100,26 @@ writeGeneralFigures=function(wbObj,sheetName,dataTable,dataList,samples,paramete p3=ggplot(statsData,aes(x=Sample,y=Value,color=GroupID,fill=GroupID,alpha=0.5))+geom_bar(stat='identity')+facet_grid(Stat~BatchID,scales='free',space='free_x')+theme(axis.text.x = element_text(angle = 90, hjust = 1),legend.position = "none")+ggtitle(paste0('Statistics of ',plotType)) p=wrap_plots(p1,p2,nrow=2,heights=c(3,2)) p=wrap_plots(p,p3,widths=c(3,1)) -# ggsave(psFile,plot=p,width=min(50,max(dim(dataTable)[2],12)),height=8,units='in',limitsize=F) +# ggsave(pdfFile,plot=p,width=min(50,max(dim(dataTable)[2],12)),height=8,units='in',limitsize=F) # ggsave(pngFile,plot=p,width=min(50,max(dim(dataTable)[2],12)),height=8,units='in',limitsize=F) # insertImage(wbObj$wb,sheetName,pngFile, width=dim(dataTable)[2], height=8, unit='in',startRow=plotPosition*45+1,startCol=1) # plotPosition=plotPosition+1 dataTable[is.na(dataTable)|dataTable==-Inf]=parameters$groupMeanLowerBound #Clustering doesn't work with NAs/Infs. Added on 3/16/2021. dataTable=dataTable[apply(dataTable,1,sd)>0,] #Scaling doesn't work with zero-sd rows. Added on 3/16/2021. if (!is.null(dim(dataTable))&&dim(dataTable)[1]>1) { #At least 2 features are required. -# psFile=file.path(figuresPath,paste0(plotType,'_heatmap.ps')) +# pdfFile=file.path(figuresPath,paste0(plotType,'_heatmap.pdf')) # pngFile=file.path(figuresPath,paste0(plotType,'_heatmap.png')) - p4=pheatmap(dataTable,main=paste0('Heatmap of ',plotType),border_color=NA,show_rownames=grepl('features',plotType),treeheight_row=0,annotation_col=samples[colnames(dataTable),c('BatchID','GroupID')],annotation_legend=T,na_col='gray',scale=ifelse(parameters$scaling,'row','none'),col=parameters$heatmapColors,cluster_rows=parameters$hclustFeatures,cluster_cols=parameters$hclustSamples)$gtable -# ggsave(psFile,plot=p$gtable,width=min(50,max(dim(dataTable)[2],12)),height=8,units='in',limitsize=F) + p4=pheatmap(dataTable,main=paste0('Heatmap of ',plotType),border_color=NA,show_rownames=grepl('Selected',plotType),treeheight_row=0,annotation_col=samples[colnames(dataTable),c('BatchID','GroupID')],annotation_legend=T,na_col='gray',scale=ifelse(parameters$scaling,'row','none'),col=parameters$heatmapColors,cluster_rows=parameters$hclustFeatures,cluster_cols=parameters$hclustSamples)$gtable +# ggsave(pdfFile,plot=p$gtable,width=min(50,max(dim(dataTable)[2],12)),height=8,units='in',limitsize=F) # ggsave(pngFile,plot=p$gtable,width=min(50,max(dim(dataTable)[2],12)),height=8,units='in',limitsize=F) # insertImage(wbObj$wb,sheetName,pngFile, width=min(50,max(dim(dataTable)[2]/3,4)), height=8, unit='in',startRow=plotPosition*45+1,startCol=1) -# psFile=file.path(figuresPath,paste0(plotType,'_scaled_heatmap.ps')) +# pdfFile=file.path(figuresPath,paste0(plotType,'_scaled_heatmap.pdf')) # pngFile=file.path(figuresPath,paste0(plotType,'_scaled_heatmap.png')) # p=pheatmap(dataTable,main=paste0('Scaled Heatmap of ',plotType),border_color=NA,show_rownames=grepl('features',plotType),treeheight_row=0,annotation_col=samples[colnames(dataTable),c('BatchID','GroupID')],annotation_legend=T,na_col='gray',scale='row',col=heatmapColors,cluster_rows=hclustFeatures,cluster_cols=hclustSamples) -# ggsave(psFile,plot=p$gtable,width=min(50,max(dim(dataTable)[2],12)),height=8,units='in',limitsize=F) +# ggsave(pdfFile,plot=p$gtable,width=min(50,max(dim(dataTable)[2],12)),height=8,units='in',limitsize=F) # ggsave(pngFile,plot=p$gtable,width=min(50,max(dim(dataTable)[2],12)),height=8,units='in',limitsize=F) # insertImage(wbObj$wb,sheetName,pngFile, width=min(50,max(dim(dataTable)[2]/3,4)), height=8, unit='in',startRow=plotPosition*45+1,startCol=ceiling(max(dim(dataTable)[2]/3,4)*2)) -# psFile=file.path(figuresPath,paste0(plotType,'_pca_cor.ps')) +# pdfFile=file.path(figuresPath,paste0(plotType,'_pca_cor.pdf')) # pngFile=file.path(figuresPath,paste0(plotType,'_pca_cor.png')) pcaRes=prcomp(t(dataTable),center=parameters$scaling,scale.=parameters$scaling) dataList=data.frame(pcaRes$x,samples) @@ -144,10 +144,10 @@ writeGeneralFigures=function(wbObj,sheetName,dataTable,dataList,samples,paramete p=wrap_plots(p,p4,p5,p6,widths=c(2,1,1,1)) # p2=ggpairs(as.data.frame(dataTable),lower=list(continuous='smooth'),title=paste0('Scatter Plot of ',plotType)) # p=plot_grid(p1,ggmatrix_gtable(p2),ncol=2,rel_widths=c(1/3,2/3)) -# ggsave(psFile,plot=p,width=min(50,max(dim(dataTable)[2]*2,16)),height=8,units='in',limitsize=F) +# ggsave(pdfFile,plot=p,width=min(50,max(dim(dataTable)[2]*2,16)),height=8,units='in',limitsize=F) # ggsave(pngFile,plot=p,width=min(50,max(dim(dataTable)[2]*2,16)),height=8,units='in',limitsize=F) # insertImage(wbObj$wb,sheetName,pngFile, width=min(50,max(dim(dataTable)[2]*2,16)), height=8, unit='in',startRow=plotPosition*45+1,startCol=ceiling(max(dim(dataTable)[2]/3,4)*2)) - ggsave(psFile,plot=p,width=48,height=8,units='in',limitsize=F,device=cairo_ps) + ggsave(pdfFile,plot=p,width=48,height=8,units='in',limitsize=F,device=cairo_ps) ggsave(pngFile,plot=p,width=48,height=8,units='in',limitsize=F,type='cairo') insertImage(wbObj$wb,sheetName,pngFile,width=48, height=8, unit='in',startRow=plotPosition*48+2,startCol=1) writeData(wbObj$wb,sheetName,plotType,startRow=plotPosition*48+1,startCol=1) @@ -172,7 +172,7 @@ writeFeatureFigures=function(wbObj,sheetName,dataTable,dataList,plotType,plotPos if (!sheetName%in%names(wbObj$wb)) addWorksheet(wbObj$wb,sheetName) plotPosition=plotPosition-1 #For position 1, plotting starts at row offset 0. - psFile=file.path(figuresPath,paste0(plotType,'.ps')) + pdfFile=file.path(figuresPath,paste0(plotType,'.pdf')) pngFile=file.path(figuresPath,paste0(plotType,'.png')) nGroups=length(unique(dataList$GroupID)) nBatches=length(unique(dataList$BatchID)) @@ -198,7 +198,7 @@ writeFeatureFigures=function(wbObj,sheetName,dataTable,dataList,plotType,plotPos p11=wrap_plots(p9,p10,nrow=2,heights=c(3,2)) p=wrap_plots(p,p11,ncol=2,widths=c(1+nGroups+nBatches,length(unique(paste0(dataList$GroupID,'_',dataList$BatchID))))) } - ggsave(psFile,plot=p,width=48,height=8,units='in',limitsize=F,device=cairo_ps) + ggsave(pdfFile,plot=p,width=48,height=8,units='in',limitsize=F,device=cairo_ps) ggsave(pngFile,plot=p,width=48,height=8,units='in',limitsize=F,type='cairo') insertImage(wbObj$wb,sheetName,pngFile,width=48, height=8, unit='in',startRow=plotPosition*48+2,startCol=1) writeData(wbObj$wb,sheetName,plotType,startRow=plotPosition*48+1,startCol=1) @@ -229,7 +229,7 @@ writeEulerDiagrams=function(wbObj,sheetName,presentGroups,presentSamples,samples } } p=wrap_plots(p,ncol=length(p)) #From patchwork - ggsave(file.path(figuresPath,'Euler_Venn.ps'),plot=p,width=48,height=8,units='in',limitsize=F,device=cairo_ps) + ggsave(file.path(figuresPath,'Euler_Venn.pdf'),plot=p,width=48,height=8,units='in',limitsize=F,device=cairo_ps) ggsave(file.path(figuresPath,'Euler_Venn.png'),plot=p,width=48,height=8,units='in',limitsize=F,type='cairo') insertImage(wbObj$wb,sheetName,file.path(figuresPath,'Euler_Venn.png'),width=24, height=4, unit='in',startRow=plotPosition*48+2,startCol=1) writeData(wbObj$wb,sheetName,'Euler Diagrams',startRow=plotPosition*48+1,startCol=1) @@ -257,7 +257,7 @@ writeVolcanoPlot=function(wbObj,sheetName,groups,pAndFC,selectedFeatures,paramet p=p+geom_vline(xintercept=-log2(fcc),linetype="dashed",color = "blue",size=0.2) } } - plotFile=file.path(figuresPath,paste0(groups[1],'_vs_',groups[2],'.ps')) + plotFile=file.path(figuresPath,paste0(groups[1],'_vs_',groups[2],'.pdf')) plotFile2=file.path(figuresPath,paste0(groups[1],'_vs_',groups[2],'.png')) ggsave(plotFile,plot=p,width=8,height=8,units='in') ggsave(plotFile2,plot=p,width=8,height=8,units='in') -- GitLab