From 10a9e70cb8eb4ea56a59210e192213e2236e670a Mon Sep 17 00:00:00 2001 From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu> Date: Thu, 2 May 2019 20:54:20 -0500 Subject: [PATCH] deleting dedundant code --- workflow/process_scripts | 2 +- workflow/scripts/DEA_htseq.R | 1 - workflow/scripts/build_ballgown.R | 16 -- workflow/scripts/concat_cts.pl | 101 --------- workflow/scripts/concat_edgeR.pl | 31 --- workflow/scripts/concat_fpkm.pl | 63 ------ workflow/scripts/dea.R | 242 --------------------- workflow/scripts/gencode_genename.pl | 22 -- workflow/scripts/uniform_integrated_vcf.pl | 63 ------ 9 files changed, 1 insertion(+), 540 deletions(-) delete mode 120000 workflow/scripts/DEA_htseq.R delete mode 100755 workflow/scripts/build_ballgown.R delete mode 100755 workflow/scripts/concat_cts.pl delete mode 100755 workflow/scripts/concat_edgeR.pl delete mode 100755 workflow/scripts/concat_fpkm.pl delete mode 100644 workflow/scripts/dea.R delete mode 100755 workflow/scripts/gencode_genename.pl delete mode 100755 workflow/scripts/uniform_integrated_vcf.pl diff --git a/workflow/process_scripts b/workflow/process_scripts index 93d52e9..56d8030 160000 --- a/workflow/process_scripts +++ b/workflow/process_scripts @@ -1 +1 @@ -Subproject commit 93d52e978cfe4f4bdc5f71e2e9a1af15ed60cf80 +Subproject commit 56d8030ee74ba8665eed40d74cc18caf359d7423 diff --git a/workflow/scripts/DEA_htseq.R b/workflow/scripts/DEA_htseq.R deleted file mode 120000 index 03646f1..0000000 --- a/workflow/scripts/DEA_htseq.R +++ /dev/null @@ -1 +0,0 @@ -dea.R \ No newline at end of file diff --git a/workflow/scripts/build_ballgown.R b/workflow/scripts/build_ballgown.R deleted file mode 100755 index b74bfa2..0000000 --- a/workflow/scripts/build_ballgown.R +++ /dev/null @@ -1,16 +0,0 @@ -library(ballgown) - -args<-commandArgs(TRUE) - -sample_dirs <- args -samples <- gsub('_stringtie','',sample_dirs) -design <- "design.txt" - -samtbl <- read.table(file=design,header=TRUE,sep='\t') - -bg <- ballgown(samples=sample_dirs, meas='all') -samples <- gsub('_stringtie','',sampleNames(bg)) -mergetbl <- merge(as.data.frame(samples),samtbl,by.x="samples",by.y="SampleID",all.x=TRUE,sort=FALSE) -pData(bg) = data.frame(id=samples, group=as.character(mergetbl$SampleGroup)) - -save(bg,file="bg.rda") diff --git a/workflow/scripts/concat_cts.pl b/workflow/scripts/concat_cts.pl deleted file mode 100755 index 7626a06..0000000 --- a/workflow/scripts/concat_cts.pl +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/perl -w -#merge_tables.pl - -use Getopt::Long qw(:config no_ignore_case no_auto_abbrev); -use File::Basename; - -my %opt = (); -my $results = GetOptions (\%opt,'genenames|g=s','outdir|o=s','help|h'); - -#$gnames = $opt{genenames}; - -open SYM, "<genenames.txt" or die $!; -my %symbs = (); -while (my $line = <SYM>) { - chomp($line); - my ($chrom,$start,$end,$ensembl,$symbol,$type) = split(/\t/,$line); - $ensembl = (split(/\./,$ensembl))[0]; - $names{$symbol} = {ensembl=>$ensembl,type=>$type}; -} - -my @files = @ARGV; -foreach $file (@files) { - chomp($file); - open IN, "<$file" or die $!; - $fname = basename($file); - my $sample = (split(/\./,$fname))[0]; - $samps{$sample} = 1; - my $command = <IN>; - my $head = <IN>; - chomp($head); - my @colnames = split(/\t/,$head); - while (my $line = <IN>) { - chomp($line); - my @row = split(/\t/,$line); - my $gene = $row[0]; - my $ct = $row[-1]; - my $type = $names{$gene}{'type'}; - $type = $gene unless ($names{$gene}{'type'}); - $types{$type} = 1; - $total{$sample} += $ct; - $readcts{$sample}{$type} += $ct; - next if($gene =~ m/^__/); - $cts{$gene}{$sample} = $ct; - } - close IN; -} - -my @gtypes = sort {$a cmp $b} keys %types; -open OUT, ">$opt{outdir}\/countTable.stats.txt" or die $!; -print OUT join("\t","Sample","Type","ReadPerc","ReadCt","TotalReads"),"\n"; - -foreach $sample (sort {$a cmp $b} keys %readcts) { - my @line; - my $total = 0; - foreach $type (@gtypes) { - $ct = 0; - $ct = sprintf("%.2e",$readcts{$sample}{$type}/$total{$sample}) if ($readcts{$sample}{$type}); - print OUT join("\t",$sample,$type,$ct,$readcts{$sample}{$type},$total{$sample}),"\n"; - } -} - -my %exc = (HBB=>1,HBD=>1,HBBP1=>1, - HBG1=>1,HBG2=>1,HBE1=>1, - HBZ=>1,HBZP1=>1,HBA2=>1,HBA1=>1); - -open OUT, ">$opt{outdir}\/countTable.txt" or die $!; -open CPM, ">$opt{outdir}\/countTable.logCPM.txt" or die $!; -my @samples = sort {$a cmp $b} keys %samps; -print OUT join("\t",'ENSEMBL','SYMBOL','TYPE',@samples),"\n"; -print CPM join("\t",'ENSEMBL','SYMBOL','TYPE',@samples),"\n"; - -foreach $gene (keys %cts) { - my @line; - my @cpm; - foreach $sample (@samples) { - unless ($cts{$gene}{$sample}) { - $cts{$gene}{$sample} = 0; - } - $cpm = ($cts{$gene}{$sample}/$total{$sample})*1e6; - push @cpm, sprintf("%.2f",log2($cpm)); - push @line, sprintf("%0.f",$cts{$gene}{$sample}); - } - my @sortct = sort {$b cmp $a} @cpm; - #next unless ($sortct[0] >= 1); - next if ($names{$gene}{type} eq 'rRNA'); - next if ($exc{$gene}); - print OUT join("\t",$names{$gene}{ensembl},$gene, - $names{$gene}{type},@line),"\n"; - print CPM join("\t",$names{$gene}{ensembl},$gene, - $names{$gene}{type},@cpm),"\n"; -} -close OUT; -close CPM; -sub log2 { - $n = shift @_; - if ($n < 1) { - return 0; - }else { - return(log($n)/log(2)); - } -} diff --git a/workflow/scripts/concat_edgeR.pl b/workflow/scripts/concat_edgeR.pl deleted file mode 100755 index e736e82..0000000 --- a/workflow/scripts/concat_edgeR.pl +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/perl -w -#concat_edgeR.pl - -#symbol chrom start end ensembl type logFC logCPM PValue monocytes neutrophils rawP fdr bonf - -my @files = @ARGV; -open OUT, ">edgeR.results.txt" or die $!; -print OUT join("\t",'G1','G2','ENSEMBL','SYMBOL','TYPE','logFC','logCPM', - 'pval','fdr','G1.Mean','G2.Mean'),"\n"; -foreach $f (@files) { - open IN, "<$f" or die $!; - my ($fname,$ext) = split(/\.edgeR\./,$f); - $fname =~ s/edgeR\///; - my ($g1,$g2) = split(/_/,$fname); - my $head = <IN>; - chomp($head); - my @colnames = split(/\t/,$head); - while (my $line = <IN>) { - chomp($line); - my @row = split(/\t/,$line); - my %hash; - foreach my $i (0..$#row) { - $hash{$colnames[$i]} = $row[$i]; - } - print OUT join("\t",$g1,$g2,$hash{ensembl},$hash{symbol},$hash{type}, - sprintf("%.2f",$hash{logFC}), - sprintf("%.2f",$hash{logCPM}), - sprintf("%.1e",$hash{rawP}),sprintf("%.1e",$hash{fdr}), - sprintf("%.0f",$hash{$g1}),sprintf("%.0f",$hash{$g2})),"\n"; - } -} diff --git a/workflow/scripts/concat_fpkm.pl b/workflow/scripts/concat_fpkm.pl deleted file mode 100755 index a0eebb1..0000000 --- a/workflow/scripts/concat_fpkm.pl +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/perl -w -#merge_tables.pl - -use Getopt::Long qw(:config no_ignore_case no_auto_abbrev); -use File::Basename; - -my %opt = (); -my $results = GetOptions (\%opt,'genenames|g=s','outdir|o=s','help|h'); - -#$gnames = $opt{genenames}; - -open SYM, "<genenames.txt" or die $!; -my %symbs = (); -while (my $line = <SYM>) { - chomp($line); - my ($chrom,$start,$end,$ensembl,$symbol,$type) = split(/\t/,$line); - $ensembl = (split(/\./,$ensembl))[0]; - $names{$symbol} = {ensembl=>$ensembl,type=>$type}; -} - -my @files = @ARGV; -foreach $file (@files) { - chomp($file); - open IN, "<$file" or die $!; - $fname = basename($file); - my $sample = (split(/\./,$fname))[0]; - $samps{$sample} = 1; - my $command = <IN>; - my $head = <IN>; - chomp($head); - my @colnames = split(/\t/,$head); - while (my $line = <IN>) { - chomp($line); - my ($ensid,$gene,$chr,$strand,$start,$end,$cov,$fpkm,$tmp) = split(/\t/,$line); - $cts{$gene}{$sample} = $fpkm; - } - close IN; -} - -my %exc = (HBB=>1,HBD=>1,HBBP1=>1, - HBG1=>1,HBG2=>1,HBE1=>1, - HBZ=>1,HBZP1=>1,HBA2=>1,HBA1=>1); - -open OUT, ">$opt{outdir}\/countTable.fpkm.txt" or die $!; -my @samples = sort {$a cmp $b} keys %samps; -print OUT join("\t",'ENSEMBL','SYMBOL','TYPE',@samples),"\n"; - -foreach $gene (keys %cts) { - my @line; - foreach $sample (@samples) { - unless ($cts{$gene}{$sample}) { - $cts{$gene}{$sample} = 0; - } - push @line, $cts{$gene}{$sample}; - } - my @sortct = sort {$b cmp $a} @line; - #next unless ($sortct[0] >= 1); - next if ($names{$gene}{type} eq 'rRNA'); - next if ($exc{$gene}); - print OUT join("\t",$names{$gene}{ensembl},$gene, - $names{$gene}{type},@line),"\n"; -} -close OUT; diff --git a/workflow/scripts/dea.R b/workflow/scripts/dea.R deleted file mode 100644 index 4327db1..0000000 --- a/workflow/scripts/dea.R +++ /dev/null @@ -1,242 +0,0 @@ -#!/cm/shared/apps/R/intel/3.2.1/bin/Rscript -library(edgeR) -library(DESeq2) -library("RColorBrewer") -library("gplots") -library(qusage) - -rowMax <- function(x) apply(x,1,max) -col.grp <- function(n,b) { - colorvec <- vector(mode="character", length=length(n)) - plotcols <- rainbow(length(b)) - for (i in 1:length(n)) { - for (j in 1:length(b)) { - if ( n[i] == b[j] ) { - colorvec[i] = plotcols[j] - } - } - } - c(colorvec) -} - - -#################### Read in Data ################################ -genenames <- read.table(file="genenames.txt",header=TRUE,sep='\t') - -tbl <- read.table('countTable.txt',header=TRUE,sep="\t") -tbl2 <- read.table('countTable.logCPM.txt',header=TRUE,sep="\t") -ct <- tbl[,4:length(tbl)] -row.names(ct) <- tbl$ENSEMBL - -samples<- names(ct) - -dtbl <- read.table('design.txt',header=TRUE,sep="\t") -samtbl <- merge(as.data.frame(samples),dtbl,by.x="samples",by.y="SampleID",all.x=TRUE,sort=FALSE) -grpnames <- levels(factor(as.character(samtbl$SampleGroup))) -samtbl$SampleGroup <- factor(samtbl$SampleGroup, levels=grpnames) - -colData <- samtbl[c('SampleGroup','SubjectID')] -row.names(colData) <- samtbl$samples - -dds <- DESeqDataSetFromMatrix(countData=ct,colData= colData,design= ~ SampleGroup) -dds <- dds[ rowMax(counts(dds)) > 30, ] -dds <- dds[ colSums(counts(dds)) > 1000000] - -if (nrow(counts(dds)) < 1) { - print(paste("Samples are filtered if there is < 1M reads. There are less than no remaining sample(s) after this filter",sep=' ')) - q() -} - -countTable <- counts(dds) -#grps <- as.character(levels(colData(dds)$SampleGroup)) -grps <- as.character(colData(dds)$SampleGroup) -col.blocks <-col.grp(grps,levels(factor(grps))) -libSizes <- as.vector(colSums(countTable)) - -logcpm <- tbl2[,4:length(tbl2)] -row.names(logcpm) <- tbl2$SYMBOL - -tmp.tab <- aggregate(t(logcpm),by=list(as.character(samtbl$SampleGroup)),FUN=mean) -row.names(tmp.tab) <- tmp.tab$Group.1 -mean.by.group <- round(log2(t(tmp.tab[,c(2:ncol(tmp.tab))])+1), digits = 2) - -#################### Run DESEQ2 ################################ -dds <- DESeq(dds) -rld <- rlogTransformation(dds, blind=TRUE) -sampleDists <- dist(t(assay(rld))) - -png(file="samples_heatmap.png",bg ="white",height=768,width=1024) -par(mar=c(7,4,4,2)+0.1) -heatmap.2(as.matrix(sampleDists), col = bluered(100),RowSideColors = col.blocks,srtRow=45,srtCol=45,trace="none", margins=c(8,8), cexRow = 1.5, cexCol = 1.5) -dev.off() - -#Compare Samples using PCA -png(file="pca.png",bg ="white",height=768,width=1024) -print(plotPCA(rld, intgroup="SampleGroup"),col.hab=col.blocks) -dev.off() - -#Do all pairwise comparisons -contrast <- resultsNames(dds) -cond<- levels(colData(dds)$SampleGroup) -a <- length(cond)-1 -for (i in 1:a) { - for (j in 2:length(cond)) { - if (i == j) { - next - } else { - res <- as.data.frame(results(dds,contrast=c("SampleGroup",cond[i],cond[j]))) - res2 <- merge(genenames,res,by.x='ensembl',by.y='row.names',all.y=TRUE,all.x=FALSE) - output <- merge(res2,mean.by.group,by.y="row.names",by.x='symbol') - output$rawP <- output$pvalue - output$logFC <- output$log2FoldChange - output$fdr <- output$padj - output$bonf <- p.adjust(output$rawP, method ='bonferroni') - write.table(output,file=paste(cond[i],'_',cond[j],'.deseq2.txt',sep=""),quote=FALSE,row.names=FALSE,sep='\t') - filt.out <- na.omit(output[output$fdr < 0.05,]) - if (nrow(filt.out) > 2) { - subset <- logcpm[row.names(logcpm) %in% filt.out$symbol,] - subset <- subset[!apply(subset, 1, function(x) {any(x == 0)}),] - gnames <- filt.out[c('ensembl','symbol')] - s <- merge(gnames,subset,by.x="ensembl",by.y="row.names",all.x=FALSE,all.y=TRUE,sort=FALSE) - STREE <- hclust(dist(t(subset))) - zscores <- scale(t(subset)) - ngenes <- length(colnames(zscores)) - textscale <- (1/(ngenes/30)) - if (textscale > 1) { - textscale <-1 - } - if (textscale < 0.1) { - textscale <- 0.1 - } - png(file=paste(cond[i],'_',cond[j],'.heatmap.deseq2.png',sep=""),height=768,width=1024) - heatmap.2(zscores, col = bluered(100),Rowv = as.dendrogram(STREE), RowSideColors = col.blocks,dendrogram='row', cexCol=textscale,labCol=s$symbol,srtRow=45,srtCol=45,trace="none", margins=c(5, 5)) - legend("topright",legend=grpnames,col=rainbow(length(grpnames)),pch=20,cex=0.5) - dev.off() - } - } - } -} - -###################### Run EdgeR ######################## -###################### Run QuSage ###################### - -MSIG.geneSets <- read.gmt('geneset.gmt') - -design <- model.matrix(~grps) -d <- DGEList(counts=countTable,group=grps,lib.size=libSizes) -d <- calcNormFactors(d) -d <- estimateCommonDisp(d) -png(file="mds.png",bg ="white",height=768,width=1024) -plotMDS(d, labels=grps,col=col.blocks, cex.axis=1.5, cex.lab=1.5, cex=1.5) -op <- par(cex = 1.5) -legend("topleft",legend=grpnames,col=rainbow(length(grpnames)),pch=20) -dev.off() -cond <-levels(d$samples$group) -colnames(design) <- levels(d$samples$group) -a <- length(cond)-1 -for (i in 1:a) { - for (j in 2:length(cond)) { - if (i == j) { - next - } else { - c <- exactTest(d, pair=c(cond[j],cond[i])) - res <- c$table - res2 <- merge(genenames,res,by.x='ensembl',by.y='row.names',all.y=TRUE,all.x=FALSE) - output <- merge(res2,mean.by.group,by.y="row.names",by.x='symbol') - output$rawP <- output$PValue - output$logFC <- output$logFC - output$fdr <- p.adjust(output$rawP, method ='fdr') - output$bonf <- p.adjust(output$rawP, method ='bonferroni') - write.table(output,file=paste(cond[i],'_',cond[j],'.edgeR.txt',sep=""),quote=FALSE,row.names=FALSE,sep='\t') - filt.out <- na.omit(output[output$fdr < 0.05,]) - if (nrow(filt.out) > 2) { - subset <- logcpm[row.names(logcpm) %in% filt.out$symbol,] - subset <- subset[!apply(subset, 1, function(x) {any(x == 0)}),] - gnames <- filt.out[c('ensembl','symbol')] - s <- merge(gnames,subset,by.x="ensembl",by.y="row.names",all.x=FALSE,all.y=TRUE,sort=FALSE) - STREE <- hclust(dist(t(subset))) - zscores <- scale(t(subset)) - ngenes <- length(colnames(zscores)) - textscale <- (1/(ngenes/30)) - if (textscale > 1) { - textscale <-1 - } - if (textscale < 0.1) { - textscale <- 0.1 - } - png(file=paste(cond[i],'_',cond[j],'.heatmap.edgeR.png',sep=""),height=768,width=1024) - heatmap.2(zscores, col = bluered(100),Rowv = as.dendrogram(STREE), RowSideColors = col.blocks,dendrogram='row', cexCol=textscale,labCol=s$symbol,srtRow=45,srtCol=45,trace="none", margins=c(5, 5)) - legend("topright",legend=grpnames,col=rainbow(length(grpnames)),pch=20,cex=0.5) - dev.off() - gcont <- paste(cond[j],cond[i],sep='-') - qs.results = qusage(logcpm, grps,gcont,MSIG.geneSets) - save(qs.results,file=paste(cond[i],'_',cond[j],'.qusage.rda',sep="")) - } - } - } - } -} - -###################### Run Limma VOOM ######################## - -# design <- model.matrix(~0+grps) -# colnames(design) <- grpnames -# a <- length(cond)-1 -# design.pairs <- c() -# k <- 0 -# for (i in 1:a) { -# for (j in 2:length(cond)) { -# if (i == j) { -# next -# } else { -# k <- k+1 -# design.pairs[k] <- paste(cond[i],'-',cond[j],sep='') -# } -# } -# } - -#contrast.matrix <- makeContrasts(design.pairs,levels=design) - -#d <- DGEList(counts=countTable,group=grps,lib.size=libSizes) -#d <- calcNormFactors(d) -#d <- estimateCommonDisp(d) - -# v <- voom(d,design,plot=TRUE) -# fit <- lmFit(v,design) -# fit2 <- contrasts.fit(fit, contrast.matrix) -# fit2 <- eBayes(fit2) - -# comps <- colnames(fit2$coefficients) -# for (i in 1:length(comps)) { -# res <- topTable(fit2,coef=i,number=Inf,sort.by="P") -# res2 <- merge(genenames,res,by.x='ensembl',by.y='row.names',all.y=TRUE,all.x=FALSE) -# output <- merge(res2,mean.by.group,by.y="row.names",by.x='symbol') -# output$rawP <- output$P.Value -# output$logFC <- output$adj.P.Val -# output$fdr <- p.adjust(output$rawP, method ='fdr') -# output$bonf <- p.adjust(output$rawP, method ='bonferroni') -# write.table(output,file=paste(cond[i],'_',cond[j],'.voom.txt',sep=""),quote=FALSE,row.names=FALSE,sep='\t') -# filt.out <- na.omit(output[output$fdr < 0.05,]) -# if (nrow(filt.out) > 2) { -# subset <- logcpm[row.names(logcpm) %in% filt.out$ensembl,] -# gnames <- filt.out[c('ensembl','symbol')] -# s <- merge(gnames,subset,by.x="ensembl",by.y="row.names",all.x=FALSE,all.y=TRUE,sort=FALSE) -# STREE <- hclust(dist(t(subset))) -# zscores <- scale(t(subset)) -# ngenes <- length(colnames(zscores)) -# textscale <- (1/(ngenes/30)) -# if (textscale > 1) { -# textscale <-1 -# } -# if (textscale < 0.1) { -# textscale <- 0.1 -# } -# png(file=paste(cond[i],'_',cond[j],'.heatmap.voom.png',sep=""),height=768,width=1024) -# heatmap.2(zscores, col = bluered(100),Rowv = as.dendrogram(STREE), RowSideColors = col.blocks,dendrogram='row', cexCol=textscale,labCol=s$symbol,srtRow=45,srtCol=45,trace="none", margins=c(5, 5)) -# legend("topright",legend=grpnames,col=rainbow(length(grpnames)),pch=20,cex=0.5) -# dev.off() -# } -# } - - - diff --git a/workflow/scripts/gencode_genename.pl b/workflow/scripts/gencode_genename.pl deleted file mode 100755 index f2a81f5..0000000 --- a/workflow/scripts/gencode_genename.pl +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/perl -w -#parse_gencode.pl - -open OUT, ">genenames.txt" or die $!; -print OUT join("\t",'chrom','start','end','ensembl','symbol','type'),"\n"; -my $gtf = shift @ARGV; -open GCODE, "<$gtf" or die $!; -while (my $line = <GCODE>) { - chomp($line); - next if ($line =~ m/^#/); - my ($chrom,$source,$feature,$start,$end,$filter,$phase,$frame,$info) = - split(/\t/,$line); - next unless ($feature eq 'gene'); - $info =~ s/\"//g; - my %hash; - foreach $a (split(/;\s*/,$info)) { - my ($key,$val) = split(/ /,$a); - $hash{$key} = $val; - } - $hash{gene_id} =~ s/\.\d+//; - print OUT join("\t",$chrom,$start,$end,$hash{gene_id},$hash{gene_name},$hash{gene_type}),"\n"; -} diff --git a/workflow/scripts/uniform_integrated_vcf.pl b/workflow/scripts/uniform_integrated_vcf.pl deleted file mode 100755 index e2b84dc..0000000 --- a/workflow/scripts/uniform_integrated_vcf.pl +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/perl -#migrate_db.pl - -my $vcf = shift @ARGV; -my $outfile = $vcf; -$outfile =~ s/int.vcf/uniform.vcf/; -open VCF, "<$vcf" or die $!; -open OUT, ">$outfile" or die $!; - -while (my $line = <VCF>) { - chomp($line); - if ($line =~ m/#/) { - print OUT $line,"\n"; - if ($line =~ m/##INFO=<ID=[AO|AD|DP|RO]/) { - $line =~ s/INFO/FORMAT/; - print OUT $line,"\n"; - } - next; - } - my ($chrom, $pos,$id,$ref,$alt,$score, - $filter,$annot,$format,$allele_info) = split(/\t/, $line); - my %hash = (); - foreach $a (split(/;/,$annot)) { - my ($key,$val) = split(/=/,$a); - $hash{$key} = $val; - } - my @deschead = split(/:/,$format); - my @gtinfo = split(/:/,$allele_info); - my %gtdata; - foreach my $i (0..$#deschead) { - $gtdata{$deschead[$i]} = $gtinfo[$i]; - } - $newformat = 'GT:DP:AD:AO:RO'; - $gtdata{DP} = $hash{DP} unless ($gtdata{DP}); - unless ($gtdata{AO}) { - if ($gtdata{AD}){ - ($gtdata{RO},$gtdata{AO}) = split(/,/,$gtdata{AD}); - }elsif ($hash{AD}){ - ($gtdata{RO},$gtdata{AO}) = split(/,/,$hash{AD}); - }elsif ($gtdata{NR} && $gtdata{NV}) { - $gtdata{DP} = $gtdata{NR}; - $gtdata{AO} = $gtdata{NV}; - $gtdata{RO} = $gtdata{DP} - $gtdata{AO}; - }elsif ($hash{TR}) { - $gtdata{AO} = $hash{TR}; - $gtdata{DP} = $hash{TC}; - $gtdata{RO} = $gtdata{DP} - $gtdata{AO}; - } - } - if ($gtdata{DP} < $gtdata{AO}+$gtdata{RO}) { - warn "inconsistent\n"; - } - unless ($gtdata{AD}) { - if (exists $gtdata{RO} && exists $gtdata{AO}) { - $gtdata{AD} = join(",",$gtdata{RO},$gtdata{AO}); - } - } - unless (exists $gtdata{GT} && exists $gtdata{DP} && exists $gtdata{AD} && exists $gtdata{AO} && exists $gtdata{RO}) { - warn "Missing Information\n"; - } - $newgt = join(":",$gtdata{GT},$gtdata{DP},$gtdata{AD},$gtdata{AO},$gtdata{RO}); - print OUT join("\t",$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,$newgt),"\n"; -} -- GitLab